You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

332 lines
7.7 KiB
C++

#include "webcc/url.h"
#include <algorithm>
#include <functional>
#include <sstream>
#include "boost/algorithm/string.hpp"
namespace webcc {
// -----------------------------------------------------------------------------
// Helper functions to decode URL string.
namespace {
// Translate a single hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f')
// into its numeric value (0-15), written to |decimal|.
// Returns false and leaves |decimal| untouched if |hex| is not a hex digit.
bool HexToDecimal(char hex, int* decimal) {
  if ('0' <= hex && hex <= '9') {
    *decimal = hex - '0';
    return true;
  }
  if ('A' <= hex && hex <= 'F') {
    *decimal = (hex - 'A') + 10;
    return true;
  }
  if ('a' <= hex && hex <= 'f') {
    *decimal = (hex - 'a') + 10;
    return true;
  }
  return false;
}
// Percent-decode |encoded|, appending the resulting bytes to |raw|.
// Returns false as soon as a '%' is not followed by two hexadecimal digits or
// a non-ASCII character is encountered; in that case |raw| may already hold
// the bytes decoded up to the failure point.
bool Decode(const std::string& encoded, std::string* raw) {
  const std::size_t size = encoded.size();
  for (std::size_t i = 0; i < size; ++i) {
    const char ch = encoded[i];

    if (ch != '%') {
      // An encoded URI string must be entirely ASCII.
      if (ch > 127 || ch < 0) {
        return false;
      }
      raw->push_back(ch);
      continue;
    }

    // Exactly two hexadecimal digits must follow '%': high nibble first.
    if (++i == size) {
      return false;
    }
    int high = 0;
    if (!HexToDecimal(encoded[i], &high)) {
      return false;
    }

    if (++i == size) {
      return false;
    }
    int low = 0;
    if (!HexToDecimal(encoded[i], &low)) {
      return false;
    }

    raw->push_back(static_cast<char>((high << 4) + low));
  }
  return true;
}
// Percent-encode every byte of |raw| for which |should_encode| returns true;
// all other bytes are copied to the result unchanged.
// |should_encode| receives each byte as a value in the range 0..255.
std::string EncodeImpl(const std::string& raw,
                       std::function<bool(int)> should_encode) {
  static const char kHexDigits[] = "0123456789ABCDEF";

  std::string result;
  for (const char c : raw) {
    // Go through unsigned char so that bytes above 127 (e.g. from a UTF8
    // encoded string) are seen as positive values.
    const int byte = static_cast<unsigned char>(c);

    if (!should_encode(byte)) {
      // Passed through as is.
      result.push_back(c);
      continue;
    }

    result += '%';
    result += kHexDigits[(byte >> 4) & 0xF];
    result += kHexDigits[byte & 0xF];
  }
  return result;
}
// Hand-written ASCII letter/digit test, used in place of std::isalnum to avoid
// taking global lock for performance reasons.
inline bool IsAlphaNumeric(char c) {
  if (c >= 'a' && c <= 'z') {
    return true;
  }
  if (c >= 'A' && c <= 'Z') {
    return true;
  }
  return c >= '0' && c <= '9';
}
// Unreserved characters are allowed in a URL/URI and carry no reserved
// meaning. The set is:
//   - letters A-Z and a-z
//   - digits 0-9
//   - '-' (hyphen), '.' (period), '_' (underscore), '~' (tilde)
inline bool IsUnreserved(int c) {
  switch (c) {
    case '-':
    case '.':
    case '_':
    case '~':
      return true;
    default:
      return IsAlphaNumeric(static_cast<char>(c));
  }
}
// Sub-delimiters may have a scheme-specific meaning inside a URL/URI
// component, but never separate the URL/URI segments themselves.
// The set is: ! $ & ' ( ) * + , ; =
inline bool SubDelimiter(int c) {
  static const char kSubDelimiters[] = {
    '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '='
  };
  for (const char delimiter : kSubDelimiters) {
    if (c == delimiter) {
      return true;
    }
  }
  return false;
}
// Characters accepted in the path portion:
//   - any unreserved character
//   - any sub-delimiter
//   - '%', '/', ':' and '@'
inline bool IsPathChar(int c) {
  switch (c) {
    case '%':
    case '/':
    case ':':
    case '@':
      return true;
    default:
      return IsUnreserved(c) || SubDelimiter(c);
  }
}
// Characters accepted in the query portion: everything that is legal in a
// path, plus '?' (question mark).
inline bool IsQueryChar(int c) {
  if (c == '?') {
    return true;
  }
  return IsPathChar(c);
}
// Percent-encode a URL query string.
// A byte is escaped when it is not a legal query character, and additionally
// when it is '%' or '+'.
inline std::string EncodeQuery(const std::string& query) {
  const auto needs_escape = [](int c) {
    if (c == '%' || c == '+') {
      return true;
    }
    return !IsQueryChar(c);
  };
  return EncodeImpl(query, needs_escape);
}
// Split "key=value" at the first '=' into |key| and |value|.
// Returns false, leaving both outputs untouched, when |kv| has no '=' or when
// the key part would be empty. The value part may be empty.
bool SplitKeyValue(const std::string& kv, std::string* key,
                   std::string* value) {
  const std::size_t eq = kv.find('=');

  const bool has_key = (eq != std::string::npos) && (eq > 0);
  if (has_key) {
    *key = kv.substr(0, eq);
    *value = kv.substr(eq + 1);
  }
  return has_key;
}
} // namespace
// -----------------------------------------------------------------------------
// Build the parameter list from a query string of the form "k1=v1&k2=v2...".
// Segments that SplitKeyValue rejects (no '=', or an empty key) are skipped.
UrlQuery::UrlQuery(const std::string& str) {
  if (str.empty()) {
    return;
  }

  // Walk the '&'-separated segments; |begin| is the start of the current one.
  std::size_t begin = 0;
  while (true) {
    const std::size_t end = str.find('&', begin);

    // substr() clamps the count to the remaining length, so for the last
    // segment (end == npos) this takes everything up to the end of |str|.
    const std::string segment = str.substr(begin, end - begin);

    std::string key;
    std::string value;
    if (SplitKeyValue(segment, &key, &value)) {
      Add(std::move(key), std::move(value));
    }

    if (end == std::string::npos) {
      break;
    }
    begin = end + 1;
  }
}
// Append a parameter, taking ownership of |key| and |value|.
// Does nothing when a parameter with the same key is already present.
void UrlQuery::Add(std::string&& key, std::string&& value) {
  if (Has(key)) {
    return;
  }
  parameters_.push_back({ std::move(key), std::move(value) });
}
// Append a copy of the (|key|, |value|) pair.
// Does nothing when a parameter with the same key is already present.
void UrlQuery::Add(const std::string& key, const std::string& value) {
  if (Has(key)) {
    return;
  }
  parameters_.push_back({ key, value });
}
// Erase the parameter whose key equals |key|; no-op when there is none.
void UrlQuery::Remove(const std::string& key) {
  const auto found = Find(key);
  if (found == parameters_.end()) {
    return;
  }
  parameters_.erase(found);
}
// Return the value stored for |key|, or a reference to a shared empty string
// when no parameter has that key.
const std::string& UrlQuery::Get(const std::string& key) const {
  // Function-local static so the returned reference stays valid.
  static const std::string kEmptyValue;

  const auto found = Find(key);
  return found == parameters_.end() ? kEmptyValue : found->second;
}
std::string UrlQuery::ToString() const {
if (parameters_.empty()) {
return "";
}
std::string str = parameters_[0].first + "=" + parameters_[0].second;
for (std::size_t i = 1; i < parameters_.size(); ++i) {
str += "&";
str += parameters_[i].first + "=" + parameters_[i].second;
}
str = EncodeQuery(str);
return str;
}
// Locate the first parameter whose key equals |key|.
// Returns parameters_.end() when there is no such parameter.
UrlQuery::ConstIterator UrlQuery::Find(const std::string& key) const {
  for (auto it = parameters_.begin(); it != parameters_.end(); ++it) {
    if (it->first == key) {
      return it;
    }
  }
  return parameters_.end();
}
// -----------------------------------------------------------------------------
// Construct a Url from |str| by delegating to Init().
// |decode| is forwarded to Init() unchanged.
Url::Url(const std::string& str, bool decode) {
  this->Init(str, decode);
}
// (Re)initialize this Url from |str|.
//   - |clear|:  when true, all components are reset via Clear() first.
//   - |decode|: when true and |str| contains a '%', |str| is percent-decoded
//               before being parsed.
// If decoding is not requested, not needed, or fails, |str| is parsed as is.
void Url::Init(const std::string& str, bool decode, bool clear) {
  if (clear) {
    Clear();
  }

  if (decode && str.find('%') != std::string::npos) {
    std::string decoded;
    if (Decode(str, &decoded)) {
      Parse(decoded);
      return;
    }
    // TODO: Exception?
    // Decoding failed; fall through and parse the string as given.
  }

  Parse(str);
}
// Append "key=value" to the stored query string, preceded by '&' when the
// query is not empty. |key| and |value| are appended as given (no encoding
// is applied here).
void Url::AddQuery(const std::string& key, const std::string& value) {
  if (!query_.empty()) {
    query_ += "&";
  }

  std::string pair = key;
  pair += "=";
  pair += value;

  query_ += pair;
}
// Split |str| into scheme, host, port, path and query.
//
// Accepted shape: [scheme://]host[:port][/path][?query]
//   - Leading whitespace is ignored.
//   - The path is stored without its leading '/'; it is set to an empty
//     string when |str| has no path.
//   - scheme, port and query are only assigned when present in |str|;
//     otherwise the current member values are left as they are.
//
// The query is split off at the first '?' BEFORE the host/path boundary is
// searched, and "://" only counts as the scheme separator when no '/' or '?'
// precedes it. This keeps '/' or "://" inside the query (for example
// "http://host?next=/home" or "host/p?u=http://x") from being mistaken for
// the path or scheme delimiter.
//
// NOTE: the first ':' in the host part separates host from port, so bracketed
// IPv6 literals are not handled. A '#fragment' is not split off either.
void Url::Parse(const std::string& str) {
  std::string rest = boost::trim_left_copy(str);

  // Scheme: "://" is the separator only if it comes before any '/' or '?'.
  // Since "://" itself contains a '/', that holds exactly when the first
  // '/' or '?' in the string is the one right after the ':'.
  std::size_t pos = rest.find("://");
  if (pos != std::string::npos && rest.find_first_of("/?") == pos + 1) {
    scheme_ = rest.substr(0, pos);
    rest.erase(0, pos + 3);
  }

  // Query: everything after the first '?'.
  pos = rest.find('?');
  if (pos != std::string::npos) {
    query_ = rest.substr(pos + 1);
    rest.erase(pos);
  }

  // What remains is host[:port][/path].
  pos = rest.find('/');
  if (pos != std::string::npos) {
    host_ = rest.substr(0, pos);
    path_ = rest.substr(pos + 1);
  } else {
    host_ = rest;
    path_ = "";
  }

  // Port: split "host:port" at the first ':'.
  if (!host_.empty()) {
    pos = host_.find(':');
    if (pos != std::string::npos) {
      port_ = host_.substr(pos + 1);
      host_ = host_.substr(0, pos);
    }
  }
}
// Reset every URL component (scheme, host, port, path, query) to empty.
void Url::Clear() {
  query_.clear();
  path_.clear();
  port_.clear();
  host_.clear();
  scheme_.clear();
}
} // namespace webcc