From 31cfac65e84faeb639cd712451ec676fa0e31553 Mon Sep 17 00:00:00 2001 From: Adam Gu Date: Fri, 10 Aug 2018 10:22:06 +0800 Subject: [PATCH] Encode URL queries. --- CMakeLists.txt | 3 -- webcc/CMakeLists.txt | 3 ++ webcc/http_server.h | 2 - webcc/logger.h | 1 + webcc/url.cc | 123 ++++++++++++++++++++++++++++++++++++++----- webcc/url.h | 2 - 6 files changed, 113 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bcaa6d5..c98ba63 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,9 +47,6 @@ if(WEBCC_BUILD_UNITTEST) enable_testing() endif() -# Adhere to GNU filesystem layout conventions. -include(GNUInstallDirs) - # Automatically detect _WIN32_WINNT for Asio. # See: https://stackoverflow.com/a/40217291 if(WIN32) diff --git a/webcc/CMakeLists.txt b/webcc/CMakeLists.txt index 5c5f039..a3e28b0 100644 --- a/webcc/CMakeLists.txt +++ b/webcc/CMakeLists.txt @@ -5,6 +5,9 @@ if(MSVC) add_definitions(-D_CRT_SECURE_NO_WARNINGS) endif() +# Adhere to GNU filesystem layout conventions. +include(GNUInstallDirs) + set(HEADERS globals.h http_async_client.h diff --git a/webcc/http_server.h b/webcc/http_server.h index 17615a0..2997376 100644 --- a/webcc/http_server.h +++ b/webcc/http_server.h @@ -2,13 +2,11 @@ #define WEBCC_HTTP_SERVER_H_ #include -#include #include "boost/asio/io_context.hpp" #include "boost/asio/ip/tcp.hpp" #include "boost/asio/signal_set.hpp" #include "boost/scoped_ptr.hpp" -#include "boost/thread/thread.hpp" #include "webcc/globals.h" #include "webcc/http_connection.h" diff --git a/webcc/logger.h b/webcc/logger.h index b8f37e9..800fd5d 100644 --- a/webcc/logger.h +++ b/webcc/logger.h @@ -36,6 +36,7 @@ enum LogMode { // Commonly used modes. const int LOG_CONSOLE_FILE_APPEND = LOG_CONSOLE | LOG_FILE; const int LOG_CONSOLE_FILE_OVERWRITE = LOG_CONSOLE | LOG_FILE | LOG_OVERWRITE; +const int LOG_FILE_OVERWRITE = LOG_FILE | LOG_OVERWRITE; // Initialize logger. // If |dir| is empty, log file will be generated in current directory. 
diff --git a/webcc/url.cc b/webcc/url.cc index 29612d1..090210e 100644 --- a/webcc/url.cc +++ b/webcc/url.cc @@ -1,6 +1,7 @@ #include "webcc/url.h" #include +#include #include namespace webcc { @@ -8,8 +9,10 @@ namespace webcc { // ----------------------------------------------------------------------------- // Helper functions to decode URL string. +namespace { + // Convert a hex character digit to a decimal character value. -static bool HexToDecimal(char hex, int* decimal) { +bool HexToDecimal(char hex, int* decimal) { if (hex >= '0' && hex <= '9') { *decimal = hex - '0'; } else if (hex >= 'A' && hex <= 'F') { @@ -22,7 +25,7 @@ static bool HexToDecimal(char hex, int* decimal) { return true; } -static bool Decode(const std::string& encoded, std::string* raw) { +bool Decode(const std::string& encoded, std::string* raw) { for (auto iter = encoded.begin(); iter != encoded.end(); ++iter) { if (*iter == '%') { if (++iter == encoded.end()) { @@ -58,6 +61,109 @@ static bool Decode(const std::string& encoded, std::string* raw) { return true; } +// Percent-encodes every character for which the given function returns true. +std::string EncodeImpl(const std::string& raw, + std::function should_encode) { + const char* const hex = "0123456789ABCDEF"; + std::string encoded; + + for (auto iter = raw.begin(); iter != raw.end(); ++iter) { + // For a UTF-8 encoded string, a char value can be greater than 127. + int ch = static_cast(*iter); + + // |ch| should be the same under both UTF8 and UTF16. + if (should_encode(ch)) { + encoded.push_back('%'); + encoded.push_back(hex[(ch >> 4) & 0xF]); + encoded.push_back(hex[ch & 0xF]); + } else { + // ASCII doesn't need to be encoded; it should be the same under both + // UTF8 and UTF16. + encoded.push_back(static_cast(ch)); + } + } + + return encoded; +} + +// Our own alphanumeric check, used instead of std::isalnum to avoid +// taking a global lock, for performance reasons. 
+inline bool IsAlphaNumeric(char c) { + return (c >= '0' && c <= '9') || + (c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z'); +} + +// Unreserved characters are those that are allowed in a URL/URI but do not have +// a reserved purpose. They include: +// - A-Z +// - a-z +// - 0-9 +// - '-' (hyphen) +// - '.' (period) +// - '_' (underscore) +// - '~' (tilde) +inline bool IsUnreserved(int c) { + return IsAlphaNumeric((char)c) || + c == '-' || c == '.' || c == '_' || c == '~'; +} + +// Sub-delimiters are those characters that may have a defined meaning within +// a component of a URL/URI for a particular scheme. They do not serve as +// delimiters in any case between URL/URI segments. Sub-delimiters include: +// - All of these !$&'()*+,;= +inline bool SubDelimiter(int c) { + switch (c) { + case '!': + case '$': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case ';': + case '=': + return true; + default: + return false; + } +} + +inline bool IsPathChar(int c) { + return IsUnreserved(c) || SubDelimiter(c) || + c == '%' || c == '/' || c == ':' || c == '@'; +} + +// Legal characters in the query portion include: +// - Any path character +// - '?' (question mark) +inline bool IsQueryChar(int c) { + return IsPathChar(c) || c == '?'; +} + +// Encode the URL query string; '%' and '+' are always percent-encoded. 
+inline std::string EncodeQuery(const std::string& query) { + return EncodeImpl(query, [](int c) { + return !IsQueryChar(c) || c == '%' || c == '+'; + }); +} + +bool SplitKeyValue(const std::string& kv, std::string* key, + std::string* value) { + std::size_t i = kv.find_first_of('='); + if (i == std::string::npos || i == 0) { + return false; + } + + *key = kv.substr(0, i); + *value = kv.substr(i + 1); + return true; +} + +} // namespace + // ----------------------------------------------------------------------------- UrlQuery::UrlQuery(const std::map& map) { @@ -107,6 +213,7 @@ std::string UrlQuery::ToString() const { str += parameters_[i].first + "=" + parameters_[i].second; } + str = EncodeQuery(str); return str; } @@ -149,18 +256,6 @@ std::vector Url::SplitPath(const std::string& path) { return results; } -static bool SplitKeyValue(const std::string& kv, - std::string* key, std::string* value) { - std::size_t i = kv.find_first_of('='); - if (i == std::string::npos || i == 0) { - return false; - } - - *key = kv.substr(0, i); - *value = kv.substr(i + 1); - return true; -} - // static void Url::SplitQuery(const std::string& str, UrlQuery* query) { const std::size_t NPOS = std::string::npos; diff --git a/webcc/url.h b/webcc/url.h index e6a915b..e966bfe 100644 --- a/webcc/url.h +++ b/webcc/url.h @@ -12,8 +12,6 @@ #include #include -//#include "webcc/globals.h" - namespace webcc { // -----------------------------------------------------------------------------