diff --git a/bak/encoding.cc b/bak/encoding.cc deleted file mode 100644 index fd5ebae..0000000 --- a/bak/encoding.cc +++ /dev/null @@ -1 +0,0 @@ -#include "webcc/encoding.h" diff --git a/bak/encoding.h b/bak/encoding.h deleted file mode 100644 index 8b3cf96..0000000 --- a/bak/encoding.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef WEBCC_ENCODING_H_ -#define WEBCC_ENCODING_H_ - -#include -#include - -namespace webcc { - -std::string Utf16ToUtf8(const std::wstring& utf16_string) { - std::wstring_convert> converter; - return converter.to_bytes(utf16_string); -} - -std::wstring Utf8ToUtf16(const std::string& utf8_string) { - std::wstring_convert> converter; - return converter.from_bytes(utf8_string); -} - -} // namespace webcc - -#endif // WEBCC_ENCODING_H_ diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 63ffcb5..938cbc5 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -57,3 +57,8 @@ target_link_libraries(form_server ${EXAMPLE_LIBS}) add_subdirectory(book_server) add_subdirectory(book_client) + +if(WIN32) + add_executable(url_unicode url_unicode.cc encoding.cc encoding.h) + target_link_libraries(url_unicode ${EXAMPLE_LIBS}) +endif() diff --git a/examples/common/book.cc b/examples/common/book.cc deleted file mode 100644 index 62ee25b..0000000 --- a/examples/common/book.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include "examples/common/book.h" - -#include -#include - -const Book kNullBook{}; - -std::ostream& operator<<(std::ostream& os, const Book& book) { - os << "{ " << book.id << ", " << book.title << ", " << book.price << " }"; - return os; -} - -const Book& BookStore::GetBook(const std::string& id) const { - auto it = FindBook(id); - return (it == books_.end() ? kNullBook : *it); -} - -std::string BookStore::AddBook(const Book& book) { - std::string id = NewID(); - books_.push_back({ id, book.title, book.price }); - return id; -} - -bool BookStore::UpdateBook(const Book& book) { - auto it = FindBook(book.id); - if (it != books_.end()) { - it->title = book.title; - it->price = book.price; - return true; - } - return false; -} - -bool BookStore::DeleteBook(const std::string& id) { - auto it = FindBook(id); - - if (it != books_.end()) { - books_.erase(it); - return true; - } - - return false; -} - -std::list::const_iterator BookStore::FindBook(const std::string& id) - const { - return std::find_if(books_.begin(), books_.end(), - [&id](const Book& book) { return book.id == id; }); -} - -std::list::iterator BookStore::FindBook(const std::string& id) { - return std::find_if(books_.begin(), books_.end(), - [&id](Book& book) { return book.id == id; }); -} - -std::string BookStore::NewID() const { - static int s_id_counter = 0; - - ++s_id_counter; - return std::to_string(s_id_counter); -} diff --git a/examples/common/book.h b/examples/common/book.h deleted file mode 100644 index 2dea1fa..0000000 --- a/examples/common/book.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef EXAMPLE_COMMON_BOOK_H_ -#define EXAMPLE_COMMON_BOOK_H_ - -#include -#include - -#include "boost/filesystem/path.hpp" - -// In-memory test data. -// There should be some database in a real product. - -struct Book { - std::string id; - std::string title; - double price; - boost::filesystem::path photo; - - bool IsNull() const { return id.empty(); } -}; - -std::ostream& operator<<(std::ostream& os, const Book& book); - -extern const Book kNullBook; - -class BookStore { -public: - const std::list& books() const { - return books_; - } - - const Book& GetBook(const std::string& id) const; - - // Add a book, return the ID. - // NOTE: The ID of the input book will be ignored so should be empty. - std::string AddBook(const Book& book); - - bool UpdateBook(const Book& book); - - bool DeleteBook(const std::string& id); - -private: - std::list::const_iterator FindBook(const std::string& id) const; - - std::list::iterator FindBook(const std::string& id); - - // Allocate a new book ID. - std::string NewID() const; - - std::list books_; -}; - -#endif // EXAMPLE_COMMON_BOOK_H_ diff --git a/examples/common/book_json.cc b/examples/common/book_json.cc deleted file mode 100644 index c5fd817..0000000 --- a/examples/common/book_json.cc +++ /dev/null @@ -1,57 +0,0 @@ -#include "examples/common/book_json.h" - -#include -#include - -#include "json/json.h" - -#include "examples/common/book.h" - -std::string JsonToString(const Json::Value& json) { - Json::StreamWriterBuilder builder; - return Json::writeString(builder, json); -} - -Json::Value StringToJson(const std::string& str) { - Json::Value json; - - Json::CharReaderBuilder builder; - std::stringstream stream(str); - std::string errs; - if (!Json::parseFromStream(builder, stream, &json, &errs)) { - std::cerr << errs << std::endl; - } - - return json; -} - -Json::Value BookToJson(const Book& book) { - Json::Value json; - json["id"] = book.id; - json["title"] = book.title; - json["price"] = book.price; - return json; -} - -Book JsonToBook(const Json::Value& json) { - return { - json["id"].asString(), - json["title"].asString(), - json["price"].asDouble(), - }; -} - -std::string BookToJsonString(const Book& book) { - return JsonToString(BookToJson(book)); -} - -bool JsonStringToBook(const std::string& json_str, Book* book) { - Json::Value json = StringToJson(json_str); - - if (!json) { - return false; - } - - *book = JsonToBook(json); - return true; -} diff --git a/examples/common/book_json.h b/examples/common/book_json.h deleted file mode 100644 index dbb7027..0000000 --- a/examples/common/book_json.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef EXAMPLE_COMMON_BOOK_JSON_H_ -#define EXAMPLE_COMMON_BOOK_JSON_H_ - -#include - -#include "json/json-forwards.h" - -struct Book; - -std::string JsonToString(const Json::Value& json); - -Json::Value StringToJson(const std::string& str); - -Json::Value BookToJson(const Book& book); -Book JsonToBook(const Json::Value& json); - -std::string BookToJsonString(const Book& book); -bool JsonStringToBook(const std::string& json_str, Book* book); - -#endif // EXAMPLE_COMMON_BOOK_JSON_H_ diff --git a/examples/encoding.cc b/examples/encoding.cc new file mode 100644 index 0000000..4faadea --- /dev/null +++ b/examples/encoding.cc @@ -0,0 +1,60 @@ +#include "encoding.h" + +#include + +namespace { + +// Wrapper for Windows API MultiByteToWideChar. +std::wstring MB2WC(const std::string& input, unsigned int code_page) { + if (input.empty()) { + return L""; + } + + int length = ::MultiByteToWideChar(code_page, 0, &input[0], + static_cast(input.size()), + NULL, 0); + + std::wstring output(length, '\0'); + + ::MultiByteToWideChar(code_page, 0, &input[0], static_cast(input.size()), + &output[0], static_cast(output.size())); + + return output; +} + +// Wrapper for Windows API WideCharToMultiByte. +std::string WC2MB(const std::wstring& input, unsigned int code_page) { + if (input.empty()) { + return ""; + } + + // There do have other code pages which require the flags to be 0, e.g., + // 50220, 50211, and so on. But they are not included in our charset + // dictionary. So, only consider 65001 (UTF-8) and 54936 (GB18030). + DWORD flags = 0; + if (code_page != 65001 && code_page != 54936) { + flags = WC_NO_BEST_FIT_CHARS | WC_COMPOSITECHECK | WC_DEFAULTCHAR; + } + + int length = ::WideCharToMultiByte(code_page, flags, &input[0], + static_cast(input.size()), NULL, 0, + NULL, NULL); + + std::string output(length, '\0'); + + ::WideCharToMultiByte(code_page, flags, &input[0], + static_cast(input.size()), &output[0], + static_cast(output.size()), NULL, NULL); + + return output; +} + +} // namespace + +std::string Utf16ToUtf8(const std::wstring& utf16_string) { + return WC2MB(utf16_string, CP_UTF8); +} + +std::wstring Utf8ToUtf16(const std::string& utf8_string) { + return MB2WC(utf8_string, CP_UTF8); +} diff --git a/examples/encoding.h b/examples/encoding.h new file mode 100644 index 0000000..cff9258 --- /dev/null +++ b/examples/encoding.h @@ -0,0 +1,12 @@ +#ifndef ENCODING_H_ +#define ENCODING_H_ + +#include + +// Convert UTF16 to UTF8. +std::string Utf16ToUtf8(const std::wstring& utf16_string); + +// Convert UTF8 to UTF16. +std::wstring Utf8ToUtf16(const std::string& utf8_string); + +#endif // ENCODING_H_ diff --git a/examples/url_unicode.cc b/examples/url_unicode.cc new file mode 100644 index 0000000..6d24a86 Binary files /dev/null and b/examples/url_unicode.cc differ diff --git a/webcc/url.cc b/webcc/url.cc index e758f0f..cf9a75d 100644 --- a/webcc/url.cc +++ b/webcc/url.cc @@ -83,7 +83,8 @@ std::string EncodeImpl(const std::string& raw, // UTF8 for (auto i = raw.begin(); i != raw.end(); ++i) { // For UTF8 encoded string, char ASCII can be greater than 127. - int c = static_cast(*i); + // Cast to unsigned char firstly to make sure it's in [0,255]. + int c = static_cast(*i); if (should_encode(c)) { encoded.push_back('%'); @@ -101,7 +102,8 @@ std::string EncodeImpl(const std::string& raw, // UTF8 // are called unreserved. These include uppercase and lowercase letters, decimal // digits, hyphen, period, underscore, and tilde. inline bool IsUnreserved(int c) { - return std::isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~'; + return std::isalnum((unsigned char)c) || c == '-' || c == '.' || c == '_' || + c == '~'; } // General delimiters serve as the delimiters between different uri components.