From 5c465a1e2d262b92543eb757e6e7ed5e18cbf2ec Mon Sep 17 00:00:00 2001 From: Chunting Gu Date: Thu, 11 Apr 2019 17:35:09 +0800 Subject: [PATCH] Support file upload in server side. --- examples/CMakeLists.txt | 14 ++- examples/file_upload_client.cc | 55 +++++++++ examples/file_upload_server.cc | 63 ++++++++++ examples/rest_book_client.cc | 4 +- webcc/CMakeLists.txt | 2 + webcc/common.cc | 172 +++++++++++++++++++++++++++ webcc/common.h | 136 ++++++++++++++++++++++ webcc/globals.h | 2 + webcc/http_client_session.cc | 52 ++++----- webcc/http_client_session.h | 22 ++-- webcc/http_file.h | 12 ++ webcc/http_message.cc | 59 +--------- webcc/http_message.h | 80 ++++--------- webcc/http_parser.cc | 73 ++++++------ webcc/http_parser.h | 20 +++- webcc/http_request.h | 14 +++ webcc/http_request_parser.cc | 205 +++++++++++++++++++++++++++++++++ webcc/http_request_parser.h | 13 +++ webcc/rest_request_handler.cc | 2 + webcc/rest_request_handler.h | 1 + webcc/rest_server.h | 1 + webcc/rest_service.h | 10 +- 22 files changed, 822 insertions(+), 190 deletions(-) create mode 100644 examples/file_upload_client.cc create mode 100644 examples/file_upload_server.cc create mode 100644 webcc/common.cc create mode 100644 webcc/common.h diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 6c6e8f0..2a7500f 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -22,15 +22,21 @@ set(REST_BOOK_SRCS ) add_executable(http_client http_client.cc) -add_executable(github_client github_client.cc) - target_link_libraries(http_client ${EXAMPLE_COMMON_LIBS}) + +add_executable(github_client github_client.cc) target_link_libraries(github_client ${EXAMPLE_COMMON_LIBS} jsoncpp) -add_executable(rest_book_server rest_book_server.cc ${REST_BOOK_SRCS}) -add_executable(rest_book_client rest_book_client.cc ${REST_BOOK_SRCS}) +add_executable(file_upload_client file_upload_client.cc) +target_link_libraries(file_upload_client ${EXAMPLE_COMMON_LIBS}) + +add_executable(file_upload_server 
file_upload_server.cc) +target_link_libraries(file_upload_server ${EXAMPLE_COMMON_LIBS}) +add_executable(rest_book_server rest_book_server.cc ${REST_BOOK_SRCS}) target_link_libraries(rest_book_server ${EXAMPLE_COMMON_LIBS} jsoncpp) + +add_executable(rest_book_client rest_book_client.cc ${REST_BOOK_SRCS}) target_link_libraries(rest_book_client ${EXAMPLE_COMMON_LIBS} jsoncpp) if(WEBCC_ENABLE_SOAP) diff --git a/examples/file_upload_client.cc b/examples/file_upload_client.cc new file mode 100644 index 0000000..e2ec736 --- /dev/null +++ b/examples/file_upload_client.cc @@ -0,0 +1,55 @@ +#include + +#include "boost/filesystem.hpp" + +#include "webcc/http_client_session.h" +#include "webcc/logger.h" + +#if (defined(WIN32) || defined(_WIN64)) +// You need to set environment variable SSL_CERT_FILE properly to enable +// SSL verification. +bool kSslVerify = false; +#else +bool kSslVerify = true; +#endif + +void Help(const char* argv0) { + std::cout << "Usage: " << argv0 << " " << std::endl; + std::cout << " E.g.," << std::endl; + std::cout << " " << argv0 << "E:/github/webcc/data/upload" << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 2) { + Help(argv[0]); + return 1; + } + + WEBCC_LOG_INIT("", webcc::LOG_CONSOLE); + + const webcc::Path upload_dir(argv[1]); + + namespace bfs = boost::filesystem; + + if (!bfs::is_directory(upload_dir) || !bfs::exists(upload_dir)) { + std::cerr << "Invalid upload dir!" 
<< std::endl; + return 1; + } + + webcc::HttpClientSession session; + + //std::string url = "http://httpbin.org/post"; + std::string url = "http://localhost:8080/upload"; + + try { + auto r = session.PostFile(url, "file", + upload_dir / "remember.txt"); + + //std::cout << r->content() << std::endl; + + } catch (const webcc::Exception& e) { + std::cout << "Exception: " << e.what() << std::endl; + } + + return 0; +} diff --git a/examples/file_upload_server.cc b/examples/file_upload_server.cc new file mode 100644 index 0000000..e2922f7 --- /dev/null +++ b/examples/file_upload_server.cc @@ -0,0 +1,63 @@ +#include +#include + +#include "webcc/logger.h" +#include "webcc/rest_server.h" +#include "webcc/rest_service.h" + +// ----------------------------------------------------------------------------- + +class FileUploadService : public webcc::RestService { +public: + void Handle(const webcc::RestRequest& request, + webcc::RestResponse* response) final { + if (request.http->method() == webcc::http::methods::kPost) { + std::cout << "files: " << request.http->files().size() << std::endl; + + for (auto& pair : request.http->files()) { + std::cout << "name: " << pair.first << std::endl; + std::cout << "data: " << std::endl << pair.second.data() << std::endl; + } + + response->content = "OK"; + response->media_type = webcc::http::media_types::kTextPlain; + response->charset = "utf-8"; + response->status = webcc::http::Status::kCreated; + } + } +}; + +// ----------------------------------------------------------------------------- + +void Help(const char* argv0) { + std::cout << "Usage: " << argv0 << " " << std::endl; + std::cout << " E.g.," << std::endl; + std::cout << " " << argv0 << " 8080" << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 2) { + Help(argv[0]); + return 1; + } + + WEBCC_LOG_INIT("", webcc::LOG_CONSOLE); + + std::uint16_t port = static_cast(std::atoi(argv[1])); + + std::size_t workers = 2; + + try { + webcc::RestServer server(port, workers); 
+ + server.Bind(std::make_shared(), "/upload", false); + + server.Run(); + + } catch (const std::exception& e) { + std::cerr << "Exception: " << e.what() << std::endl; + return 1; + } + + return 0; +} diff --git a/examples/rest_book_client.cc b/examples/rest_book_client.cc index eca6791..f460403 100644 --- a/examples/rest_book_client.cc +++ b/examples/rest_book_client.cc @@ -198,8 +198,6 @@ int main(int argc, char* argv[]) { return 1; } - WEBCC_LOG_INIT("", webcc::LOG_CONSOLE_FILE_OVERWRITE); - std::string url = argv[1]; int timeout = 0; @@ -207,6 +205,8 @@ int main(int argc, char* argv[]) { timeout = std::atoi(argv[2]); } + WEBCC_LOG_INIT("", webcc::LOG_CONSOLE_FILE_OVERWRITE); + // Share the same session. webcc::HttpClientSession session; diff --git a/webcc/CMakeLists.txt b/webcc/CMakeLists.txt index 9b1d001..e89644e 100644 --- a/webcc/CMakeLists.txt +++ b/webcc/CMakeLists.txt @@ -15,6 +15,7 @@ include(GNUInstallDirs) set(HEADERS base64.h + common.h globals.h http_client.h http_client_pool.h @@ -45,6 +46,7 @@ set(HEADERS set(SOURCES base64.cc + common.cc globals.cc http_client.cc http_client_pool.cc diff --git a/webcc/common.cc b/webcc/common.cc new file mode 100644 index 0000000..668769f --- /dev/null +++ b/webcc/common.cc @@ -0,0 +1,172 @@ +#include "webcc/common.h" + +#include "boost/algorithm/string.hpp" + +#include "webcc/logger.h" + +namespace webcc { + +// ----------------------------------------------------------------------------- + +bool Split2(const std::string& str, char token, std::string* part1, + std::string* part2) { + std::size_t pos = str.find(token); + if (pos == std::string::npos) { + return false; + } + + *part1 = str.substr(0, pos); + *part2 = str.substr(pos + 1); + + boost::trim(*part1); + boost::trim(*part2); + + return true; +} + +// ----------------------------------------------------------------------------- + +void HttpHeaders::Set(const std::string& key, const std::string& value) { + auto it = Find(key); + if (it != headers_.end()) { 
+ it->second = value; + } else { + headers_.push_back({ key, value }); + } +} + +void HttpHeaders::Set(std::string&& key, std::string&& value) { + auto it = Find(key); + if (it != headers_.end()) { + it->second = std::move(value); + } else { + headers_.push_back({ std::move(key), std::move(value) }); + } +} + +bool HttpHeaders::Has(const std::string& key) const { + return const_cast(this)->Find(key) != headers_.end(); +} + +const std::string& HttpHeaders::Get(const std::string& key, + bool* existed) const { + auto it = const_cast(this)->Find(key); + + if (existed != nullptr) { + *existed = (it != headers_.end()); + } + + if (it != headers_.end()) { + return it->second; + } + + static const std::string s_no_value; + return s_no_value; +} + +std::vector::iterator HttpHeaders::Find(const std::string& key) { + auto it = headers_.begin(); + for (; it != headers_.end(); ++it) { + if (boost::iequals(it->first, key)) { + break; + } + } + return it; +} + +// ----------------------------------------------------------------------------- + +static bool ParseValue(const std::string& str, const std::string& expected_key, + std::string* value) { + std::string key; + if (!Split2(str, '=', &key, value)) { + return false; + } + + if (key != expected_key) { + return false; + } + + return !value->empty(); +} + +ContentType::ContentType(const std::string& str) { + Init(str); +} + +void ContentType::Parse(const std::string& str) { + media_type_.clear(); + additional_.clear(); + multipart_ = false; + + Init(str); +} + +bool ContentType::Valid() const { + if (media_type_.empty()) { + return false; + } + + if (multipart_) { + return !boundary().empty(); + } + + return true; +} + +void ContentType::Init(const std::string& str) { + std::string other; + Split2(str, ';', &media_type_, &other); + + if (media_type_ == "multipart/form-data") { + multipart_ = true; + if (!ParseValue(other, "boundary", &additional_)) { + LOG_ERRO("Invalid 'multipart/form-data' content-type (no boundary)."); + } 
else { + LOG_INFO("Content-type multipart boundary: %s.", additional_.c_str()); + } + } else { + if (ParseValue(other, "charset", &additional_)) { + LOG_INFO("Content-type charset: %s.", additional_.c_str()); + } + } +} + +// ----------------------------------------------------------------------------- + +static void Unquote(std::string& str) { + boost::trim_if(str, boost::is_any_of("\"")); +} + +bool ContentDisposition::Init(const std::string& str) { + std::vector parts; + boost::split(parts, str, boost::is_any_of(";")); + + if (parts.empty()) { + return false; + } + + if (parts[0] != "form-data") { + return false; + } + + std::string key; + std::string value; + for (std::size_t i = 1; i < parts.size(); ++i) { + if (!Split2(parts[i], '=', &key, &value)) { + return false; + } + + if (key == "name") { + name_ = value; + Unquote(name_); + } else if (key == "filename") { + file_name_ = value; + Unquote(file_name_); + } + } + + return true; +} + +} // namespace webcc diff --git a/webcc/common.h b/webcc/common.h new file mode 100644 index 0000000..bf84058 --- /dev/null +++ b/webcc/common.h @@ -0,0 +1,136 @@ +#ifndef WEBCC_COMMON_H_ +#define WEBCC_COMMON_H_ + +#include +#include +#include +#include + +namespace webcc { + +// ----------------------------------------------------------------------------- + +// Split a string to two parts by the given token. +bool Split2(const std::string& str, char token, std::string* part1, + std::string* part2); + +// ----------------------------------------------------------------------------- + +typedef std::pair HttpHeader; + +class HttpHeaders { +public: + std::size_t size() const { + return headers_.size(); + } + + const std::vector& data() const { + return headers_; + } + + void Set(const std::string& key, const std::string& value); + + void Set(std::string&& key, std::string&& value); + + bool Has(const std::string& key) const; + + // Get header by index. 
+ const HttpHeader& Get(std::size_t index) const { + assert(index < size()); + return headers_[index]; + } + + // Get header value by key. + // If there's no such header with the given key, besides return empty, the + // optional |existed| parameter will be set to false. + const std::string& Get(const std::string& key, bool* existed = nullptr) const; + + void Clear() { + headers_.clear(); + } + +private: + std::vector::iterator Find(const std::string& key); + + std::vector headers_; +}; + +// ----------------------------------------------------------------------------- + +// Content-Type header. +// Syntax: +// Content-Type: text/html; charset=utf-8 +// Content-Type: multipart/form-data; boundary=something +class ContentType { +public: + explicit ContentType(const std::string& str = ""); + + void Parse(const std::string& str); + + bool Valid() const; + + bool multipart() const { + return multipart_; + } + + const std::string& media_type() const { + return media_type_; + } + + const std::string& charset() const { + assert(!multipart_); + return additional_; + } + + const std::string& boundary() const { + assert(multipart_); + return additional_; + } + +private: + void Init(const std::string& str); + +private: + std::string media_type_; + std::string additional_; + bool multipart_ = false; +}; + +// ----------------------------------------------------------------------------- + +// Content-Disposition header. 
+// Syntax: +// Content-Disposition: form-data +// Content-Disposition: form-data; name="fieldName" +// Content-Disposition: form-data; name="fieldName"; filename="filename.jpg" +// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition +class ContentDisposition { +public: + explicit ContentDisposition(const std::string& str) { + valid_ = Init(str); + } + + bool valid() const { + return valid_; + } + + const std::string& name() const { + return name_; + } + + const std::string& file_name() const { + return file_name_; + } + +private: + bool Init(const std::string& str); + +private: + std::string name_; + std::string file_name_; + bool valid_ = false; +}; + +} // namespace webcc + +#endif // WEBCC_COMMON_H_ diff --git a/webcc/globals.h b/webcc/globals.h index 864d8fc..061fd33 100644 --- a/webcc/globals.h +++ b/webcc/globals.h @@ -115,6 +115,7 @@ const char* const kAuthorization = "Authorization"; const char* const kContentType = "Content-Type"; const char* const kContentLength = "Content-Length"; const char* const kContentEncoding = "Content-Encoding"; +const char* const kContentDisposition = "Content-Disposition"; const char* const kConnection = "Connection"; const char* const kTransferEncoding = "Transfer-Encoding"; const char* const kAccept = "Accept"; @@ -131,6 +132,7 @@ namespace media_types { const char* const kApplicationJson = "application/json"; const char* const kApplicationSoapXml = "application/soap+xml"; +const char* const kTextPlain = "text/plain"; const char* const kTextXml = "text/xml"; // Get media type from file extension. 
diff --git a/webcc/http_client_session.cc b/webcc/http_client_session.cc index 5b7a1af..ca41c94 100644 --- a/webcc/http_client_session.cc +++ b/webcc/http_client_session.cc @@ -13,13 +13,13 @@ HttpResponsePtr HttpClientSession::Request(HttpRequestPtr request) { assert(request); for (const auto& h : headers_.data()) { - if (!request->HaveHeader(h.first)) { + if (!request->HasHeader(h.first)) { request->SetHeader(h.first, h.second); } } if (!content_type_.empty() && - !request->HaveHeader(http::headers::kContentType)) { + !request->HasHeader(http::headers::kContentType)) { request->SetContentType(content_type_, charset_); } @@ -67,41 +67,35 @@ HttpResponsePtr HttpClientSession::Post( return Request(builder()); } -HttpResponsePtr HttpClientSession::PostFile( - const std::string& url, const std::string& name, const Path& path, +HttpResponsePtr HttpClientSession::Put( + const std::string& url, std::string&& data, bool json, const std::vector& headers) { HttpRequestBuilder builder; - builder.Post().Url(url); + builder.Put().Url(url); SetHeaders(headers, &builder); - builder.File(name, path); + builder.Data(std::move(data)); + builder.Json(json); return Request(builder()); } -HttpResponsePtr HttpClientSession::PostFiles( - const std::string& url, const std::map& paths, - const std::vector& headers) { - assert(!paths.empty()); - +HttpResponsePtr HttpClientSession::Delete( + const std::string& url, const std::vector& headers) { HttpRequestBuilder builder; - builder.Post().Url(url); + builder.Delete().Url(url); SetHeaders(headers, &builder); - for (auto& pair : paths) { - builder.File(pair.first, pair.second); - } - return Request(builder()); } -HttpResponsePtr HttpClientSession::Put( +HttpResponsePtr HttpClientSession::Patch( const std::string& url, std::string&& data, bool json, const std::vector& headers) { HttpRequestBuilder builder; - builder.Put().Url(url); + builder.Patch().Url(url); SetHeaders(headers, &builder); @@ -111,26 +105,32 @@ HttpResponsePtr 
HttpClientSession::Put( return Request(builder()); } -HttpResponsePtr HttpClientSession::Delete( - const std::string& url, const std::vector& headers) { +HttpResponsePtr HttpClientSession::PostFile( + const std::string& url, const std::string& name, const Path& path, + const std::vector& headers) { HttpRequestBuilder builder; - builder.Delete().Url(url); + builder.Post().Url(url); SetHeaders(headers, &builder); + builder.File(name, path); + return Request(builder()); } -HttpResponsePtr HttpClientSession::Patch( - const std::string& url, std::string&& data, bool json, +HttpResponsePtr HttpClientSession::PostFiles( + const std::string& url, const std::map& paths, const std::vector& headers) { + assert(!paths.empty()); + HttpRequestBuilder builder; - builder.Patch().Url(url); + builder.Post().Url(url); SetHeaders(headers, &builder); - builder.Data(std::move(data)); - builder.Json(json); + for (auto& pair : paths) { + builder.File(pair.first, pair.second); + } return Request(builder()); } diff --git a/webcc/http_client_session.h b/webcc/http_client_session.h index 06c7ba7..bb684ef 100644 --- a/webcc/http_client_session.h +++ b/webcc/http_client_session.h @@ -59,16 +59,6 @@ public: HttpResponsePtr Post(const std::string& url, std::string&& data, bool json, const std::vector& headers = {}); - // Post a file. - HttpResponsePtr PostFile(const std::string& url, const std::string& name, - const Path& path, - const std::vector& headers = {}); - - // Post multiple files. - HttpResponsePtr PostFiles(const std::string& url, - const std::map& paths, - const std::vector& headers = {}); - // Shortcut for PUT request. HttpResponsePtr Put(const std::string& url, std::string&& data, bool json, const std::vector& headers = {}); @@ -81,6 +71,16 @@ public: HttpResponsePtr Patch(const std::string& url, std::string&& data, bool json, const std::vector& headers = {}); + // Post a file. 
+ HttpResponsePtr PostFile(const std::string& url, const std::string& name, + const Path& path, + const std::vector& headers = {}); + + // Post multiple files. + HttpResponsePtr PostFiles(const std::string& url, + const std::map& paths, + const std::vector& headers = {}); + private: void InitHeaders(); @@ -94,7 +94,7 @@ private: std::string charset_; // Additional headers for each request. - HttpHeaderDict headers_; + HttpHeaders headers_; // Verify the certificate of the peer or not. bool ssl_verify_ = true; diff --git a/webcc/http_file.h b/webcc/http_file.h index f94861a..98c5105 100644 --- a/webcc/http_file.h +++ b/webcc/http_file.h @@ -49,10 +49,22 @@ public: return data_; } + void AppendData(const std::string& data) { + data_.append(data); + } + + void AppendData(const char* data, std::size_t size) { + data_.append(data, size); + } + const std::string& file_name() const { return file_name_; } + void set_file_name(const std::string& file_name) { + file_name_ = file_name; + } + const std::string& mime_type() const { return mime_type_; } diff --git a/webcc/http_message.cc b/webcc/http_message.cc index 23a56d5..1f9c766 100644 --- a/webcc/http_message.cc +++ b/webcc/http_message.cc @@ -4,6 +4,9 @@ #include "boost/algorithm/string.hpp" +#include "webcc/logger.h" +#include "webcc/utility.h" + namespace webcc { // ----------------------------------------------------------------------------- @@ -24,56 +27,6 @@ std::ostream& operator<<(std::ostream& os, const HttpMessage& message) { // ----------------------------------------------------------------------------- -void HttpHeaderDict::Set(const std::string& key, const std::string& value) { - auto it = Find(key); - if (it != headers_.end()) { - it->second = value; - } else { - headers_.push_back({ key, value }); - } -} - -void HttpHeaderDict::Set(std::string&& key, std::string&& value) { - auto it = Find(key); - if (it != headers_.end()) { - it->second = std::move(value); - } else { - headers_.push_back({ std::move(key), 
std::move(value) }); - } -} - -bool HttpHeaderDict::Have(const std::string& key) const { - return const_cast(this)->Find(key) != headers_.end(); -} - -const std::string& HttpHeaderDict::Get(const std::string& key, - bool* existed) const { - auto it = const_cast(this)->Find(key); - - if (existed != nullptr) { - *existed = (it != headers_.end()); - } - - if (it != headers_.end()) { - return it->second; - } - - static const std::string s_no_value; - return s_no_value; -} - -std::vector::iterator HttpHeaderDict::Find(const std::string& key) { - auto it = headers_.begin(); - for (; it != headers_.end(); ++it) { - if (boost::iequals(it->first, key)) { - break; - } - } - return it; -} - -// ----------------------------------------------------------------------------- - bool HttpMessage::IsConnectionKeepAlive() const { bool existed = false; const std::string& connection = @@ -180,12 +133,12 @@ void HttpMessage::Dump(std::ostream& os, std::size_t indent, } } else { // Split by EOL to achieve more readability. 
- std::vector splitted; - boost::split(splitted, content_, boost::is_any_of("\n")); + std::vector lines; + boost::split(lines, content_, boost::is_any_of("\n")); std::size_t size = 0; - for (const std::string& line : splitted) { + for (const std::string& line : lines) { os << indent_str; if (line.size() + size > kMaxDumpSize) { diff --git a/webcc/http_message.h b/webcc/http_message.h index 175373a..e271bd8 100644 --- a/webcc/http_message.h +++ b/webcc/http_message.h @@ -8,58 +8,15 @@ #include "boost/asio/buffer.hpp" // for const_buffer +#include "webcc/common.h" +#include "webcc/http_file.h" #include "webcc/globals.h" namespace webcc { -// ----------------------------------------------------------------------------- - class HttpMessage; std::ostream& operator<<(std::ostream& os, const HttpMessage& message); -// ----------------------------------------------------------------------------- - -typedef std::pair HttpHeader; - -class HttpHeaderDict { -public: - std::size_t size() const { - return headers_.size(); - } - - const std::vector& data() const { - return headers_; - } - - void Set(const std::string& key, const std::string& value); - - void Set(std::string&& key, std::string&& value); - - bool Have(const std::string& key) const; - - // Get header by index. - const HttpHeader& Get(std::size_t index) const { - assert(index < size()); - return headers_[index]; - } - - // Get header value by key. - // If there's no such header with the given key, besides return empty, the - // optional |existed| parameter will be set to false. - const std::string& Get(const std::string& key, bool* existed = nullptr) const; - - void Clear() { - headers_.clear(); - } - -private: - std::vector::iterator Find(const std::string& key); - - std::vector headers_; -}; - -// ----------------------------------------------------------------------------- - // Base class for HTTP request and response messages. 
class HttpMessage { public: @@ -86,12 +43,12 @@ public: bool IsConnectionKeepAlive() const; - void SetHeader(const std::string& key, const std::string& value) { - headers_.Set(key, value); + void SetHeader(HttpHeader&& header) { + headers_.Set(std::move(header.first), std::move(header.second)); } - void SetHeader(std::string&& key, std::string&& value) { - headers_.Set(std::move(key), std::move(value)); + void SetHeader(const std::string& key, const std::string& value) { + headers_.Set(key, value); } const std::string& GetHeader(const std::string& key, @@ -99,8 +56,8 @@ public: return headers_.Get(key, existed); } - bool HaveHeader(const std::string& key) const { - return headers_.Have(key); + bool HasHeader(const std::string& key) const { + return headers_.Has(key); } http::ContentEncoding GetContentEncoding() const; @@ -108,11 +65,20 @@ public: // Return true if header Accept-Encoding contains "gzip". bool AcceptEncodingGzip() const; + const ContentType& content_type() const { + return content_type_; + } + + // TODO: Set header? + void SetContentType(const ContentType& content_type) { + content_type_ = content_type; + } + void SetContentType(const std::string& content_type) { SetHeader(http::headers::kContentType, content_type); } - // E.g., "text/html", "application/json; charset=utf-8", etc. + // Example: SetContentType("application/json", "utf-8") void SetContentType(const std::string& media_type, const std::string& charset); @@ -141,14 +107,18 @@ protected: SetHeader(http::headers::kContentLength, std::to_string(content_length)); } +protected: // Start line with trailing CRLF. + // TODO: Don't include trailing CRLF since it's confusing. 
std::string start_line_; - std::size_t content_length_; + std::string content_; - HttpHeaderDict headers_; + ContentType content_type_; - std::string content_; + std::size_t content_length_; + + HttpHeaders headers_; }; } // namespace webcc diff --git a/webcc/http_parser.cc b/webcc/http_parser.cc index 520e0f4..6d6176d 100644 --- a/webcc/http_parser.cc +++ b/webcc/http_parser.cc @@ -4,14 +4,16 @@ #include "webcc/http_message.h" #include "webcc/logger.h" +#include "webcc/utility.h" #include "webcc/zlib_wrapper.h" namespace webcc { // ----------------------------------------------------------------------------- -static bool StringToSizeT(const std::string& str, int base, - std::size_t* output) { +namespace { + +bool StringToSizeT(const std::string& str, int base, std::size_t* output) { try { *output = static_cast(std::stoul(str, 0, base)); } catch (const std::exception&) { @@ -20,6 +22,8 @@ static bool StringToSizeT(const std::string& str, int base, return true; } +} // namespace + // ----------------------------------------------------------------------------- HttpParser::HttpParser(HttpMessage* message) @@ -60,12 +64,7 @@ bool HttpParser::Parse(const char* data, std::size_t length) { } // Now, parse the content. - - if (chunked_) { - return ParseChunkedContent(); - } else { - return ParseFixedContent(); - } + return ParseContent(); } void HttpParser::Reset() { @@ -86,7 +85,7 @@ bool HttpParser::ParseHeaders() { while (true) { std::string line; - if (!NextPendingLine(off, &line, false)) { + if (!GetNextLine(off, &line, false)) { // Can't find a full header line, need more data from next read. break; } @@ -109,14 +108,14 @@ bool HttpParser::ParseHeaders() { } } - // Remove the parsed data. + // Remove the data which has just been parsed. 
pending_data_.erase(0, off); return true; } -bool HttpParser::NextPendingLine(std::size_t off, std::string* line, - bool remove) { +bool HttpParser::GetNextLine(std::size_t off, std::string* line, + bool erase) { std::size_t pos = pending_data_.find(kCRLF, off); if (pos == std::string::npos) { @@ -125,11 +124,11 @@ bool HttpParser::NextPendingLine(std::size_t off, std::string* line, std::size_t count = pos - off; - if (pos > off) { + if (count > 0) { *line = pending_data_.substr(off, count); } // else: empty line - if (remove) { + if (erase) { pending_data_.erase(off, count + 2); } @@ -137,25 +136,18 @@ bool HttpParser::NextPendingLine(std::size_t off, std::string* line, } bool HttpParser::ParseHeaderLine(const std::string& line) { - // NOTE: Can't split with ":" because date time also contains ":". - std::size_t pos = line.find(':'); - if (pos == std::string::npos) { + HttpHeader header; + if (!Split2(line, ':', &header.first, &header.second)) { return false; } - std::string name = line.substr(0, pos); - boost::trim(name); - - std::string value = line.substr(pos + 1); - boost::trim(value); - do { if (!chunked_ && !content_length_parsed_) { - if (boost::iequals(name, http::headers::kContentLength)) { + if (boost::iequals(header.first, http::headers::kContentLength)) { content_length_parsed_ = true; - if (!StringToSizeT(value, 10, &content_length_)) { - LOG_ERRO("Invalid content length: %s.", value.c_str()); + if (!StringToSizeT(header.second, 10, &content_length_)) { + LOG_ERRO("Invalid content length: %s.", header.second.c_str()); return false; } @@ -175,8 +167,8 @@ bool HttpParser::ParseHeaderLine(const std::string& line) { // TODO: Replace `!chunked_` with . if (!chunked_ && !content_length_parsed_) { - if (boost::iequals(name, http::headers::kTransferEncoding)) { - if (value == "chunked") { + if (boost::iequals(header.first, http::headers::kTransferEncoding)) { + if (header.second == "chunked") { // The content is chunked. 
chunked_ = true; } @@ -186,12 +178,29 @@ bool HttpParser::ParseHeaderLine(const std::string& line) { } } while (false); - // Save the header to the result message. - message_->SetHeader(std::move(name), std::move(value)); + // Parse Content-Type. + if (boost::iequals(header.first, http::headers::kContentType)) { + ContentType content_type(header.second); + if (!content_type.Valid()) { + LOG_ERRO("Invalid content-type header."); + return false; + } + message_->SetContentType(content_type); + } + + message_->SetHeader(std::move(header)); return true; } +bool HttpParser::ParseContent() { + if (chunked_) { + return ParseChunkedContent(); + } else { + return ParseFixedContent(); + } +} + bool HttpParser::ParseFixedContent() { if (!content_length_parsed_) { // No Content-Length, no content. @@ -201,7 +210,6 @@ bool HttpParser::ParseFixedContent() { if (content_length_ == kInvalidLength) { // Invalid content length (syntax error). - // Normally, shouldn't be here. return false; } @@ -273,9 +281,8 @@ bool HttpParser::ParseChunkedContent() { bool HttpParser::ParseChunkSize() { LOG_VERB("Parse chunk size."); - std::size_t off = 0; std::string line; - if (!NextPendingLine(off, &line, true)) { + if (!GetNextLine(0, &line, true)) { return true; } diff --git a/webcc/http_parser.h b/webcc/http_parser.h index d4e8bdb..60ea968 100644 --- a/webcc/http_parser.h +++ b/webcc/http_parser.h @@ -4,6 +4,7 @@ #include #include "webcc/globals.h" +#include "webcc/http_file.h" namespace webcc { @@ -37,14 +38,16 @@ protected: // Get next line (using delimiter CRLF) from the pending data. // The line will not contain a trailing CRLF. - // If |remove| is true, the line, as well as the trailing CRLF, will be erased + // If |erase| is true, the line, as well as the trailing CRLF, will be erased // from the pending data. 
- bool NextPendingLine(std::size_t off, std::string* line, bool remove); + bool GetNextLine(std::size_t off, std::string* line, bool erase); virtual bool ParseStartLine(const std::string& line) = 0; bool ParseHeaderLine(const std::string& line); + virtual bool ParseContent(); + bool ParseFixedContent(); bool ParseChunkedContent(); @@ -77,6 +80,19 @@ protected: bool chunked_; std::size_t chunk_size_; bool finished_; + + struct Part { + enum Step { + kStart, + kBoundaryParsed, + kHeadersParsed, + kEnded, + }; + Step step = kStart; + std::string name; + HttpFile file; + }; + Part part_; }; } // namespace webcc diff --git a/webcc/http_request.h b/webcc/http_request.h index 1b53370..7df564d 100644 --- a/webcc/http_request.h +++ b/webcc/http_request.h @@ -1,6 +1,7 @@ #ifndef WEBCC_HTTP_REQUEST_H_ #define WEBCC_HTTP_REQUEST_H_ +#include #include #include #include @@ -56,13 +57,26 @@ public: return port().empty() ? default_port : port(); } + const std::map& files() const { + return files_; + } + + // Add a file to upload. + void AddFile(const std::string& name, HttpFile&& file) { + files_[name] = std::move(file); + } + // Prepare payload. // Compose start line, set Host header, etc. bool Prepare() final; private: std::string method_; + Url url_; + + // Files to upload for a POST request. 
+  std::map<std::string, HttpFile> files_;
 };
 
 }  // namespace webcc
diff --git a/webcc/http_request_parser.cc b/webcc/http_request_parser.cc
index 57cc9bd..eb8f2ea 100644
--- a/webcc/http_request_parser.cc
+++ b/webcc/http_request_parser.cc
@@ -1,9 +1,11 @@
 #include "webcc/http_request_parser.h"
 
 #include
+
 #include "boost/algorithm/string.hpp"
 
 #include "webcc/http_request.h"
+#include "webcc/logger.h"
 
 namespace webcc {
 
@@ -32,4 +34,245 @@ bool HttpRequestParser::ParseStartLine(const std::string& line) {
   return true;
 }
 
+// Parse the request content according to how the body is encoded: chunked
+// content, multipart content or content with a fixed length.
+bool HttpRequestParser::ParseContent() {
+  if (chunked_) {
+    return ParseChunkedContent();
+  } else {
+    if (request_->content_type().multipart()) {
+      return ParseMultipartContent();
+    } else {
+      return ParseFixedContent();
+    }
+  }
+}
+
+// Parse the multipart content in pending_data_ part by part.
+//
+// The progress is kept in part_ so that parsing resumes where it stopped when
+// this is called again with more data. Each finished part is added to the
+// request as a file.
+//
+// Return false on a syntax error. Return true otherwise, also when more data
+// is needed from the next read.
+bool HttpRequestParser::ParseMultipartContent() {
+  LOG_VERB("Parse multipart content (pending data size: %u).",
+           static_cast<unsigned int>(pending_data_.size()));
+
+  if (!content_length_parsed_ || content_length_ == kInvalidLength) {
+    // Invalid content length (syntax error).
+    return false;
+  }
+
+  while (true) {
+    if (pending_data_.empty()) {
+      // Wait data from next read.
+      break;
+    }
+
+    if (part_.step == Part::Step::kStart) {
+      std::string line;
+      if (!GetNextLine(0, &line, true)) {
+        break;  // Not enough data
+      }
+      if (!IsBoundary(line)) {
+        LOG_ERRO("Invalid boundary: %s", line.c_str());
+        return false;
+      }
+      LOG_INFO("Boundary line: %s", line.c_str());
+      // Go to next step.
+      part_.step = Part::Step::kBoundaryParsed;
+      continue;
+    }
+
+    if (part_.step == Part::Step::kBoundaryParsed) {
+      bool need_more_data = false;
+      if (ParsePartHeaders(&need_more_data)) {
+        // Go to next step.
+        part_.step = Part::Step::kHeadersParsed;
+        LOG_INFO("Part headers just ended.");
+        continue;
+      } else {
+        if (need_more_data) {
+          // Need more data from next read.
+          break;
+        } else {
+          return false;
+        }
+      }
+    }
+
+    if (part_.step == Part::Step::kHeadersParsed) {
+      std::size_t off = 0;
+      std::size_t count = 0;
+      bool ended = false;
+      if (!GetNextBoundaryLine(&off, &count, &ended)) {
+        // No complete boundary line yet. The delimiter (CRLF + boundary line)
+        // may be split across reads, so only the bytes before the last CRLF
+        // are known to belong to this part. Keep the rest for the next read.
+        std::size_t safe = pending_data_.rfind(kCRLF);
+        if (safe == std::string::npos) {
+          // No CRLF at all: only a trailing CR could start the delimiter.
+          safe = pending_data_.size();
+          if (pending_data_.back() == '\r') {
+            --safe;
+          }
+        }
+        if (safe > 0) {
+          part_.file.AppendData(pending_data_.data(), safe);
+          pending_data_.erase(0, safe);
+        }
+        break;
+      }
+
+      LOG_INFO("Next boundary has been found.");
+
+      // This part has ended.
+      if (off > 2) {
+        // -2 for excluding the CRLF after the data.
+        part_.file.AppendData(pending_data_.data(), off - 2);
+      }
+
+      // Remove the data of this part together with the boundary line, or the
+      // headers of the next part would be parsed from already handled data.
+      // +2 for the CRLF after the boundary line.
+      pending_data_.erase(0, off + count + 2);
+
+      request_->AddFile(part_.name, std::move(part_.file));
+
+      // The file has been moved from; start the next part from a clean state.
+      part_.name.clear();
+      part_.file = HttpFile();
+
+      if (ended) {
+        // Go to the end step.
+        part_.step = Part::Step::kEnded;
+        break;
+      } else {
+        // Go to next step.
+        part_.step = Part::Step::kBoundaryParsed;
+        continue;
+      }
+    }
+  }
+
+  if (part_.step == Part::Step::kEnded) {
+    LOG_INFO("Multipart data has ended.");
+    Finish();
+  }
+
+  return true;
+}
+
+// Parse the headers of the current part from the beginning of pending_data_.
+//
+// The header lines are removed from pending_data_ only after the empty line
+// ending the headers has been read. If a line is not available yet,
+// |need_more_data| is set to true and false is returned. False is also
+// returned (without touching |need_more_data|) for an invalid header.
+bool HttpRequestParser::ParsePartHeaders(bool* need_more_data) {
+  std::size_t off = 0;
+
+  while (true) {
+    std::string line;
+    if (!GetNextLine(off, &line, false)) {
+      // Need more data from next read.
+      *need_more_data = true;
+      return false;
+    }
+
+    off = off + line.size() + 2;  // +2 for CRLF
+
+    if (line.empty()) {
+      // Headers finished.
+      break;
+    }
+
+    HttpHeader header;
+    if (!Split2(line, ':', &header.first, &header.second)) {
+      LOG_ERRO("Invalid part header line: %s", line.c_str());
+      return false;
+    }
+
+    LOG_INFO("Part header (%s: %s).", header.first.c_str(),
+             header.second.c_str());
+
+    // Parse Content-Disposition.
+    if (boost::iequals(header.first, http::headers::kContentDisposition)) {
+      ContentDisposition content_disposition(header.second);
+      if (!content_disposition.valid()) {
+        LOG_ERRO("Invalid content-disposition header: %s",
+                 header.second.c_str());
+        return false;
+      }
+      part_.name = content_disposition.name();
+      part_.file.set_file_name(content_disposition.file_name());
+      LOG_INFO("Content-Disposition (name=%s; filename=%s)",
+               part_.name.c_str(), part_.file.file_name().c_str());
+    }
+
+    // TODO: Parse other headers.
+  }
+
+  // Remove the data which has just been parsed.
+  pending_data_.erase(0, off);
+
+  return true;
+}
+
+// Find the next boundary line in pending_data_.
+//
+// On success, |b_off| and |b_count| are set to the offset and the size of the
+// boundary line (CRLF excluded) and |ended| tells if it's the end boundary.
+// Return false if no complete boundary line is found.
+bool HttpRequestParser::GetNextBoundaryLine(std::size_t* b_off,
+                                            std::size_t* b_count,
+                                            bool* ended) {
+  std::size_t off = 0;
+
+  while (true) {
+    std::size_t pos = pending_data_.find(kCRLF, off);
+    if (pos == std::string::npos) {
+      break;
+    }
+
+    std::size_t count = pos - off;
+    if (count == 0) {
+      off = pos + 2;
+      continue;  // Empty line
+    }
+
+    // TODO: Avoid temp string.
+    std::string line = pending_data_.substr(off, count);
+
+    if (IsBoundary(line)) {
+      *b_off = off;
+      *b_count = count;
+      *ended = false;
+      return true;
+    }
+
+    if (IsBoundaryEnd(line)) {
+      *b_off = off;
+      *b_count = count;
+      *ended = true;
+      return true;
+    }
+
+    off = pos + 2;
+  }
+
+  return false;
+}
+
+// Check if the line is a boundary line, i.e., "--" + boundary.
+bool HttpRequestParser::IsBoundary(const std::string& line) const {
+  return line == "--" + request_->content_type().boundary();
+}
+
+// Check if the line is the end boundary line, i.e., "--" + boundary + "--".
+bool HttpRequestParser::IsBoundaryEnd(const std::string& line) const {
+  return line == "--" + request_->content_type().boundary() + "--";
+}
+
 }  // namespace webcc
diff --git a/webcc/http_request_parser.h b/webcc/http_request_parser.h
index 163d1e5..8e27584 100644
--- a/webcc/http_request_parser.h
+++ b/webcc/http_request_parser.h
@@ -20,6 +20,19 @@ public:
 private:
   bool ParseStartLine(const std::string& line) final;
 
+  // Override to handle multipart form data which is request only.
+ bool ParseContent() final; + + // Multipart specific parsing helpers. + + bool ParseMultipartContent(); + bool ParsePartHeaders(bool* need_more_data); + bool GetNextBoundaryLine(std::size_t* b_off, std::size_t* b_count, + bool* ended); + bool IsBoundary(const std::string& line) const; + bool IsBoundaryEnd(const std::string& line) const; + +private: HttpRequest* request_; }; diff --git a/webcc/rest_request_handler.cc b/webcc/rest_request_handler.cc index 5847d82..3fe7f07 100644 --- a/webcc/rest_request_handler.cc +++ b/webcc/rest_request_handler.cc @@ -20,7 +20,9 @@ void RestRequestHandler::HandleConnection(HttpConnectionPtr connection) { const Url& url = http_request->url(); + // TODO RestRequest rest_request{ + http_request, http_request->method(), http_request->content(), url.query() }; diff --git a/webcc/rest_request_handler.h b/webcc/rest_request_handler.h index 3c65a0b..f07afc6 100644 --- a/webcc/rest_request_handler.h +++ b/webcc/rest_request_handler.h @@ -21,6 +21,7 @@ public: private: void HandleConnection(HttpConnectionPtr connection) final; +private: RestServiceManager service_manager_; }; diff --git a/webcc/rest_server.h b/webcc/rest_server.h index b99a1cb..330cfb2 100644 --- a/webcc/rest_server.h +++ b/webcc/rest_server.h @@ -36,6 +36,7 @@ private: return &request_handler_; } +private: RestRequestHandler request_handler_; }; diff --git a/webcc/rest_service.h b/webcc/rest_service.h index 99ee846..9e8c38f 100644 --- a/webcc/rest_service.h +++ b/webcc/rest_service.h @@ -14,6 +14,7 @@ #include #include "webcc/globals.h" +#include "webcc/http_request.h" #include "webcc/url.h" namespace webcc { @@ -24,6 +25,9 @@ namespace webcc { typedef std::vector UrlMatches; struct RestRequest { + // Original HTTP request. + HttpRequestPtr http; + // HTTP method (GET, POST, etc.). 
const std::string& method; @@ -65,9 +69,7 @@ class RestListService : public RestService { public: void Handle(const RestRequest& request, RestResponse* response) final; -public: - RestListService() = default; - +protected: virtual void Get(const UrlQuery& query, RestResponse* response) { } @@ -82,7 +84,7 @@ class RestDetailService : public RestService { public: void Handle(const RestRequest& request, RestResponse* response) final; -public: +protected: virtual void Get(const UrlMatches& url_matches, const UrlQuery& query, RestResponse* response) {