From df4bb33bd8591bd634fdac12e179ff5a1a180069 Mon Sep 17 00:00:00 2001 From: Chunting Gu Date: Mon, 15 Apr 2019 12:53:43 +0800 Subject: [PATCH] Fix content-type parsing issue; refine http parser. --- examples/file_upload_client.cc | 3 +- examples/github_client.cc | 3 +- examples/http_client.cc | 27 ++++++++++------- webcc/common.cc | 12 +++++++- webcc/http_message.h | 2 -- webcc/http_parser.cc | 55 +++++++++++++++++++--------------- webcc/http_parser.h | 6 ++-- webcc/http_request_parser.cc | 15 ++++++---- webcc/http_request_parser.h | 4 +-- 9 files changed, 76 insertions(+), 51 deletions(-) diff --git a/examples/file_upload_client.cc b/examples/file_upload_client.cc index 2c6749a..8a570cb 100644 --- a/examples/file_upload_client.cc +++ b/examples/file_upload_client.cc @@ -50,8 +50,7 @@ int main(int argc, char* argv[]) { webcc::HttpClientSession session; try { - // auto r = session.PostFile(url, "file", - // upload_dir / "remember.txt"); + //auto r = session.PostFile(url, "file", upload_dir / "remember.txt"); auto r = session.Request(webcc::HttpRequestBuilder{}.Post(). Url(url). diff --git a/examples/github_client.cc b/examples/github_client.cc index 41a187f..952bb4e 100644 --- a/examples/github_client.cc +++ b/examples/github_client.cc @@ -109,7 +109,8 @@ void CreateAuthorization(webcc::HttpClientSession& session, const std::string& auth) { try { - std::string data = "{'note': 'Webcc test', 'scopes': ['public_repo', 'repo', 'repo:status', 'user']}"; + std::string data = "{'note': 'Webcc test', 'scopes': ['public_repo',\ + 'repo', 'repo:status', 'user']}"; auto r = session.Post(kUrlRoot + "/authorizations", std::move(data), true, {"Authorization", auth}); diff --git a/examples/http_client.cc b/examples/http_client.cc index 02b8dec..8721309 100644 --- a/examples/http_client.cc +++ b/examples/http_client.cc @@ -15,12 +15,12 @@ bool kSslVerify = true; void ExampleBasic() { webcc::HttpClientSession session; - auto r = session.Request(webcc::HttpRequestBuilder{} - .Get() - .Url("http://httpbin.org/get") - .Query("key1", "value1") - .Query("key2", "value2") - .Header("Accept", "application/json")()); + auto r = session.Request(webcc::HttpRequestBuilder{}.Get(). + Url("http://httpbin.org/get"). + Query("key1", "value1"). + Query("key2", "value2"). + Header("Accept", "application/json") + ()); std::cout << r->content() << std::endl; } @@ -36,20 +36,23 @@ void ExampleShortcut() { std::cout << r->content() << std::endl; } +#if WEBCC_ENABLE_SSL + // HTTPS is auto-detected from the URL scheme. void ExampleHttps() { webcc::HttpClientSession session; session.set_ssl_verify(kSslVerify); - auto r = session.Request(webcc::HttpRequestBuilder{} - .Get() - .Url("https://httpbin.org/get") - .Query("key1", "value1") - .Header("Accept", "application/json")()); + auto r = session.Request(webcc::HttpRequestBuilder{}.Get(). + Url("https://httpbin.org/get"). + Query("key1", "value1"). + Header("Accept", "application/json")()); std::cout << r->content() << std::endl; } +#endif // WEBCC_ENABLE_SSL + // Example for testing Keep-Alive connection. // // Boost.org doesn't support persistent connection so always includes @@ -106,6 +109,8 @@ void ExampleImage(const std::string& path) { int main() { WEBCC_LOG_INIT("", webcc::LOG_CONSOLE); + webcc::HttpClientSession session; + try { ExampleBasic(); diff --git a/webcc/common.cc b/webcc/common.cc index b1eecad..3189ca9 100644 --- a/webcc/common.cc +++ b/webcc/common.cc @@ -142,7 +142,17 @@ bool ContentType::Valid() const { void ContentType::Init(const std::string& str) { std::string other; - Split2(str, ';', &media_type_, &other); + + std::size_t pos = str.find(';'); + if (pos == std::string::npos) { + media_type_ = str; + } else { + media_type_ = str.substr(0, pos); + other = str.substr(pos + 1); + } + + boost::trim(media_type_); + boost::trim(other); if (media_type_ == "multipart/form-data") { multipart_ = true; diff --git a/webcc/http_message.h b/webcc/http_message.h index d6846d2..5a066d5 100644 --- a/webcc/http_message.h +++ b/webcc/http_message.h @@ -103,8 +103,6 @@ protected: } protected: - // Start line with trailing CRLF. - // TODO: Don't include trailing CRLF since it's confusing. std::string start_line_; std::string content_; diff --git a/webcc/http_parser.cc b/webcc/http_parser.cc index 6d6176d..d8b11eb 100644 --- a/webcc/http_parser.cc +++ b/webcc/http_parser.cc @@ -43,28 +43,25 @@ void HttpParser::Init(HttpMessage* message) { } bool HttpParser::Parse(const char* data, std::size_t length) { + if (header_ended_) { + return ParseContent(data, length); + } + // Append the new data to the pending data. pending_data_.append(data, length); - if (!header_ended_) { - // If headers not ended yet, continue to parse headers. - if (!ParseHeaders()) { - return false; - } - - if (header_ended_) { - LOG_INFO("HTTP headers just ended."); - } + if (!ParseHeaders()) { + return false; } - // If headers still not ended, just return and wait for next read. if (!header_ended_) { LOG_INFO("HTTP headers will continue in next read."); return true; + } else { + LOG_INFO("HTTP headers just ended."); + // NOTE: The left data, if any, is still in the pending data. + return ParseContent("", 0); } - - // Now, parse the content. - return ParseContent(); } void HttpParser::Reset() { @@ -99,12 +96,14 @@ bool HttpParser::ParseHeaders() { if (!start_line_parsed_) { start_line_parsed_ = true; - message_->set_start_line(line + kCRLF); + message_->set_start_line(line); if (!ParseStartLine(line)) { return false; } } else { - ParseHeaderLine(line); + if (!ParseHeaderLine(line)) { + return false; + } } } @@ -182,7 +181,7 @@ bool HttpParser::ParseHeaderLine(const std::string& line) { if (boost::iequals(header.first, http::headers::kContentType)) { ContentType content_type(header.second); if (!content_type.Valid()) { - LOG_ERRO("Invalid content-type header."); + LOG_ERRO("Invalid content-type header: %s", header.second.c_str()); return false; } message_->SetContentType(content_type); @@ -193,15 +192,15 @@ bool HttpParser::ParseHeaderLine(const std::string& line) { return true; } -bool HttpParser::ParseContent() { +bool HttpParser::ParseContent(const char* data, std::size_t length) { if (chunked_) { - return ParseChunkedContent(); + return ParseChunkedContent(data, length); } else { - return ParseFixedContent(); + return ParseFixedContent(data, length); } } -bool HttpParser::ParseFixedContent() { +bool HttpParser::ParseFixedContent(const char* data, std::size_t length) { if (!content_length_parsed_) { // No Content-Length, no content. Finish(); @@ -213,10 +212,14 @@ bool HttpParser::ParseFixedContent() { return false; } - // TODO: Avoid copy using std::move. - AppendContent(pending_data_); + if (!pending_data_.empty()) { + // This is the data left after the headers are parsed. + AppendContent(pending_data_); + pending_data_.clear(); + } - pending_data_.clear(); + // NOTE: Don't have to firstly put the data to the pending data. + AppendContent(data, length); if (IsContentFull()) { // All content has been read. @@ -226,7 +229,11 @@ bool HttpParser::ParseFixedContent() { return true; } -bool HttpParser::ParseChunkedContent() { +bool HttpParser::ParseChunkedContent(const char* data, std::size_t length) { + // Append the new data to the pending data. + // NOTE: It's more difficult to avoid this than fixed-length content. + pending_data_.append(data, length); + LOG_VERB("Parse chunked content (pending data size: %u).", pending_data_.size()); diff --git a/webcc/http_parser.h b/webcc/http_parser.h index 7094d0e..7605adf 100644 --- a/webcc/http_parser.h +++ b/webcc/http_parser.h @@ -46,11 +46,11 @@ protected: bool ParseHeaderLine(const std::string& line); - virtual bool ParseContent(); + virtual bool ParseContent(const char* data, std::size_t length); - bool ParseFixedContent(); + bool ParseFixedContent(const char* data, std::size_t length); - bool ParseChunkedContent(); + bool ParseChunkedContent(const char* data, std::size_t length); bool ParseChunkSize(); // Return false if the compressed content cannot be decompressed. diff --git a/webcc/http_request_parser.cc b/webcc/http_request_parser.cc index 6175987..046731f 100644 --- a/webcc/http_request_parser.cc +++ b/webcc/http_request_parser.cc @@ -34,19 +34,24 @@ bool HttpRequestParser::ParseStartLine(const std::string& line) { return true; } -bool HttpRequestParser::ParseContent() { +bool HttpRequestParser::ParseContent(const char* data, std::size_t length) { if (chunked_) { - return ParseChunkedContent(); + return ParseChunkedContent(data, length); } else { if (request_->content_type().multipart()) { - return ParseMultipartContent(); + return ParseMultipartContent(data, length); } else { - return ParseFixedContent(); + return ParseFixedContent(data, length); } } } -bool HttpRequestParser::ParseMultipartContent() { +bool HttpRequestParser::ParseMultipartContent(const char* data, + std::size_t length) { + // Append the new data to the pending data. + // NOTE: It's more difficult to avoid this than normal fixed-length content. + pending_data_.append(data, length); + LOG_VERB("Parse multipart content (pending data size: %u).", pending_data_.size()); diff --git a/webcc/http_request_parser.h b/webcc/http_request_parser.h index efc2f32..b2390df 100644 --- a/webcc/http_request_parser.h +++ b/webcc/http_request_parser.h @@ -21,11 +21,11 @@ private: bool ParseStartLine(const std::string& line) final; // Override to handle multipart form data which is request only. - bool ParseContent() final; + bool ParseContent(const char* data, std::size_t length) final; // Multipart specific parsing helpers. - bool ParseMultipartContent(); + bool ParseMultipartContent(const char* data, std::size_t length); bool ParsePartHeaders(bool* need_more_data); bool GetNextBoundaryLine(std::size_t* b_off, std::size_t* b_count, bool* ended);