From 8d9855f7518bcfc9fa58f1b96cd4f4af46beb735 Mon Sep 17 00:00:00 2001 From: Chunting Gu Date: Thu, 31 Jan 2019 18:10:12 +0800 Subject: [PATCH] Support chunked response content (but no Trailer headers). --- CMakeLists.txt | 3 +- example/github_rest_client/CMakeLists.txt | 15 + .../main.cc | 0 example/http_ssl_client/main.cc | 38 ++- example/rest_github_client/CMakeLists.txt | 10 - webcc/globals.cc | 1 + webcc/globals.h | 1 + webcc/http_parser.cc | 276 ++++++++++++++---- webcc/http_parser.h | 21 +- webcc/http_ssl_client.cc | 13 +- webcc/http_ssl_client.h | 7 +- 11 files changed, 289 insertions(+), 96 deletions(-) create mode 100644 example/github_rest_client/CMakeLists.txt rename example/{rest_github_client => github_rest_client}/main.cc (100%) delete mode 100644 example/rest_github_client/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index abca1b2..0e01453 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,6 +90,7 @@ if(WEBCC_ENABLE_SSL) find_package(OpenSSL) if(OPENSSL_FOUND) include_directories(${OPENSSL_INCLUDE_DIR}) + message(STATUS "OpenSSL libs: " ${OPENSSL_LIBRARIES}) endif() endif() @@ -142,7 +143,7 @@ if(WEBCC_ENABLE_EXAMPLES) if(WEBCC_ENABLE_SSL) add_subdirectory(example/http_ssl_client) - add_subdirectory(example/rest_github_client) + add_subdirectory(example/github_rest_client) endif() add_subdirectory(example/http_bin_client) diff --git a/example/github_rest_client/CMakeLists.txt b/example/github_rest_client/CMakeLists.txt new file mode 100644 index 0000000..de6477d --- /dev/null +++ b/example/github_rest_client/CMakeLists.txt @@ -0,0 +1,15 @@ +set(LIBS webcc jsoncpp ${Boost_LIBRARIES} "${CMAKE_THREAD_LIBS_INIT}") + +set(LIBS ${LIBS} ${OPENSSL_LIBRARIES}) +if(WIN32) + set(LIBS ${LIBS} crypt32) +endif() + +if(UNIX) + # Add `-ldl` for Linux to avoid "undefined reference to `dlopen'". 
+ set(LIBS ${LIBS} ${CMAKE_DL_LIBS}) +endif() + +add_executable(github_rest_client main.cc) + +target_link_libraries(github_rest_client ${LIBS}) diff --git a/example/rest_github_client/main.cc b/example/github_rest_client/main.cc similarity index 100% rename from example/rest_github_client/main.cc rename to example/github_rest_client/main.cc diff --git a/example/http_ssl_client/main.cc b/example/http_ssl_client/main.cc index e662bb2..ed204a2 100644 --- a/example/http_ssl_client/main.cc +++ b/example/http_ssl_client/main.cc @@ -3,19 +3,37 @@ #include "webcc/http_ssl_client.h" #include "webcc/logger.h" -void Test() { - webcc::HttpRequest request; - request.set_method(webcc::kHttpGet); - request.set_url("/LICENSE_1_0.txt"); +int main(int argc, char* argv[]) { + std::string host; + std::string url; - // Leave port to default value. - request.set_host("www.boost.org"); + if (argc != 3) { + host = "www.boost.org"; + url = "/LICENSE_1_0.txt"; + } else { + host = argv[1]; + url = argv[2]; + } + + std::cout << "Host: " << host << std::endl; + std::cout << "URL: " << url << std::endl; + std::cout << std::endl; + + WEBCC_LOG_INIT("", webcc::LOG_CONSOLE); + webcc::HttpRequest request; + request.set_method(webcc::kHttpGet); + request.set_url(url); + request.set_host(host); // Leave port to default value. request.Make(); webcc::HttpSslClient client; - if (client.Request(request)) { + // Verify the certificate of the peer or not. + // See HttpSslClient::Request() for more details. 
+ bool ssl_verify = false; + + if (client.Request(request, ssl_verify)) { std::cout << client.response()->content() << std::endl; } else { std::cout << webcc::DescribeError(client.error()); @@ -24,12 +42,6 @@ void Test() { } std::cout << std::endl; } -} - -int main() { - WEBCC_LOG_INIT("", webcc::LOG_CONSOLE); - - Test(); return 0; } diff --git a/example/rest_github_client/CMakeLists.txt b/example/rest_github_client/CMakeLists.txt deleted file mode 100644 index 3c9917d..0000000 --- a/example/rest_github_client/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -add_executable(rest_github_client main.cc) - -set(SSL_LIBS ${OPENSSL_LIBRARIES}) -if(WIN32) - set(SSL_LIBS ${SSL_LIBS} crypt32) -endif() - -target_link_libraries(rest_github_client webcc jsoncpp ${Boost_LIBRARIES}) -target_link_libraries(rest_github_client "${CMAKE_THREAD_LIBS_INIT}") -target_link_libraries(rest_github_client ${SSL_LIBS}) diff --git a/webcc/globals.cc b/webcc/globals.cc index 170ce2b..60e85c1 100644 --- a/webcc/globals.cc +++ b/webcc/globals.cc @@ -9,6 +9,7 @@ namespace webcc { const std::string kHost = "Host"; const std::string kContentType = "Content-Type"; const std::string kContentLength = "Content-Length"; +const std::string kTransferEncoding = "Transfer-Encoding"; const std::string kUserAgent = "User-Agent"; const std::string kAppJsonUtf8 = "application/json; charset=utf-8"; diff --git a/webcc/globals.h b/webcc/globals.h index aeeff7b..9aee7b5 100644 --- a/webcc/globals.h +++ b/webcc/globals.h @@ -49,6 +49,7 @@ const std::size_t kMaxDumpSize = 2048; extern const std::string kHost; extern const std::string kContentType; extern const std::string kContentLength; +extern const std::string kTransferEncoding; extern const std::string kUserAgent; extern const std::string kAppJsonUtf8; diff --git a/webcc/http_parser.cc b/webcc/http_parser.cc index 2afa1e0..d6ec7d1 100644 --- a/webcc/http_parser.cc +++ b/webcc/http_parser.cc @@ -7,47 +7,78 @@ namespace webcc { +// 
----------------------------------------------------------------------------- + +static bool StringToSizeT(const std::string& str, int base, + std::size_t* output) { + try { + *output = static_cast<std::size_t>(std::stoul(str, 0, base)); + } catch (const std::exception&) { + return false; + } + return true; +} + +// ----------------------------------------------------------------------------- + HttpParser::HttpParser(HttpMessage* message) : message_(message), content_length_(kInvalidLength), start_line_parsed_(false), content_length_parsed_(false), - header_parsed_(false), + header_ended_(false), + chunked_(false), + chunk_size_(kInvalidLength), finished_(false) { } bool HttpParser::Parse(const char* data, std::size_t length) { - if (header_parsed_) { - // Append the data to the content. - AppendContent(data, length); + // Append the new data to the pending data. + pending_data_.append(data, length); - if (IsContentFull()) { - // All content has been read. - Finish(); + if (!header_ended_) { + // If headers not ended yet, continue to parse headers. + if (!ParseHeaders()) { + return false; } + if (header_ended_) { + LOG_INFO("HTTP headers just ended."); + } + } + + // If headers still not ended, just return and wait for next read. + if (!header_ended_) { + LOG_INFO("HTTP headers will continue in next read."); return true; } - // Continue to parse headers. - - pending_data_.append(data, length); + // Now, parse the content. + + if (chunked_) { + return ParseChunkedContent(); + } else { + return ParseFixedContent(); + } +} + +bool HttpParser::ParseHeaders() { std::size_t off = 0; while (true) { - std::size_t pos = pending_data_.find(CRLF, off); - if (pos == std::string::npos) { + std::string line; + if (!NextPendingLine(off, &line, false)) { + // Can't find a full header line, need more data from next read. break; } - if (pos == off) { // End of headers. - off = pos + 2; // Skip CRLF. 
- header_parsed_ = true; + off = off + line.size() + 2; // +2 for CRLF + + if (line.empty()) { + header_ended_ = true; break; } - std::string line = pending_data_.substr(off, pos - off); - if (!start_line_parsed_) { start_line_parsed_ = true; message_->set_start_line(line + CRLF); @@ -55,84 +86,201 @@ bool HttpParser::Parse(const char* data, std::size_t length) { return false; } } else { - ParseHeader(line); + ParseHeaderLine(line); } + } + + // Remove the parsed data. + pending_data_.erase(0, off); + + return true; +} - off = pos + 2; // Skip CRLF. +bool HttpParser::NextPendingLine(std::size_t off, std::string* line, + bool remove) { + std::size_t pos = pending_data_.find(CRLF, off); + + if (pos == std::string::npos) { + return false; } - if (header_parsed_) { - // Headers just ended. - LOG_INFO("HTTP headers parsed."); + std::size_t count = pos - off; - if (!content_length_parsed_) { - // No Content-Length, no content. - Finish(); - return true; - } else { - // Invalid Content-Length in the request. - if (content_length_ == kInvalidLength) { - return false; - } - } + if (pos > off) { + *line = pending_data_.substr(off, count); + } // else: empty line - AppendContent(pending_data_.substr(off)); + if (remove) { + pending_data_.erase(off, count + 2); + } - if (IsContentFull()) { - // All content has been read. - Finish(); - } - } else { - // Save the unparsed piece for next parsing. - pending_data_ = pending_data_.substr(off); + return true; +} + +bool HttpParser::ParseHeaderLine(const std::string& line) { + // NOTE: Can't split with ":" because date time also contains ":". 
+ std::size_t pos = line.find(':'); + if (pos == std::string::npos) { + return false; } + std::string name = line.substr(0, pos); + boost::trim(name); + + std::string value = line.substr(pos + 1); + boost::trim(value); + + do { + if (!chunked_ && !content_length_parsed_) { + if (boost::iequals(name, kContentLength)) { + content_length_parsed_ = true; + + if (!StringToSizeT(value, 10, &content_length_)) { + LOG_ERRO("Invalid content length: %s.", value.c_str()); + return false; + } + + LOG_INFO("Content length: %u.", content_length_); + + try { + // Reserve memory to avoid frequent reallocation when append. + content_.reserve(content_length_); + } catch (const std::exception& e) { + LOG_ERRO("Failed to reserve content memory: %s.", e.what()); + return false; + } + + break; + } + } + + // TODO: Replace `!chunked_` with . + if (!chunked_ && !content_length_parsed_) { + if (boost::iequals(name, kTransferEncoding)) { + if (value == "chunked") { + // The content is chunked. + chunked_ = true; + } + + break; + } + } + } while (false); + + // Save the header to the result message. + message_->SetHeader(std::move(name), std::move(value)); + return true; } -bool HttpParser::ParseHeader(const std::string& line) { - std::vector<std::string> parts; - boost::split(parts, line, boost::is_any_of(":")); +bool HttpParser::ParseFixedContent() { + if (!content_length_parsed_) { + // No Content-Length, no content. + Finish(); + return true; + } - if (parts.size() != 2) { + if (content_length_ == kInvalidLength) { + // Invalid content length (syntax error). + // Normally, shouldn't be here. return false; } - std::string& name = parts[0]; - std::string& value = parts[1]; + // TODO: Avoid copy using std::move. + AppendContent(pending_data_); - boost::trim(name); - boost::trim(value); + pending_data_.clear(); - if (!content_length_parsed_ && boost::iequals(name, kContentLength)) { - content_length_parsed_ = true; + if (IsContentFull()) { + // All content has been read. 
+ Finish(); + } - try { - content_length_ = static_cast<std::size_t>(std::stoul(value)); - } catch (const std::exception&) { - LOG_ERRO("Invalid content length: %s.", value.c_str()); - return false; + return true; +} + +bool HttpParser::ParseChunkedContent() { + LOG_VERB("Parse chunked content (pending data size: %u).", + pending_data_.size()); + + while (true) { + // Read chunk-size if necessary. + if (chunk_size_ == kInvalidLength) { + if (!ParseChunkSize()) { + return false; + } + + LOG_VERB("Chunk size: %u.", chunk_size_); + } + + if (chunk_size_ == 0) { + Finish(); + return true; } + + if (chunk_size_ + 2 <= pending_data_.size()) { // +2 for CRLF + AppendContent(pending_data_.c_str(), chunk_size_); - LOG_INFO("Content length: %u.", content_length_); + pending_data_.erase(0, chunk_size_ + 2); - try { - // Reserve memory to avoid frequent reallocation when append. - content_.reserve(content_length_); - } catch (const std::exception& e) { - LOG_ERRO("Failed to reserve content memory: %s.", e.what()); - return false; + // Reset chunk-size (NOT to 0). + chunk_size_ = kInvalidLength; + + // Continue (explicitly) to parse next chunk. + continue; + + } else if (chunk_size_ > pending_data_.size()) { + AppendContent(pending_data_); + + chunk_size_ -= pending_data_.size(); + + pending_data_.clear(); + + // Wait for more data from next read. + break; + + } else { + // Wait for more data from next read. 
+ // if (chunk_size_ == pending_data_.size()) { + // + // } + break; } } - message_->SetHeader(std::move(name), std::move(value)); + return true; +} + +bool HttpParser::ParseChunkSize() { + LOG_VERB("Parse chunk size."); + + std::size_t off = 0; + std::string line; + if (!NextPendingLine(off, &line, true)) { + return true; + } + + LOG_VERB("Chunk size line: [%s].", line.c_str()); + + std::string hex_str; // e.g., "cf0" (3312) + + std::size_t pos = line.find(' '); + if (pos != std::string::npos) { + hex_str = line.substr(0, pos); + } else { + hex_str = line; + } + + if (!StringToSizeT(hex_str, 16, &chunk_size_)) { + LOG_ERRO("Invalid chunk-size: %s.", hex_str.c_str()); + return false; + } return true; } void HttpParser::Finish() { if (!content_.empty()) { - // Move content to message. message_->SetContent(std::move(content_), /*set_length*/false); } finished_ = true; diff --git a/webcc/http_parser.h b/webcc/http_parser.h index e531dbf..bdd1524 100644 --- a/webcc/http_parser.h +++ b/webcc/http_parser.h @@ -26,9 +26,24 @@ class HttpParser { bool Parse(const char* data, std::size_t length); protected: + // Parse headers from pending data. + // Return false only on syntax errors. + bool ParseHeaders(); + + // Get next line (using delimiter CRLF) from the pending data. + // The line will not contain a trailing CRLF. + // If |remove| is true, the line, as well as the trailing CRLF, will be erased + // from the pending data. 
+ bool NextPendingLine(std::size_t off, std::string* line, bool remove); + virtual bool ParseStartLine(const std::string& line) = 0; - bool ParseHeader(const std::string& line); + bool ParseHeaderLine(const std::string& line); + + bool ParseFixedContent(); + + bool ParseChunkedContent(); + bool ParseChunkSize(); void Finish(); @@ -48,7 +63,9 @@ class HttpParser { std::string content_; bool start_line_parsed_; bool content_length_parsed_; - bool header_parsed_; + bool header_ended_; + bool chunked_; + std::size_t chunk_size_; bool finished_; }; diff --git a/webcc/http_ssl_client.cc b/webcc/http_ssl_client.cc index 49df839..8ae7ef9 100644 --- a/webcc/http_ssl_client.cc +++ b/webcc/http_ssl_client.cc @@ -34,7 +34,7 @@ void HttpSslClient::SetTimeout(int seconds) { } } -bool HttpSslClient::Request(const HttpRequest& request) { +bool HttpSslClient::Request(const HttpRequest& request, bool ssl_verify) { io_context_.restart(); response_.reset(new HttpResponse()); @@ -48,7 +48,7 @@ bool HttpSslClient::Request(const HttpRequest& request) { return false; } - if ((error_ = Handshake(request.host())) != kNoError) { + if ((error_ = Handshake(request.host(), ssl_verify)) != kNoError) { return false; } @@ -95,8 +95,13 @@ Error HttpSslClient::Connect(const HttpRequest& request) { } // NOTE: Don't check timeout. It doesn't make much sense. -Error HttpSslClient::Handshake(const std::string& host) { - ssl_socket_.set_verify_mode(ssl::verify_peer); +Error HttpSslClient::Handshake(const std::string& host, bool ssl_verify) { + if (ssl_verify) { + ssl_socket_.set_verify_mode(ssl::verify_peer); + } else { + ssl_socket_.set_verify_mode(ssl::verify_none); + } + ssl_socket_.set_verify_callback(ssl::rfc2818_verification(host)); // Use sync API directly since we don't need timeout control. 
diff --git a/webcc/http_ssl_client.h b/webcc/http_ssl_client.h index 38dc8cf..a2827d4 100644 --- a/webcc/http_ssl_client.h +++ b/webcc/http_ssl_client.h @@ -34,7 +34,10 @@ class HttpSslClient { void SetTimeout(int seconds); // Connect to server, send request, wait until response is received. - bool Request(const HttpRequest& request); + // NOTE: SSL verification (ssl_verify=true) needs CA certificates to be found + // in the default verify paths of OpenSSL. On Windows, it means you need to + // set environment variable SSL_CERT_FILE properly. + bool Request(const HttpRequest& request, bool ssl_verify = true); HttpResponsePtr response() const { return response_; } @@ -45,7 +48,7 @@ class HttpSslClient { private: Error Connect(const HttpRequest& request); - Error Handshake(const std::string& host); + Error Handshake(const std::string& host, bool ssl_verify); Error SendReqeust(const HttpRequest& request);