Support chunked response content (but no Trailer headers).

master
Chunting Gu 7 years ago
parent 01c7a6e328
commit 8d9855f751

@ -90,6 +90,7 @@ if(WEBCC_ENABLE_SSL)
find_package(OpenSSL)
if(OPENSSL_FOUND)
include_directories(${OPENSSL_INCLUDE_DIR})
message(STATUS "OpenSSL libs: " ${OPENSSL_LIBRARIES})
endif()
endif()
@ -142,7 +143,7 @@ if(WEBCC_ENABLE_EXAMPLES)
if(WEBCC_ENABLE_SSL)
add_subdirectory(example/http_ssl_client)
add_subdirectory(example/rest_github_client)
add_subdirectory(example/github_rest_client)
endif()
add_subdirectory(example/http_bin_client)

@ -0,0 +1,15 @@
# Build script for the GitHub REST client example.
#
# The link list is accumulated in LIBS: the webcc library, jsoncpp, Boost,
# the thread library and OpenSSL, plus a few platform specific extras.
set(LIBS webcc jsoncpp ${Boost_LIBRARIES} "${CMAKE_THREAD_LIBS_INIT}")
list(APPEND LIBS ${OPENSSL_LIBRARIES})

if(WIN32)
    # Extra system library linked on Windows together with OpenSSL.
    list(APPEND LIBS crypt32)
endif()

if(UNIX)
    # Add `-ldl` for Linux to avoid "undefined reference to `dlopen'".
    list(APPEND LIBS ${CMAKE_DL_LIBS})
endif()

add_executable(github_rest_client main.cc)
target_link_libraries(github_rest_client ${LIBS})

@ -3,19 +3,37 @@
#include "webcc/http_ssl_client.h"
#include "webcc/logger.h"
void Test() {
webcc::HttpRequest request;
request.set_method(webcc::kHttpGet);
request.set_url("/LICENSE_1_0.txt");
int main(int argc, char* argv[]) {
std::string host;
std::string url;
// Leave port to default value.
request.set_host("www.boost.org");
if (argc != 3) {
host = "www.boost.org";
url = "/LICENSE_1_0.txt";
} else {
host = argv[1];
url = argv[2];
}
std::cout << "Host: " << host << std::endl;
std::cout << "URL: " << url << std::endl;
std::cout << std::endl;
WEBCC_LOG_INIT("", webcc::LOG_CONSOLE);
webcc::HttpRequest request;
request.set_method(webcc::kHttpGet);
request.set_url(url);
request.set_host(host); // Leave port to default value.
request.Make();
webcc::HttpSslClient client;
if (client.Request(request)) {
// Verify the certificate of the peer or not.
// See HttpSslClient::Request() for more details.
bool ssl_verify = false;
if (client.Request(request, ssl_verify)) {
std::cout << client.response()->content() << std::endl;
} else {
std::cout << webcc::DescribeError(client.error());
@ -24,12 +42,6 @@ void Test() {
}
std::cout << std::endl;
}
}
int main() {
WEBCC_LOG_INIT("", webcc::LOG_CONSOLE);
Test();
return 0;
}

@ -1,10 +0,0 @@
# Build script for the GitHub REST client example.

# OpenSSL libraries, plus crypt32 when building on Windows.
set(SSL_LIBS ${OPENSSL_LIBRARIES})
if(WIN32)
    list(APPEND SSL_LIBS crypt32)
endif()

add_executable(rest_github_client main.cc)

# Link order: webcc and its dependencies, the thread library, then SSL.
target_link_libraries(rest_github_client
    webcc jsoncpp ${Boost_LIBRARIES}
    "${CMAKE_THREAD_LIBS_INIT}"
    ${SSL_LIBS})

@ -9,6 +9,7 @@ namespace webcc {
// Names of the HTTP header fields referenced by the library.
const std::string kHost = "Host";
const std::string kContentType = "Content-Type";
const std::string kContentLength = "Content-Length";
const std::string kTransferEncoding = "Transfer-Encoding";
const std::string kUserAgent = "User-Agent";
// Content type value for JSON encoded in UTF-8.
const std::string kAppJsonUtf8 = "application/json; charset=utf-8";

@ -49,6 +49,7 @@ const std::size_t kMaxDumpSize = 2048;
// Declarations of the shared string constants: HTTP header field names,
// followed by the JSON (UTF-8) content type value.
extern const std::string kHost;
extern const std::string kContentType;
extern const std::string kContentLength;
extern const std::string kTransferEncoding;
extern const std::string kUserAgent;
extern const std::string kAppJsonUtf8;

@ -7,47 +7,78 @@
namespace webcc {
// -----------------------------------------------------------------------------
// Convert |str| to std::size_t, interpreting the digits in the given |base|
// (e.g., 10 for Content-Length, 16 for chunk-size).
//
// The string must start with a digit or a letter. Leading whitespace and an
// explicit sign are rejected, because std::stoul() would otherwise accept
// them and silently wrap a negative value like "-1" around to a huge
// unsigned number.
// Characters following the number are ignored, as std::stoul() does.
//
// Return true and store the result in |output| on success.
// Return false, leaving |output| untouched, if |str| is not a valid number
// or the value is out of range.
static bool StringToSizeT(const std::string& str, int base,
                          std::size_t* output) {
  if (str.empty()) {
    return false;
  }

  const char first = str[0];
  const bool is_digit = first >= '0' && first <= '9';
  const bool is_letter = (first >= 'a' && first <= 'z') ||
                         (first >= 'A' && first <= 'Z');
  if (!is_digit && !is_letter) {
    // Sign, whitespace or any other unexpected leading character.
    return false;
  }

  try {
    // Whether the leading character is a valid digit for |base| is decided
    // by std::stoul(), which throws if no conversion can be performed.
    *output = static_cast<std::size_t>(std::stoul(str, nullptr, base));
  } catch (const std::exception&) {
    return false;
  }

  return true;
}
// -----------------------------------------------------------------------------
// Create a parser which stores the given |message| pointer in message_.
// Every progress flag starts as false, and both content_length_ and
// chunk_size_ start as kInvalidLength, i.e., not known yet.
HttpParser::HttpParser(HttpMessage* message)
: message_(message),
content_length_(kInvalidLength),
start_line_parsed_(false),
content_length_parsed_(false),
header_parsed_(false),
header_ended_(false),
chunked_(false),
chunk_size_(kInvalidLength),
finished_(false) {
}
bool HttpParser::Parse(const char* data, std::size_t length) {
if (header_parsed_) {
// Append the data to the content.
AppendContent(data, length);
// Append the new data to the pending data.
pending_data_.append(data, length);
if (IsContentFull()) {
// All content has been read.
Finish();
if (!header_ended_) {
// If headers not ended yet, continue to parse headers.
if (!ParseHeaders()) {
return false;
}
if (header_ended_) {
LOG_INFO("HTTP headers just ended.");
}
}
// If headers still not ended, just return and wait for next read.
if (!header_ended_) {
LOG_INFO("HTTP headers will continue in next read.");
return true;
}
// Continue to parse headers.
pending_data_.append(data, length);
// Now, parse the content.
if (chunked_) {
return ParseChunkedContent();
} else {
return ParseFixedContent();
}
}
bool HttpParser::ParseHeaders() {
std::size_t off = 0;
while (true) {
std::size_t pos = pending_data_.find(CRLF, off);
if (pos == std::string::npos) {
std::string line;
if (!NextPendingLine(off, &line, false)) {
// Can't find a full header line, need more data from next read.
break;
}
if (pos == off) { // End of headers.
off = pos + 2; // Skip CRLF.
header_parsed_ = true;
off = off + line.size() + 2; // +2 for CRLF
if (line.empty()) {
header_ended_ = true;
break;
}
std::string line = pending_data_.substr(off, pos - off);
if (!start_line_parsed_) {
start_line_parsed_ = true;
message_->set_start_line(line + CRLF);
@ -55,84 +86,201 @@ bool HttpParser::Parse(const char* data, std::size_t length) {
return false;
}
} else {
ParseHeader(line);
ParseHeaderLine(line);
}
}
// Remove the parsed data.
pending_data_.erase(0, off);
return true;
}
off = pos + 2; // Skip CRLF.
bool HttpParser::NextPendingLine(std::size_t off, std::string* line,
bool remove) {
std::size_t pos = pending_data_.find(CRLF, off);
if (pos == std::string::npos) {
return false;
}
if (header_parsed_) {
// Headers just ended.
LOG_INFO("HTTP headers parsed.");
std::size_t count = pos - off;
if (!content_length_parsed_) {
// No Content-Length, no content.
Finish();
return true;
} else {
// Invalid Content-Length in the request.
if (content_length_ == kInvalidLength) {
return false;
}
}
if (pos > off) {
*line = pending_data_.substr(off, count);
} // else: empty line
AppendContent(pending_data_.substr(off));
if (remove) {
pending_data_.erase(off, count + 2);
}
if (IsContentFull()) {
// All content has been read.
Finish();
}
} else {
// Save the unparsed piece for next parsing.
pending_data_ = pending_data_.substr(off);
return true;
}
// Parse a single header line ("Name: value", without the trailing CRLF) and
// save the header to the message.
//
// Two headers additionally update the parser state:
//   - Content-Length: sets content_length_ and reserves the content buffer.
//     Only the first one is used, and it's ignored once the content is known
//     to be chunked.
//   - Transfer-Encoding: the value "chunked" (case-insensitive) switches the
//     parser to chunked mode, no matter whether a Content-Length header has
//     been seen before or not.
//
// Return false if the line has no ':' separator, if the Content-Length value
// is not a valid number, or if the content buffer can't be reserved.
bool HttpParser::ParseHeaderLine(const std::string& line) {
  // NOTE: Split at the first ':' only, because the value itself (e.g., a
  // date time) may also contain ':'.
  const std::size_t pos = line.find(':');
  if (pos == std::string::npos) {
    return false;
  }

  std::string name = line.substr(0, pos);
  boost::trim(name);

  std::string value = line.substr(pos + 1);
  boost::trim(value);

  if (boost::iequals(name, kContentLength)) {
    if (!chunked_ && !content_length_parsed_) {
      content_length_parsed_ = true;

      if (!StringToSizeT(value, 10, &content_length_)) {
        LOG_ERRO("Invalid content length: %s.", value.c_str());
        return false;
      }

      // NOTE(review): "%u" is used with a std::size_t argument; confirm that
      // the LOG_* macros handle this on 64-bit platforms.
      LOG_INFO("Content length: %u.", content_length_);

      try {
        // Reserve memory to avoid frequent reallocation when append.
        content_.reserve(content_length_);
      } catch (const std::exception& e) {
        LOG_ERRO("Failed to reserve content memory: %s.", e.what());
        return false;
      }
    }
  } else if (boost::iequals(name, kTransferEncoding)) {
    // Transfer-coding names are case-insensitive, and Transfer-Encoding
    // overrides Content-Length regardless of the header order.
    if (!chunked_ && boost::iequals(value, "chunked")) {
      // The content is chunked.
      chunked_ = true;
    }
  }

  // Save the header to the result message.
  message_->SetHeader(std::move(name), std::move(value));

  return true;
}
bool HttpParser::ParseHeader(const std::string& line) {
std::vector<std::string> parts;
boost::split(parts, line, boost::is_any_of(":"));
bool HttpParser::ParseFixedContent() {
if (!content_length_parsed_) {
// No Content-Length, no content.
Finish();
return true;
}
if (parts.size() != 2) {
if (content_length_ == kInvalidLength) {
// Invalid content length (syntax error).
// Normally, shouldn't be here.
return false;
}
std::string& name = parts[0];
std::string& value = parts[1];
// TODO: Avoid copy using std::move.
AppendContent(pending_data_);
boost::trim(name);
boost::trim(value);
pending_data_.clear();
if (!content_length_parsed_ && boost::iequals(name, kContentLength)) {
content_length_parsed_ = true;
if (IsContentFull()) {
// All content has been read.
Finish();
}
try {
content_length_ = static_cast<std::size_t>(std::stoul(value));
} catch (const std::exception&) {
LOG_ERRO("Invalid content length: %s.", value.c_str());
return false;
return true;
}
bool HttpParser::ParseChunkedContent() {
LOG_VERB("Parse chunked content (pending data size: %u).",
pending_data_.size());
while (true) {
// Read chunk-size if necessary.
if (chunk_size_ == kInvalidLength) {
if (!ParseChunkSize()) {
return false;
}
LOG_VERB("Chunk size: %u.", chunk_size_);
}
if (chunk_size_ == 0) {
Finish();
return true;
}
if (chunk_size_ + 2 <= pending_data_.size()) { // +2 for CRLF
AppendContent(pending_data_.c_str(), chunk_size_);
LOG_INFO("Content length: %u.", content_length_);
pending_data_.erase(0, chunk_size_ + 2);
try {
// Reserve memory to avoid frequent reallocation when append.
content_.reserve(content_length_);
} catch (const std::exception& e) {
LOG_ERRO("Failed to reserve content memory: %s.", e.what());
return false;
// Reset chunk-size (NOT to 0).
chunk_size_ = kInvalidLength;
// Continue (explicitly) to parse next chunk.
continue;
} else if (chunk_size_ > pending_data_.size()) {
AppendContent(pending_data_);
chunk_size_ -= pending_data_.size();
pending_data_.clear();
// Wait for more data from next read.
break;
} else {
// Wait for more data from next read.
// if (chunk_size_ == pending_data_.size()) {
// <Also wait for CRLF from next read>
// }
break;
}
}
message_->SetHeader(std::move(name), std::move(value));
return true;
}
// Read the chunk-size line from the beginning of the pending data and store
// the parsed (hexadecimal) size in chunk_size_.
//
// The line, together with its CRLF, is removed from the pending data.
// If the pending data doesn't contain a complete line yet, true is returned
// and chunk_size_ is left unchanged.
// Return false only if the chunk-size can't be parsed as a hex number.
bool HttpParser::ParseChunkSize() {
  LOG_VERB("Parse chunk size.");

  std::string size_line;
  const bool got_line = NextPendingLine(0, &size_line, true);
  if (!got_line) {
    // No complete line yet; more data is needed.
    return true;
  }

  LOG_VERB("Chunk size line: [%s].", size_line.c_str());

  // Everything before the first space is the hex number, e.g., "cf0" (3312).
  // Without a space, substr() simply returns the whole line.
  const std::string hex_str = size_line.substr(0, size_line.find(' '));

  if (StringToSizeT(hex_str, 16, &chunk_size_)) {
    return true;
  }

  LOG_ERRO("Invalid chunk-size: %s.", hex_str.c_str());
  return false;
}
void HttpParser::Finish() {
if (!content_.empty()) {
// Move content to message.
message_->SetContent(std::move(content_), /*set_length*/false);
}
finished_ = true;

@ -26,9 +26,24 @@ class HttpParser {
bool Parse(const char* data, std::size_t length);
protected:
// Parse headers from pending data.
// Return false only on syntax errors.
bool ParseHeaders();
// Get next line (using delimiter CRLF) from the pending data.
// The line will not contain a trailing CRLF.
// If |remove| is true, the line, as well as the trailing CRLF, will be erased
// from the pending data.
bool NextPendingLine(std::size_t off, std::string* line, bool remove);
virtual bool ParseStartLine(const std::string& line) = 0;
bool ParseHeader(const std::string& line);
bool ParseHeaderLine(const std::string& line);
bool ParseFixedContent();
bool ParseChunkedContent();
bool ParseChunkSize();
void Finish();
@ -48,7 +63,9 @@ class HttpParser {
std::string content_;
bool start_line_parsed_;
bool content_length_parsed_;
bool header_parsed_;
bool header_ended_;
bool chunked_;
std::size_t chunk_size_;
bool finished_;
};

@ -34,7 +34,7 @@ void HttpSslClient::SetTimeout(int seconds) {
}
}
bool HttpSslClient::Request(const HttpRequest& request) {
bool HttpSslClient::Request(const HttpRequest& request, bool ssl_verify) {
io_context_.restart();
response_.reset(new HttpResponse());
@ -48,7 +48,7 @@ bool HttpSslClient::Request(const HttpRequest& request) {
return false;
}
if ((error_ = Handshake(request.host())) != kNoError) {
if ((error_ = Handshake(request.host(), ssl_verify)) != kNoError) {
return false;
}
@ -95,8 +95,13 @@ Error HttpSslClient::Connect(const HttpRequest& request) {
}
// NOTE: Don't check timeout. It doesn't make much sense.
Error HttpSslClient::Handshake(const std::string& host) {
ssl_socket_.set_verify_mode(ssl::verify_peer);
Error HttpSslClient::Handshake(const std::string& host, bool ssl_verify) {
if (ssl_verify) {
ssl_socket_.set_verify_mode(ssl::verify_peer);
} else {
ssl_socket_.set_verify_mode(ssl::verify_none);
}
ssl_socket_.set_verify_callback(ssl::rfc2818_verification(host));
// Use sync API directly since we don't need timeout control.

@ -34,7 +34,10 @@ class HttpSslClient {
void SetTimeout(int seconds);
// Connect to server, send request, wait until response is received.
bool Request(const HttpRequest& request);
// NOTE: SSL verification (ssl_verify=true) needs CA certificates to be found
// in the default verify paths of OpenSSL. On Windows, it means you need to
// set environment variable SSL_CERT_FILE properly.
bool Request(const HttpRequest& request, bool ssl_verify = true);
HttpResponsePtr response() const { return response_; }
@ -45,7 +48,7 @@ class HttpSslClient {
private:
Error Connect(const HttpRequest& request);
Error Handshake(const std::string& host);
Error Handshake(const std::string& host, bool ssl_verify);
Error SendReqeust(const HttpRequest& request);

Loading…
Cancel
Save