From d906470a4a198dc6e0c8215d329411ef24cd2896 Mon Sep 17 00:00:00 2001 From: Chunting Gu Date: Wed, 20 Mar 2019 17:47:45 +0800 Subject: [PATCH] Support request and response compression. --- examples/github_client.cc | 16 ++++++ examples/rest_book_server.cc | 7 +++ webcc/globals.h | 14 +++++ webcc/http_client_session.cc | 35 ++++++++----- webcc/http_client_session.h | 10 ++++ webcc/http_connection.cc | 24 +++++---- webcc/http_connection.h | 8 ++- webcc/http_message.cc | 17 ++++++ webcc/http_message.h | 5 ++ webcc/http_parser.cc | 16 +----- webcc/http_request.h | 2 +- webcc/http_response.cc | 17 +++--- webcc/http_response.h | 14 ++--- webcc/rest_request_handler.cc | 25 +++++++-- webcc/rest_service.h | 4 ++ webcc/soap_request_handler.cc | 30 +++++++---- webcc/zlib_wrapper.cc | 99 ++++++++++++++++++++++++++--------- webcc/zlib_wrapper.h | 7 ++- 18 files changed, 250 insertions(+), 100 deletions(-) diff --git a/examples/github_client.cc b/examples/github_client.cc index 62cbbcb..59d11dc 100644 --- a/examples/github_client.cc +++ b/examples/github_client.cc @@ -103,6 +103,22 @@ void ListAuthUserFollowers(webcc::HttpClientSession& session, } } +void CreateAuthorization(webcc::HttpClientSession& session, + const std::string& auth) { + try { + + std::string data = "{'note': 'Webcc test', 'scopes': ['public_repo', 'repo', 'repo:status', 'user']}"; + + auto r = session.Post(kUrlRoot + "/authorizations", std::move(data), true, + {"Authorization", auth}); + + std::cout << r->content() << std::endl; + + } catch (const webcc::Exception& e) { + std::cout << e.what() << std::endl; + } +} + // ----------------------------------------------------------------------------- int main() { diff --git a/examples/rest_book_server.cc b/examples/rest_book_server.cc index de44816..5b32d1e 100644 --- a/examples/rest_book_server.cc +++ b/examples/rest_book_server.cc @@ -98,7 +98,10 @@ void BookListService::Get(const webcc::UrlQuery& /*query*/, json.append(BookToJson(book)); } + // TODO: Simplify 
response->content = JsonToString(json); + response->media_type = webcc::http::media_types::kApplicationJson; + response->charset = "utf-8"; response->status = webcc::http::Status::kOK; } @@ -114,6 +117,8 @@ void BookListService::Post(const std::string& request_content, json["id"] = id; response->content = JsonToString(json); + response->media_type = webcc::http::media_types::kApplicationJson; + response->charset = "utf-8"; response->status = webcc::http::Status::kCreated; } else { // Invalid JSON @@ -144,6 +149,8 @@ void BookDetailService::Get(const webcc::UrlMatches& url_matches, } response->content = BookToJsonString(book); + response->media_type = webcc::http::media_types::kApplicationJson; + response->charset = "utf-8"; response->status = webcc::http::Status::kOK; } diff --git a/webcc/globals.h b/webcc/globals.h index 09b77f7..b927069 100644 --- a/webcc/globals.h +++ b/webcc/globals.h @@ -28,6 +28,12 @@ const std::size_t kBufferSize = 1024; const char* const kPort80 = "80"; const char* const kPort443 = "443"; +// Why 1400? See the following page: +// https://www.itworld.com/article/2693941/why-it-doesn-t-make-sense-to- +// gzip-all-content-from-your-web-server.html +// TODO: Configurable +const std::size_t kGzipThreshold = 1400; + // ----------------------------------------------------------------------------- // HTTP headers. @@ -64,6 +70,7 @@ namespace headers { // NOTE: Field names are case-insensitive. // See https://stackoverflow.com/a/5259004 for more details. 
const char* const kHost = "Host"; +const char* const kDate = "Date"; const char* const kContentType = "Content-Type"; const char* const kContentLength = "Content-Length"; const char* const kContentEncoding = "Content-Encoding"; @@ -72,6 +79,7 @@ const char* const kTransferEncoding = "Transfer-Encoding"; const char* const kAccept = "Accept"; const char* const kAcceptEncoding = "Accept-Encoding"; const char* const kUserAgent = "User-Agent"; +const char* const kServer = "Server"; } // namespace headers @@ -95,6 +103,12 @@ const char* const kUtf8 = "utf-8"; } // namespace charsets +enum class ContentEncoding { + kUnknown, + kGzip, + kDeflate, +}; + // Return default user agent for HTTP headers. const std::string& UserAgent(); diff --git a/webcc/http_client_session.cc b/webcc/http_client_session.cc index fa0f539..0279423 100644 --- a/webcc/http_client_session.cc +++ b/webcc/http_client_session.cc @@ -1,6 +1,7 @@ #include "webcc/http_client_session.h" #include "webcc/url.h" +#include "webcc/zlib_wrapper.h" namespace webcc { @@ -33,7 +34,6 @@ std::size_t GetBufferSize(std::size_t session_buffer_size, // ----------------------------------------------------------------------------- - HttpClientSession::HttpClientSession() { InitHeaders(); } @@ -48,17 +48,6 @@ HttpResponsePtr HttpClientSession::Request(HttpRequestArgs&& args) { request.AddParameter(args.parameters_[i - 1], args.parameters_[i]); } - if (!args.data_.empty()) { - request.SetContent(std::move(args.data_), true); - - // TODO: Request-level charset. - if (args.json_) { - request.SetContentType(http::media_types::kApplicationJson, charset_); - } else if (!content_type_.empty()) { - request.SetContentType(content_type_, charset_); - } - } - // Apply the session-level headers. 
for (const HttpHeader& h : headers_.data()) { request.SetHeader(h.first, h.second); @@ -76,6 +65,28 @@ HttpResponsePtr HttpClientSession::Request(HttpRequestArgs&& args) { request.SetHeader(http::headers::kConnection, "Close"); } + if (!args.data_.empty()) { + if (gzip_ && args.data_.size() > kGzipThreshold) { + std::string compressed; + if (Compress(args.data_, &compressed)) { + request.SetContent(std::move(compressed), true); + request.SetHeader(http::headers::kContentEncoding, "gzip"); + } else { + LOG_WARN("Cannot compress the content data!"); + request.SetContent(std::move(args.data_), true); + } + } else { + request.SetContent(std::move(args.data_), true); + } + + // TODO: Request-level charset. + if (args.json_) { + request.SetContentType(http::media_types::kApplicationJson, charset_); + } else if (!content_type_.empty()) { + request.SetContentType(content_type_, charset_); + } + } + request.Prepare(); bool ssl_verify = GetSslVerify(ssl_verify_, args.ssl_verify_); diff --git a/webcc/http_client_session.h b/webcc/http_client_session.h index 4653df3..53c18aa 100644 --- a/webcc/http_client_session.h +++ b/webcc/http_client_session.h @@ -41,6 +41,10 @@ public: } } + void set_gzip(bool gzip) { + gzip_ = gzip; + } + void AddHeader(const std::string& key, const std::string& value) { headers_.Add(key, value); } @@ -87,6 +91,12 @@ private: // Timeout in seconds for receiving response. int timeout_ = 0; + // Compress the request content. + // NOTE: Most servers don't support compressed requests. + // Even the requests module from Python doesn't have a built-in support. + // See: https://github.com/kennethreitz/requests/issues/1753 + bool gzip_ = false; + // Connection pool for keep-alive. 
HttpClientPool pool_; }; diff --git a/webcc/http_connection.cc b/webcc/http_connection.cc index 580a75e..9d89ecc 100644 --- a/webcc/http_connection.cc +++ b/webcc/http_connection.cc @@ -32,17 +32,21 @@ void HttpConnection::Close() { } } -void HttpConnection::SetResponseContent(std::string&& content, - const std::string& media_type, - const std::string& charset) { - response_.SetContent(std::move(content), true); - response_.SetContentType(media_type, charset); +void HttpConnection::SendResponse(HttpResponsePtr response) { + assert(response); + + response_ = response; + + // TODO: Support keep-alive. + response_->SetHeader(http::headers::kConnection, "Close"); + + response_->Prepare(); + + DoWrite(); } void HttpConnection::SendResponse(http::Status status) { - response_.set_status(status); - response_.Prepare(); - DoWrite(); + SendResponse(std::make_shared<HttpResponse>(status)); } void HttpConnection::DoRead() { @@ -83,9 +87,9 @@ void HttpConnection::OnRead(boost::system::error_code ec, std::size_t length) { } void HttpConnection::DoWrite() { - LOG_VERB("HTTP response:\n%s", response_.Dump(4, "> ").c_str()); + LOG_VERB("HTTP response:\n%s", response_->Dump(4, "> ").c_str()); - boost::asio::async_write(socket_, response_.ToBuffers(), + boost::asio::async_write(socket_, response_->ToBuffers(), std::bind(&HttpConnection::OnWrite, shared_from_this(), std::placeholders::_1, std::placeholders::_2)); diff --git a/webcc/http_connection.h b/webcc/http_connection.h index 7ae7f35..fb38cb3 100644 --- a/webcc/http_connection.h +++ b/webcc/http_connection.h @@ -39,11 +39,9 @@ public: // Close the socket. void Close(); - void SetResponseContent(std::string&& content, - const std::string& media_type, - const std::string& charset); + // Send response to client. + void SendResponse(HttpResponsePtr response); - // Send response to client with the given status. 
void SendResponse(http::Status status); private: @@ -72,7 +70,7 @@ private: HttpRequestParser request_parser_; // The response to be sent back to the client. - HttpResponse response_; + HttpResponsePtr response_; }; } // namespace webcc diff --git a/webcc/http_message.cc b/webcc/http_message.cc index 10e440d..2c23007 100644 --- a/webcc/http_message.cc +++ b/webcc/http_message.cc @@ -87,6 +87,23 @@ bool HttpMessage::IsConnectionKeepAlive() const { return false; } +http::ContentEncoding HttpMessage::GetContentEncoding() const { + const std::string& encoding = GetHeader(http::headers::kContentEncoding); + if (encoding == "gzip") { + return http::ContentEncoding::kGzip; + } + if (encoding == "deflate") { + return http::ContentEncoding::kDeflate; + } + return http::ContentEncoding::kUnknown; +} + +bool HttpMessage::AcceptEncodingGzip() const { + using http::headers::kAcceptEncoding; + + return GetHeader(kAcceptEncoding).find("gzip") != std::string::npos; +} + // See: https://tools.ietf.org/html/rfc7231#section-3.1.1.1 void HttpMessage::SetContentType(const std::string& media_type, const std::string& charset) { diff --git a/webcc/http_message.h b/webcc/http_message.h index 017f4f1..1fdaf96 100644 --- a/webcc/http_message.h +++ b/webcc/http_message.h @@ -97,6 +97,11 @@ public: return headers_.Get(key, existed); } + http::ContentEncoding GetContentEncoding() const; + + // Return true if header Accept-Encoding contains "gzip". + bool AcceptEncodingGzip() const; + // E.g., "text/html", "application/json; charset=utf-8", etc. 
void SetContentType(const std::string& media_type, const std::string& charset); diff --git a/webcc/http_parser.cc b/webcc/http_parser.cc index 20063cc..e0d3b1b 100644 --- a/webcc/http_parser.cc +++ b/webcc/http_parser.cc @@ -295,7 +295,7 @@ bool HttpParser::Finish() { LOG_INFO("Decompress the HTTP content..."); std::string decompressed; - if (!Decompress(content_, decompressed)) { + if (!Decompress(content_, &decompressed)) { LOG_ERRO("Cannot decompress the HTTP content!"); return false; } @@ -318,19 +318,7 @@ bool HttpParser::IsContentFull() const { } bool HttpParser::IsContentCompressed() const { - using http::headers::kContentEncoding; - - const std::string& encoding = message_->GetHeader(kContentEncoding); - - if (encoding.find("gzip") != std::string::npos) { - return true; - } - - if (encoding.find("deflate") != std::string::npos) { - return true; - } - - return false; + return message_->GetContentEncoding() != http::ContentEncoding::kUnknown; } } // namespace webcc diff --git a/webcc/http_request.h b/webcc/http_request.h index 38874be..e8eb712 100644 --- a/webcc/http_request.h +++ b/webcc/http_request.h @@ -59,7 +59,7 @@ public: // Prepare payload. // Compose start line, set Host header, etc. - bool Prepare() override; + bool Prepare() final; private: std::string method_; diff --git a/webcc/http_response.cc b/webcc/http_response.cc index 35890e8..fe366d6 100644 --- a/webcc/http_response.cc +++ b/webcc/http_response.cc @@ -60,22 +60,21 @@ const std::string& ToString(int status) { bool HttpResponse::Prepare() { start_line_ = status_strings::ToString(status_); - SetHeader("Server", http::UserAgent()); - SetHeader("Date", GetHttpDateTimestamp()); - - // TODO: Support Keep-Alive. 
- SetHeader(http::headers::kConnection, "Close"); + SetHeader(http::headers::kServer, http::UserAgent()); + SetHeader(http::headers::kDate, GetHttpDateTimestamp()); return true; } -HttpResponse HttpResponse::Fault(http::Status status) { +HttpResponsePtr HttpResponse::Fault(http::Status status) { assert(status != http::Status::kOK); - HttpResponse response; - response.set_status(status); + auto response = std::make_shared<HttpResponse>(status); + + // TODO + response->SetHeader(http::headers::kConnection, "Close"); - response.Prepare(); + //response->Prepare(); return response; } diff --git a/webcc/http_response.h b/webcc/http_response.h index e46321d..d7f346a 100644 --- a/webcc/http_response.h +++ b/webcc/http_response.h @@ -8,9 +8,14 @@ namespace webcc { +class HttpResponse; +typedef std::shared_ptr<HttpResponse> HttpResponsePtr; + class HttpResponse : public HttpMessage { public: - HttpResponse() : status_(http::Status::kOK) {} + explicit HttpResponse(http::Status status = http::Status::kOK) + : status_(status) { + } ~HttpResponse() override = default; @@ -19,18 +24,15 @@ public: void set_status(int status) { status_ = status; } // Set start line according to status code. - bool Prepare() override; + bool Prepare() final; // Get a fault response when HTTP status is not OK. - // TODO: Avoid copy. - static HttpResponse Fault(http::Status status); + static HttpResponsePtr Fault(http::Status status); private: int status_; }; -typedef std::shared_ptr<HttpResponse> HttpResponsePtr; - } // namespace webcc #endif // WEBCC_HTTP_RESPONSE_H_ diff --git a/webcc/rest_request_handler.cc b/webcc/rest_request_handler.cc index b105e1c..b41ce38 100644 --- a/webcc/rest_request_handler.cc +++ b/webcc/rest_request_handler.cc @@ -5,6 +5,7 @@ #include "webcc/logger.h" #include "webcc/url.h" +#include "webcc/zlib_wrapper.h" namespace webcc { @@ -32,18 +33,32 @@ void RestRequestHandler::HandleConnection(HttpConnectionPtr connection) { return; } - // TODO: Let the service to provide the media-type and charset. 
RestResponse rest_response; service->Handle(rest_request, &rest_response); + auto http_response = std::make_shared<HttpResponse>(rest_response.status); + if (!rest_response.content.empty()) { - connection->SetResponseContent(std::move(rest_response.content), - http::media_types::kApplicationJson, - http::charsets::kUtf8); + if (!rest_response.media_type.empty()) { + http_response->SetContentType(rest_response.media_type, + rest_response.charset); + } + + // Only support gzip for response compression. + if (rest_response.content.size() > kGzipThreshold && + http_request.AcceptEncodingGzip()) { + std::string compressed; + if (Compress(rest_response.content, &compressed)) { + http_response->SetHeader(http::headers::kContentEncoding, "gzip"); + http_response->SetContent(std::move(compressed), true); + } + } else { + http_response->SetContent(std::move(rest_response.content), true); + } } // Send response back to client. - connection->SendResponse(rest_response.status); + connection->SendResponse(http_response); } } // namespace webcc diff --git a/webcc/rest_service.h b/webcc/rest_service.h index 2cd75c9..99ee846 100644 --- a/webcc/rest_service.h +++ b/webcc/rest_service.h @@ -39,7 +39,11 @@ struct RestRequest { struct RestResponse { http::Status status; + std::string content; + + std::string media_type; + std::string charset; }; // ----------------------------------------------------------------------------- diff --git a/webcc/soap_request_handler.cc b/webcc/soap_request_handler.cc index 4e20800..4568cf3 100644 --- a/webcc/soap_request_handler.cc +++ b/webcc/soap_request_handler.cc @@ -16,18 +16,24 @@ bool SoapRequestHandler::Bind(SoapServicePtr service, const std::string& url) { } void SoapRequestHandler::HandleConnection(HttpConnectionPtr connection) { - std::string path = "/" + connection->request().url().path(); + auto http_response = std::make_shared<HttpResponse>(); + + // TODO: Support keep-alive. 
+ http_response->SetHeader(http::headers::kConnection, "Close"); + std::string path = "/" + connection->request().url().path(); SoapServicePtr service = GetServiceByUrl(path); if (!service) { - connection->SendResponse(http::Status::kBadRequest); + http_response->set_status(http::Status::kBadRequest); + connection->SendResponse(http_response); return; } // Parse the SOAP request XML. SoapRequest soap_request; if (!soap_request.FromXml(connection->request().content())) { - connection->SendResponse(http::Status::kBadRequest); + http_response->set_status(http::Status::kBadRequest); + connection->SendResponse(http_response); return; } @@ -42,24 +48,26 @@ void SoapRequestHandler::HandleConnection(HttpConnectionPtr connection) { } if (!service->Handle(soap_request, &soap_response)) { - connection->SendResponse(http::Status::kBadRequest); + http_response->set_status(http::Status::kBadRequest); + connection->SendResponse(http_response); return; } std::string content; soap_response.ToXml(format_raw_, indent_str_, &content); + // TODO: Let the service provide charset. 
if (soap_version_ == kSoapV11) { - connection->SetResponseContent(std::move(content), - http::media_types::kTextXml, - http::charsets::kUtf8); + http_response->SetContentType(http::media_types::kTextXml, + http::charsets::kUtf8); } else { - connection->SetResponseContent(std::move(content), - http::media_types::kApplicationSoapXml, - http::charsets::kUtf8); + http_response->SetContentType(http::media_types::kApplicationSoapXml, + http::charsets::kUtf8); } - connection->SendResponse(http::Status::kOK); + http_response->set_status(http::Status::kOK); + + connection->SendResponse(http_response); } SoapServicePtr SoapRequestHandler::GetServiceByUrl(const std::string& url) { diff --git a/webcc/zlib_wrapper.cc b/webcc/zlib_wrapper.cc index ea96dc3..dbf5567 100644 --- a/webcc/zlib_wrapper.cc +++ b/webcc/zlib_wrapper.cc @@ -1,5 +1,6 @@ #include "webcc/zlib_wrapper.h" +#include <cassert> #include <utility> // std::move #include "zlib.h" @@ -8,11 +9,61 @@ namespace webcc { +bool Compress(const std::string& input, std::string* output) { + output->clear(); + + if (input.empty()) { + return true; + } + + z_stream stream; + stream.next_in = (Bytef*)input.data(); + stream.avail_in = (uInt)input.size(); + stream.zalloc = Z_NULL; + stream.zfree = Z_NULL; + stream.opaque = Z_NULL; + + int ret = deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, + MAX_WBITS + 16, 8, Z_DEFAULT_STRATEGY); + if (ret != Z_OK) { + return false; + } + + std::string buf; + buf.resize(input.size() / 2); // TODO + + // Run deflate() on input until output buffer is not full. 
+ do { + stream.avail_out = (uInt)buf.size(); + stream.next_out = (Bytef*)buf.data(); + + int err = deflate(&stream, Z_FINISH); + + assert(err != Z_STREAM_ERROR); + + if (err != Z_OK) { + deflateEnd(&stream); + if (stream.msg != nullptr) { + LOG_ERRO("zlib deflate error: %s", stream.msg); + } + return false; + } + + std::size_t size = buf.size() - stream.avail_out; + output->insert(output->end(), buf.data(), buf.data() + size); + } while (stream.avail_out == 0); + + if (deflateEnd(&stream) != Z_OK) { + return false; + } + + return true; +} + // Modified from: // http://windrealm.org/tutorials/decompress-gzip-stream.php - -bool Decompress(const std::string& input, std::string& output) { - output.clear(); +bool Decompress(const std::string& input, std::string* output) { + output->clear(); if (input.empty()) { return true; @@ -22,12 +73,12 @@ bool Decompress(const std::string& input, std::string& output) { std::string buf; buf.resize(input.size()); - z_stream strm; - strm.next_in = (Bytef*)input.c_str(); - strm.avail_in = (uInt)input.size(); - strm.total_out = 0; - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; + z_stream stream; + stream.next_in = (Bytef*)input.data(); + stream.avail_in = (uInt)input.size(); + stream.total_out = 0; + stream.zalloc = Z_NULL; + stream.zfree = Z_NULL; // About the windowBits paramter: // (https://stackoverflow.com/a/1838702) @@ -35,45 +86,41 @@ bool Decompress(const std::string& input, std::string& output) { // windowBits can also be greater than 15 for optional gzip decoding. Add 32 // to windowBits to enable zlib and gzip decoding with automatic header // detection, or add 16 to decode only the gzip format (the zlib format will - // return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is - // a crc32 instead of an adler32. - if (inflateInit2(&strm, (32 + MAX_WBITS)) != Z_OK) { + // return a Z_DATA_ERROR). 
+ if (inflateInit2(&stream, MAX_WBITS + 32) != Z_OK) { return false; } while (true) { // Enlarge the output buffer if it's too small. - if (strm.total_out >= buf.size()) { + if (stream.total_out >= buf.size()) { buf.resize(buf.size() + input.size() / 2); } - strm.next_out = (Bytef*)(buf.c_str() + strm.total_out); - strm.avail_out = (uInt)buf.size() - strm.total_out; + stream.next_out = (Bytef*)(buf.data() + stream.total_out); + stream.avail_out = (uInt)buf.size() - stream.total_out; // Inflate another chunk. - //int err = inflate(&strm, Z_SYNC_FLUSH); - int err = inflate(&strm, Z_FULL_FLUSH); + int err = inflate(&stream, Z_SYNC_FLUSH); if (err == Z_STREAM_END) { break; } else if (err != Z_OK) { - inflateEnd(&strm); - if (strm.msg != nullptr) { - LOG_ERRO("zlib inflate error: %s", strm.msg); + inflateEnd(&stream); + if (stream.msg != nullptr) { + LOG_ERRO("zlib inflate error: %s", stream.msg); } return false; } } - if (inflateEnd(&strm) != Z_OK) { + if (inflateEnd(&stream) != Z_OK) { return false; } - // Remove the unused buffer. - buf.erase(strm.total_out); - - // Move the buffer to the output. - output = std::move(buf); + // Remove the unused part then move to the output + buf.erase(stream.total_out); + *output = std::move(buf); return true; } diff --git a/webcc/zlib_wrapper.h b/webcc/zlib_wrapper.h index 98d741d..26b3c9d 100644 --- a/webcc/zlib_wrapper.h +++ b/webcc/zlib_wrapper.h @@ -5,7 +5,12 @@ namespace webcc { -bool Decompress(const std::string& input, std::string& output); +// Compress the input string to gzip format output. +bool Compress(const std::string& input, std::string* output); + +// Decompress the input string with auto detecting both gzip and zlib (deflate) +// formats. +bool Decompress(const std::string& input, std::string* output); } // namespace webcc