Fix content-type parsing issue; refine http parser.

7 years ago · df4bb33bd8
parent 5e1b14e74d
commit df4bb33bd8
9 changed files with 76 additions and 51 deletions
--- a/examples/file_upload_client.cc
+++ b/examples/file_upload_client.cc
@ -50,8 +50,7 @@ int main(int argc, char* argv[]) {
  webcc::HttpClientSession session;

  try {
-  //  auto r = session.PostFile(url, "file",
-  //                            upload_dir / "remember.txt");
+    //auto r = session.PostFile(url, "file", upload_dir / "remember.txt");

    auto r = session.Request(webcc::HttpRequestBuilder{}.Post().
                             Url(url).
--- a/examples/github_client.cc
+++ b/examples/github_client.cc
@ -109,7 +109,8 @@ void CreateAuthorization(webcc::HttpClientSession& session,
                         const std::string& auth) {
  try {

-    std::string data = "{'note': 'Webcc test', 'scopes': ['public_repo', 'repo', 'repo:status', 'user']}";
+    std::string data = "{'note': 'Webcc test', 'scopes': ['public_repo',\
+                       'repo', 'repo:status', 'user']}";

    auto r = session.Post(kUrlRoot + "/authorizations", std::move(data), true,
                          {"Authorization", auth});
--- a/examples/http_client.cc
+++ b/examples/http_client.cc
@ -15,12 +15,12 @@ bool kSslVerify = true;
 void ExampleBasic() {
  webcc::HttpClientSession session;

-  auto r = session.Request(webcc::HttpRequestBuilder{}
-                               .Get()
-                               .Url("http://httpbin.org/get")
-                               .Query("key1", "value1")
-                               .Query("key2", "value2")
-                               .Header("Accept", "application/json")());
+  auto r = session.Request(webcc::HttpRequestBuilder{}.Get().
+                           Url("http://httpbin.org/get").
+                           Query("key1", "value1").
+                           Query("key2", "value2").
+                           Header("Accept", "application/json")
+                           ());

  std::cout << r->content() << std::endl;
 }
@ -36,20 +36,23 @@ void ExampleShortcut() {
  std::cout << r->content() << std::endl;
 }

+#if WEBCC_ENABLE_SSL
+
 // HTTPS is auto-detected from the URL scheme.
 void ExampleHttps() {
  webcc::HttpClientSession session;
  session.set_ssl_verify(kSslVerify);

-  auto r = session.Request(webcc::HttpRequestBuilder{}
-                               .Get()
-                               .Url("https://httpbin.org/get")
-                               .Query("key1", "value1")
-                               .Header("Accept", "application/json")());
+  auto r = session.Request(webcc::HttpRequestBuilder{}.Get().
+                           Url("https://httpbin.org/get").
+                           Query("key1", "value1").
+                           Header("Accept", "application/json")());

  std::cout << r->content() << std::endl;
 }

+#endif  // WEBCC_ENABLE_SSL
+
 // Example for testing Keep-Alive connection.
 //
 // Boost.org doesn't support persistent connection so always includes
@ -106,6 +109,8 @@ void ExampleImage(const std::string& path) {
 int main() {
  WEBCC_LOG_INIT("", webcc::LOG_CONSOLE);

+  webcc::HttpClientSession session;
+
  try {

    ExampleBasic();
--- a/webcc/common.cc
+++ b/webcc/common.cc
@ -142,7 +142,17 @@ bool ContentType::Valid() const {

 void ContentType::Init(const std::string& str) {
  std::string other;
-  Split2(str, ';', &media_type_, &other);
+
+  std::size_t pos = str.find(';');
+  if (pos == std::string::npos) {
+    media_type_ = str;
+  } else {
+    media_type_ = str.substr(0, pos);
+    other = str.substr(pos + 1);
+  }
+
+  boost::trim(media_type_);
+  boost::trim(other);

  if (media_type_ == "multipart/form-data") {
    multipart_ = true;
--- a/webcc/http_message.h
+++ b/webcc/http_message.h
@ -103,8 +103,6 @@ protected:
  }

 protected:
-  // Start line with trailing CRLF.
-  // TODO: Don't include trailing CRLF since it's confusing.
  std::string start_line_;

  std::string content_;
--- a/webcc/http_parser.cc
+++ b/webcc/http_parser.cc
@ -43,28 +43,25 @@ void HttpParser::Init(HttpMessage* message) {
 }

 bool HttpParser::Parse(const char* data, std::size_t length) {
+  if (header_ended_) {
+    return ParseContent(data, length);
+  }
+
  // Append the new data to the pending data.
  pending_data_.append(data, length);

-  if (!header_ended_) {
-    // If headers not ended yet, continue to parse headers.
-    if (!ParseHeaders()) {
-      return false;
-    }
-
-    if (header_ended_) {
-      LOG_INFO("HTTP headers just ended.");
-    }
+  if (!ParseHeaders()) {
+    return false;
  }

-  // If headers still not ended, just return and wait for next read.
  if (!header_ended_) {
    LOG_INFO("HTTP headers will continue in next read.");
    return true;
+  } else {
+    LOG_INFO("HTTP headers just ended.");
+    // NOTE: Any leftover data is still in the pending data.
+    return ParseContent("", 0);
  }
-
-  // Now, parse the content.
-  return ParseContent();
 }

 void HttpParser::Reset() {
@ -99,12 +96,14 @@ bool HttpParser::ParseHeaders() {

    if (!start_line_parsed_) {
      start_line_parsed_ = true;
-      message_->set_start_line(line + kCRLF);
+      message_->set_start_line(line);
      if (!ParseStartLine(line)) {
        return false;
      }
    } else {
-      ParseHeaderLine(line);
+      if (!ParseHeaderLine(line)) {
+        return false;
+      }
    }
  }

@ -182,7 +181,7 @@ bool HttpParser::ParseHeaderLine(const std::string& line) {
  if (boost::iequals(header.first, http::headers::kContentType)) {
    ContentType content_type(header.second);
    if (!content_type.Valid()) {
-      LOG_ERRO("Invalid content-type header.");
+      LOG_ERRO("Invalid content-type header: %s", header.second.c_str());
      return false;
    }
    message_->SetContentType(content_type);
@ -193,15 +192,15 @@ bool HttpParser::ParseHeaderLine(const std::string& line) {
  return true;
 }

-bool HttpParser::ParseContent() {
+bool HttpParser::ParseContent(const char* data, std::size_t length) {
  if (chunked_) {
-    return ParseChunkedContent();
+    return ParseChunkedContent(data, length);
  } else {
-    return ParseFixedContent();
+    return ParseFixedContent(data, length);
  }
 }

-bool HttpParser::ParseFixedContent() {
+bool HttpParser::ParseFixedContent(const char* data, std::size_t length) {
  if (!content_length_parsed_) {
    // No Content-Length, no content.
    Finish();
@ -213,10 +212,14 @@ bool HttpParser::ParseFixedContent() {
    return false;
  }

-  // TODO: Avoid copy using std::move.
-  AppendContent(pending_data_);
+  if (!pending_data_.empty()) {
+    // This is the data left after the headers are parsed.
+    AppendContent(pending_data_);
+    pending_data_.clear();
+  }

-  pending_data_.clear();
+  // NOTE: No need to put the data into the pending data first.
+  AppendContent(data, length);

  if (IsContentFull()) {
    // All content has been read.
@ -226,7 +229,11 @@ bool HttpParser::ParseFixedContent() {
  return true;
 }

-bool HttpParser::ParseChunkedContent() {
+bool HttpParser::ParseChunkedContent(const char* data, std::size_t length) {
+  // Append the new data to the pending data.
+  // NOTE: Avoiding this copy is harder here than for fixed-length content.
+  pending_data_.append(data, length);
+
  LOG_VERB("Parse chunked content (pending data size: %u).",
           pending_data_.size());

--- a/webcc/http_parser.h
+++ b/webcc/http_parser.h
@ -46,11 +46,11 @@ protected:

  bool ParseHeaderLine(const std::string& line);

-  virtual bool ParseContent();
+  virtual bool ParseContent(const char* data, std::size_t length);

-  bool ParseFixedContent();
+  bool ParseFixedContent(const char* data, std::size_t length);

-  bool ParseChunkedContent();
+  bool ParseChunkedContent(const char* data, std::size_t length);
  bool ParseChunkSize();

  // Return false if the compressed content cannot be decompressed.
--- a/webcc/http_request_parser.cc
+++ b/webcc/http_request_parser.cc
@ -34,19 +34,24 @@ bool HttpRequestParser::ParseStartLine(const std::string& line) {
  return true;
 }

-bool HttpRequestParser::ParseContent() {
+bool HttpRequestParser::ParseContent(const char* data, std::size_t length) {
  if (chunked_) {
-    return ParseChunkedContent();
+    return ParseChunkedContent(data, length);
  } else {
    if (request_->content_type().multipart()) {
-      return ParseMultipartContent();
+      return ParseMultipartContent(data, length);
    } else {
-      return ParseFixedContent();
+      return ParseFixedContent(data, length);
    }
  }
 }

-bool HttpRequestParser::ParseMultipartContent() {
+bool HttpRequestParser::ParseMultipartContent(const char* data,
+                                              std::size_t length) {
+  // Append the new data to the pending data.
+  // NOTE: This copy is harder to avoid than for normal fixed-length content.
+  pending_data_.append(data, length);
+
  LOG_VERB("Parse multipart content (pending data size: %u).",
           pending_data_.size());

--- a/webcc/http_request_parser.h
+++ b/webcc/http_request_parser.h
@ -21,11 +21,11 @@ private:
  bool ParseStartLine(const std::string& line) final;

  // Override to handle multipart form data which is request only.
-  bool ParseContent() final;
+  bool ParseContent(const char* data, std::size_t length) final;

  // Multipart specific parsing helpers.

-  bool ParseMultipartContent();
+  bool ParseMultipartContent(const char* data, std::size_t length);
  bool ParsePartHeaders(bool* need_more_data);
  bool GetNextBoundaryLine(std::size_t* b_off, std::size_t* b_count,
                           bool* ended);