Refine parser

master
Chunting Gu 6 years ago
parent 2a868c4dc1
commit 2fe4024511

@ -2,7 +2,7 @@
set(UT_SRCS set(UT_SRCS
base64_unittest.cc base64_unittest.cc
parser_unittest.cc request_parser_unittest.cc
rest_service_manager_unittest.cc rest_service_manager_unittest.cc
url_unittest.cc url_unittest.cc
) )

@ -2,10 +2,6 @@
#include "webcc/request.h" #include "webcc/request.h"
#include "webcc/request_parser.h" #include "webcc/request_parser.h"
#include "webcc/response.h"
#include "webcc/response_parser.h"
#include <iostream>
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
@ -30,7 +26,7 @@ protected:
EXPECT_EQ("Close", request_.GetHeader("Connection")); EXPECT_EQ("Close", request_.GetHeader("Connection"));
EXPECT_EQ("", request_.content()); EXPECT_EQ("", request_.content());
EXPECT_EQ(0, request_.content_length()); EXPECT_EQ(webcc::kInvalidLength, request_.content_length());
} }
std::string payload_; std::string payload_;
@ -39,7 +35,7 @@ protected:
webcc::RequestParser parser_; webcc::RequestParser parser_;
}; };
TEST_F(GetRequestParserTest, ParseFullDataOnce) { TEST_F(GetRequestParserTest, ParseOnce) {
bool ok = parser_.Parse(payload_.data(), payload_.size()); bool ok = parser_.Parse(payload_.data(), payload_.size());
EXPECT_TRUE(ok); EXPECT_TRUE(ok);
@ -125,7 +121,7 @@ protected:
webcc::RequestParser parser_; webcc::RequestParser parser_;
}; };
TEST_F(PostRequestParserTest, ParseFullDataOnce) { TEST_F(PostRequestParserTest, ParseOnce) {
bool ok = parser_.Parse(payload_.data(), payload_.size()); bool ok = parser_.Parse(payload_.data(), payload_.size());
EXPECT_TRUE(ok); EXPECT_TRUE(ok);
@ -213,7 +209,7 @@ protected:
webcc::RequestParser parser_; webcc::RequestParser parser_;
}; };
TEST_F(MultipartRequestParserTest, ParseFullDataOnce) { TEST_F(MultipartRequestParserTest, ParseOnce) {
bool ok = parser_.Parse(payload_.data(), payload_.size()); bool ok = parser_.Parse(payload_.data(), payload_.size());
EXPECT_TRUE(ok); EXPECT_TRUE(ok);

@ -34,6 +34,10 @@ public:
return content_length_; return content_length_;
} }
// Overwrite the stored content length of this message.
// Only content_length_ is assigned; nothing else is touched here.
// NOTE(review): the caller visible in this diff (Parser::Finish) passes the
// parser's content_length_, which its own comment says "could be
// kInvalidLength when chunked" - readers of content_length() should be
// prepared for that sentinel; confirm against other callers.
void set_content_length(std::size_t content_length) {
content_length_ = content_length;
}
const std::string& content() const { const std::string& content() const {
return content_; return content_;
} }

@ -139,54 +139,40 @@ bool Parser::GetNextLine(std::size_t off, std::string* line, bool erase) {
bool Parser::ParseHeaderLine(const std::string& line) { bool Parser::ParseHeaderLine(const std::string& line) {
Header header; Header header;
if (!Split2(line, ':', &header.first, &header.second)) { if (!Split2(line, ':', &header.first, &header.second)) {
LOG_ERRO("Invalid header: %s", line.c_str());
return false; return false;
} }
do { if (boost::iequals(header.first, headers::kContentLength)) {
if (!chunked_ && !content_length_parsed_) { content_length_parsed_ = true;
if (boost::iequals(header.first, headers::kContentLength)) {
content_length_parsed_ = true;
if (!StringToSizeT(header.second, 10, &content_length_)) { if (!StringToSizeT(header.second, 10, &content_length_)) {
LOG_ERRO("Invalid content length: %s.", header.second.c_str()); LOG_ERRO("Invalid content length: %s.", header.second.c_str());
return false; return false;
}
LOG_INFO("Content length: %u.", content_length_);
// Reserve memory to avoid frequent reallocation when append.
try {
content_.reserve(content_length_);
} catch (const std::exception& e) {
LOG_ERRO("Failed to reserve content memory: %s.", e.what());
return false;
}
break;
}
} }
// TODO: Replace `!chunked_` with <TransferEncodingParsed>. LOG_INFO("Content length: %u.", content_length_);
if (!chunked_ && !content_length_parsed_) {
if (boost::iequals(header.first, headers::kTransferEncoding)) {
if (header.second == "chunked") {
// The content is chunked.
chunked_ = true;
}
break; // Reserve memory to avoid frequent reallocation when append.
} try {
content_.reserve(content_length_);
} catch (const std::exception& e) {
LOG_ERRO("Failed to reserve content memory: %s.", e.what());
return false;
} }
} while (false); } else if (boost::iequals(header.first, headers::kContentType)) {
// Parse Content-Type.
if (boost::iequals(header.first, headers::kContentType)) {
ContentType content_type(header.second); ContentType content_type(header.second);
if (!content_type.Valid()) { if (!content_type.Valid()) {
LOG_ERRO("Invalid content-type header: %s", header.second.c_str()); LOG_ERRO("Invalid content-type header: %s", header.second.c_str());
return false; return false;
} else {
message_->SetContentType(content_type);
}
} else if (boost::iequals(header.first, headers::kTransferEncoding)) {
if (header.second == "chunked") {
// The content is chunked.
chunked_ = true;
} }
message_->SetContentType(content_type);
} }
message_->SetHeader(std::move(header)); message_->SetHeader(std::move(header));
@ -220,7 +206,7 @@ bool Parser::ParseFixedContent(const char* data, std::size_t length) {
pending_data_.clear(); pending_data_.clear();
} }
// NOTE: Don't have to firstly put the data to the pending data. // Don't have to firstly put the data to the pending data.
AppendContent(data, length); AppendContent(data, length);
if (IsContentFull()) { if (IsContentFull()) {
@ -232,13 +218,8 @@ bool Parser::ParseFixedContent(const char* data, std::size_t length) {
} }
bool Parser::ParseChunkedContent(const char* data, std::size_t length) { bool Parser::ParseChunkedContent(const char* data, std::size_t length) {
// Append the new data to the pending data.
// NOTE: It's more difficult to avoid this than fixed-length content.
pending_data_.append(data, length); pending_data_.append(data, length);
LOG_VERB("Parse chunked content (pending data size: %u).",
pending_data_.size());
while (true) { while (true) {
// Read chunk-size if necessary. // Read chunk-size if necessary.
if (chunk_size_ == kInvalidLength) { if (chunk_size_ == kInvalidLength) {
@ -321,6 +302,9 @@ bool Parser::Finish() {
return true; return true;
} }
// Could be kInvalidLength when chunked.
message_->set_content_length(content_length_);
if (!IsContentCompressed()) { if (!IsContentCompressed()) {
message_->SetContent(std::move(content_), false); message_->SetContent(std::move(content_), false);
return true; return true;

@ -59,6 +59,7 @@ protected:
void AppendContent(const char* data, std::size_t count); void AppendContent(const char* data, std::size_t count);
void AppendContent(const std::string& data); void AppendContent(const std::string& data);
// TODO: Rename to IsFixedContentFull.
bool IsContentFull() const; bool IsContentFull() const;
// Check header Content-Encoding to see if the content is compressed. // Check header Content-Encoding to see if the content is compressed.

@ -48,13 +48,8 @@ bool RequestParser::ParseContent(const char* data, std::size_t length) {
bool RequestParser::ParseMultipartContent(const char* data, bool RequestParser::ParseMultipartContent(const char* data,
std::size_t length) { std::size_t length) {
// Append the new data to the pending data.
// NOTE: It's more difficult to avoid this than normal fixed-length content.
pending_data_.append(data, length); pending_data_.append(data, length);
LOG_VERB("Parse multipart content (3pending data size: %u).",
pending_data_.size());
if (!content_length_parsed_ || content_length_ == kInvalidLength) { if (!content_length_parsed_ || content_length_ == kInvalidLength) {
// Invalid content length (syntax error). // Invalid content length (syntax error).
return false; return false;
@ -71,7 +66,7 @@ bool RequestParser::ParseMultipartContent(const char* data,
if (!GetNextLine(0, &line, true)) { if (!GetNextLine(0, &line, true)) {
break; // Not enough data break; // Not enough data
} }
if (!IsBoundary(line)) { if (!IsBoundary(line, 0, line.size())) {
LOG_ERRO("Invalid boundary: %s", line.c_str()); LOG_ERRO("Invalid boundary: %s", line.c_str());
return false; return false;
} }
@ -105,12 +100,13 @@ bool RequestParser::ParseMultipartContent(const char* data,
std::size_t off = 0; std::size_t off = 0;
std::size_t count = 0; std::size_t count = 0;
bool ended = false; bool ended = false;
// TODO: Remember last CRLF position.
if (!GetNextBoundaryLine(&off, &count, &ended)) { if (!GetNextBoundaryLine(&off, &count, &ended)) {
// Wait until next boundary. // Wait until next boundary.
break; break;
} }
LOG_INFO("Next boundary has been found."); LOG_INFO("Next boundary found.");
// This part has ended. // This part has ended.
if (off > 2) { if (off > 2) {
@ -218,19 +214,9 @@ bool RequestParser::GetNextBoundaryLine(std::size_t* b_off,
continue; // Empty line continue; // Empty line
} }
// TODO: Avoid temp string. if (IsBoundary(pending_data_, off, count, ended)) {
std::string line = pending_data_.substr(off, count);
if (IsBoundary(line)) {
*b_off = off;
*b_count = count;
return true;
}
if (IsBoundaryEnd(line)) {
*b_off = off; *b_off = off;
*b_count = count; *b_count = count;
*ended = true;
return true; return true;
} }
@ -240,18 +226,28 @@ bool RequestParser::GetNextBoundaryLine(std::size_t* b_off,
return false; return false;
} }
// Side-by-side rendering: the left column is the removed pair
// IsBoundary(line) / IsBoundaryEnd(line), which compared a whole line against
// "--" + boundary and "--" + boundary + "--" respectively. The right column
// is the replacement IsBoundary(str, off, count, end), which inspects the
// range [off, off + count) of `str` in place:
//   1. count must equal boundary.size() + 2 or boundary.size() + 4;
//   2. the range must start with "--";
//   3. in the +4 case the range must also end with "--", and *end is set to
//      true when `end` is non-null;
//   4. the bytes after the leading "--" must equal the boundary (strncmp).
// The function itself never writes false to *end.
// NOTE(review): the range is indexed without checking off + count against
// str.size() - presumably the caller guarantees it; confirm.
// NOTE(review): strncmp needs <cstring>; that include is not visible in this
// view - confirm it is present.
bool RequestParser::IsBoundary(const std::string& line) const { bool RequestParser::IsBoundary(const std::string& str, std::size_t off,
if (line == "--" + request_->content_type().boundary()) { std::size_t count, bool* end) const {
return true; const std::string& boundary = request_->content_type().boundary();
if (count != boundary.size() + 2 && count != boundary.size() + 4) {
return false;
} }
return false;
}
bool RequestParser::IsBoundaryEnd(const std::string& line) const { if (str[off] != '-' || str[off + 1] != '-') {
if (line == "--" + request_->content_type().boundary() + "--") { return false;
return true;
} }
return false;
if (count == boundary.size() + 4) {
if (str[off + count - 1] != '-' || str[off + count - 2] != '-') {
return false;
}
// NOTE(review): *end is written before the boundary text is compared
// below. A range with the right length and dashes but different boundary
// text therefore returns false with *end already true. GetNextBoundaryLine
// forwards its own `ended` pointer to this call, so a stale true may reach
// its caller on a later, non-closing boundary - confirm, and consider
// assigning *end only after the strncmp succeeds.
if (end != nullptr) {
*end = true;
}
}
return strncmp(boundary.c_str(), &str[off + 2], boundary.size()) == 0;
} }
} // namespace webcc } // namespace webcc

@ -29,8 +29,10 @@ private:
bool ParsePartHeaders(bool* need_more_data); bool ParsePartHeaders(bool* need_more_data);
bool GetNextBoundaryLine(std::size_t* b_off, std::size_t* b_count, bool GetNextBoundaryLine(std::size_t* b_off, std::size_t* b_count,
bool* ended); bool* ended);
bool IsBoundary(const std::string& line) const;
bool IsBoundaryEnd(const std::string& line) const; // Check if the str.substr(off, count) is a boundary.
// Accepts both the plain boundary line ("--" + boundary) and the closing one
// ("--" + boundary + "--"). For the closing form, *end is set to true when
// `end` is non-null; *end is never set to false by this function, so callers
// should initialize it themselves.
bool IsBoundary(const std::string& str, std::size_t off,
std::size_t count, bool* end = nullptr) const;
private: private:
Request* request_; Request* request_;

Loading…
Cancel
Save