Refine parser

master
Chunting Gu 6 years ago
parent 2a868c4dc1
commit 2fe4024511

@ -2,7 +2,7 @@
set(UT_SRCS
base64_unittest.cc
parser_unittest.cc
request_parser_unittest.cc
rest_service_manager_unittest.cc
url_unittest.cc
)

@ -2,10 +2,6 @@
#include "webcc/request.h"
#include "webcc/request_parser.h"
#include "webcc/response.h"
#include "webcc/response_parser.h"
#include <iostream>
// -----------------------------------------------------------------------------
@ -30,7 +26,7 @@ protected:
EXPECT_EQ("Close", request_.GetHeader("Connection"));
EXPECT_EQ("", request_.content());
EXPECT_EQ(0, request_.content_length());
EXPECT_EQ(webcc::kInvalidLength, request_.content_length());
}
std::string payload_;
@ -39,7 +35,7 @@ protected:
webcc::RequestParser parser_;
};
TEST_F(GetRequestParserTest, ParseFullDataOnce) {
TEST_F(GetRequestParserTest, ParseOnce) {
bool ok = parser_.Parse(payload_.data(), payload_.size());
EXPECT_TRUE(ok);
@ -125,7 +121,7 @@ protected:
webcc::RequestParser parser_;
};
TEST_F(PostRequestParserTest, ParseFullDataOnce) {
TEST_F(PostRequestParserTest, ParseOnce) {
bool ok = parser_.Parse(payload_.data(), payload_.size());
EXPECT_TRUE(ok);
@ -213,7 +209,7 @@ protected:
webcc::RequestParser parser_;
};
TEST_F(MultipartRequestParserTest, ParseFullDataOnce) {
TEST_F(MultipartRequestParserTest, ParseOnce) {
bool ok = parser_.Parse(payload_.data(), payload_.size());
EXPECT_TRUE(ok);

@ -34,6 +34,10 @@ public:
return content_length_;
}
void set_content_length(std::size_t content_length) {
content_length_ = content_length;
}
const std::string& content() const {
return content_;
}

@ -139,54 +139,40 @@ bool Parser::GetNextLine(std::size_t off, std::string* line, bool erase) {
bool Parser::ParseHeaderLine(const std::string& line) {
Header header;
if (!Split2(line, ':', &header.first, &header.second)) {
LOG_ERRO("Invalid header: %s", line.c_str());
return false;
}
do {
if (!chunked_ && !content_length_parsed_) {
if (boost::iequals(header.first, headers::kContentLength)) {
content_length_parsed_ = true;
if (boost::iequals(header.first, headers::kContentLength)) {
content_length_parsed_ = true;
if (!StringToSizeT(header.second, 10, &content_length_)) {
LOG_ERRO("Invalid content length: %s.", header.second.c_str());
return false;
}
LOG_INFO("Content length: %u.", content_length_);
// Reserve memory to avoid frequent reallocation when append.
try {
content_.reserve(content_length_);
} catch (const std::exception& e) {
LOG_ERRO("Failed to reserve content memory: %s.", e.what());
return false;
}
break;
}
if (!StringToSizeT(header.second, 10, &content_length_)) {
LOG_ERRO("Invalid content length: %s.", header.second.c_str());
return false;
}
// TODO: Replace `!chunked_` with <TransferEncodingParsed>.
if (!chunked_ && !content_length_parsed_) {
if (boost::iequals(header.first, headers::kTransferEncoding)) {
if (header.second == "chunked") {
// The content is chunked.
chunked_ = true;
}
LOG_INFO("Content length: %u.", content_length_);
break;
}
// Reserve memory to avoid frequent reallocation when append.
try {
content_.reserve(content_length_);
} catch (const std::exception& e) {
LOG_ERRO("Failed to reserve content memory: %s.", e.what());
return false;
}
} while (false);
// Parse Content-Type.
if (boost::iequals(header.first, headers::kContentType)) {
} else if (boost::iequals(header.first, headers::kContentType)) {
ContentType content_type(header.second);
if (!content_type.Valid()) {
LOG_ERRO("Invalid content-type header: %s", header.second.c_str());
return false;
} else {
message_->SetContentType(content_type);
}
} else if (boost::iequals(header.first, headers::kTransferEncoding)) {
if (header.second == "chunked") {
// The content is chunked.
chunked_ = true;
}
message_->SetContentType(content_type);
}
message_->SetHeader(std::move(header));
@ -220,7 +206,7 @@ bool Parser::ParseFixedContent(const char* data, std::size_t length) {
pending_data_.clear();
}
// NOTE: Don't have to firstly put the data to the pending data.
// Don't have to firstly put the data to the pending data.
AppendContent(data, length);
if (IsContentFull()) {
@ -232,13 +218,8 @@ bool Parser::ParseFixedContent(const char* data, std::size_t length) {
}
bool Parser::ParseChunkedContent(const char* data, std::size_t length) {
// Append the new data to the pending data.
// NOTE: It's more difficult to avoid this than fixed-length content.
pending_data_.append(data, length);
LOG_VERB("Parse chunked content (pending data size: %u).",
pending_data_.size());
while (true) {
// Read chunk-size if necessary.
if (chunk_size_ == kInvalidLength) {
@ -321,6 +302,9 @@ bool Parser::Finish() {
return true;
}
// Could be kInvalidLength when chunked.
message_->set_content_length(content_length_);
if (!IsContentCompressed()) {
message_->SetContent(std::move(content_), false);
return true;

@ -59,6 +59,7 @@ protected:
void AppendContent(const char* data, std::size_t count);
void AppendContent(const std::string& data);
// TODO: Rename to IsFixedContentFull.
bool IsContentFull() const;
// Check header Content-Encoding to see if the content is compressed.

@ -48,13 +48,8 @@ bool RequestParser::ParseContent(const char* data, std::size_t length) {
bool RequestParser::ParseMultipartContent(const char* data,
std::size_t length) {
// Append the new data to the pending data.
// NOTE: It's more difficult to avoid this than normal fixed-length content.
pending_data_.append(data, length);
LOG_VERB("Parse multipart content (3pending data size: %u).",
pending_data_.size());
if (!content_length_parsed_ || content_length_ == kInvalidLength) {
// Invalid content length (syntax error).
return false;
@ -71,7 +66,7 @@ bool RequestParser::ParseMultipartContent(const char* data,
if (!GetNextLine(0, &line, true)) {
break; // Not enough data
}
if (!IsBoundary(line)) {
if (!IsBoundary(line, 0, line.size())) {
LOG_ERRO("Invalid boundary: %s", line.c_str());
return false;
}
@ -105,12 +100,13 @@ bool RequestParser::ParseMultipartContent(const char* data,
std::size_t off = 0;
std::size_t count = 0;
bool ended = false;
// TODO: Remember last CRLF position.
if (!GetNextBoundaryLine(&off, &count, &ended)) {
// Wait until next boundary.
break;
}
LOG_INFO("Next boundary has been found.");
LOG_INFO("Next boundary found.");
// This part has ended.
if (off > 2) {
@ -218,40 +214,40 @@ bool RequestParser::GetNextBoundaryLine(std::size_t* b_off,
continue; // Empty line
}
// TODO: Avoid temp string.
std::string line = pending_data_.substr(off, count);
if (IsBoundary(line)) {
if (IsBoundary(pending_data_, off, count, ended)) {
*b_off = off;
*b_count = count;
return true;
}
if (IsBoundaryEnd(line)) {
*b_off = off;
*b_count = count;
*ended = true;
return true;
}
off = pos + 2;
}
return false;
}
bool RequestParser::IsBoundary(const std::string& line) const {
if (line == "--" + request_->content_type().boundary()) {
return true;
bool RequestParser::IsBoundary(const std::string& str, std::size_t off,
std::size_t count, bool* end) const {
const std::string& boundary = request_->content_type().boundary();
if (count != boundary.size() + 2 && count != boundary.size() + 4) {
return false;
}
if (str[off] != '-' || str[off + 1] != '-') {
return false;
}
return false;
}
bool RequestParser::IsBoundaryEnd(const std::string& line) const {
if (line == "--" + request_->content_type().boundary() + "--") {
return true;
if (count == boundary.size() + 4) {
if (str[off + count - 1] != '-' || str[off + count - 2] != '-') {
return false;
}
if (end != nullptr) {
*end = true;
}
}
return false;
return strncmp(boundary.c_str(), &str[off + 2], boundary.size()) == 0;
}
} // namespace webcc

@ -29,8 +29,10 @@ private:
bool ParsePartHeaders(bool* need_more_data);
bool GetNextBoundaryLine(std::size_t* b_off, std::size_t* b_count,
bool* ended);
bool IsBoundary(const std::string& line) const;
bool IsBoundaryEnd(const std::string& line) const;
// Check if the str.substr(off, count) is a boundary, without creating a
// temporary substring.
// A range of the form "--<boundary>" or "--<boundary>--" is considered, where
// <boundary> comes from the request's content type.
// `end` is optional: when non-null it may be set to true for a range shaped
// like the closing "--<boundary>--" form. It is never set to false here, so
// the caller should initialize it.
bool IsBoundary(const std::string& str, std::size_t off,
std::size_t count, bool* end = nullptr) const;
private:
Request* request_;

Loading…
Cancel
Save