You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

275 lines
6.5 KiB
C++

#include "webcc/request_parser.h"
#include <vector>
#include "boost/algorithm/string.hpp"
#include "webcc/logger.h"
#include "webcc/request.h"
#include "webcc/string.h"
#include "webcc/utility.h"
namespace webcc {
RequestParser::RequestParser() : request_(nullptr) {
}
// Binds this parser to the request it should fill in and to the callback
// used by OnHeadersEnd() to match the request against a view.
//
// `view_matcher` must be non-empty (checked by assertion).
void RequestParser::Init(Request* request, ViewMatcher view_matcher) {
  assert(view_matcher);

  // Let the base parser initialize itself with the same message first.
  Parser::Init(request);

  // Then remember the request-specific state.
  request_ = request;
  view_matcher_ = view_matcher;
}
// Invokes the view matcher with the request's method, URL path and the
// address of `stream_`.
//
// Returns the matcher's result; a warning is logged when nothing matches.
bool RequestParser::OnHeadersEnd() {
  if (view_matcher_(request_->method(), request_->url().path(), &stream_)) {
    return true;
  }

  LOG_WARN("No view matches the request: %s %s", request_->method().c_str(),
           request_->url().path().c_str());
  return false;
}
bool RequestParser::ParseStartLine(const std::string& line) {
std::vector<boost::string_view> parts;
Split(line, ' ', true, &parts);
if (parts.size() != 3) {
return false;
}
request_->set_method(parts[0].to_string());
request_->set_url(Url{ parts[1].to_string() });
// HTTP version is ignored.
return true;
}
// Dispatches content parsing: multipart content is handled by
// ParseMultipartContent(), everything else by the base Parser.
bool RequestParser::ParseContent(const char* data, std::size_t length) {
  if (!content_type_.multipart()) {
    return Parser::ParseContent(data, length);
  }
  return ParseMultipartContent(data, length);
}
// Incrementally parses multipart content.
//
// `data`/`length` is the newly received chunk; it is appended to
// `pending_data_` and then as much as possible is consumed by a small state
// machine driven by `step_`:
//
//   kStart          -> expects the first boundary line.
//   kBoundaryParsed -> parses the headers of the next part.
//   kHeadersParsed  -> looks for the next boundary line; everything before it
//                      (minus the CRLF preceding the boundary) is the data of
//                      the current part.
//   kEnded          -> the closing boundary was seen; the collected parts are
//                      wrapped in a FormBody and set on the request.
//
// Returns false on a syntax error (invalid content length, invalid first
// boundary, invalid part headers, or a boundary found at an offset < 2).
// Returns true otherwise, including when more data is needed from the next
// read.
bool RequestParser::ParseMultipartContent(const char* data,
                                          std::size_t length) {
  pending_data_.append(data, length);

  if (!content_length_parsed_ || content_length_ == kInvalidLength) {
    // Invalid content length (syntax error).
    return false;
  }

  while (true) {
    if (pending_data_.empty()) {
      // Wait data from next read.
      break;
    }

    if (step_ == Step::kStart) {
      std::string line;
      if (!GetNextLine(0, &line, true)) {
        break;  // Not enough data
      }

      if (!IsBoundary(line, 0, line.size())) {
        LOG_ERRO("Invalid boundary: %s", line.c_str());
        return false;
      }

      LOG_INFO("Boundary line: %s", line.c_str());

      // Go to next step.
      step_ = Step::kBoundaryParsed;
      continue;
    }

    if (step_ == Step::kBoundaryParsed) {
      // The part may already exist if a previous call ran out of data in the
      // middle of the headers.
      if (!part_) {
        part_.reset(new FormPart{});
      }

      bool need_more_data = false;
      if (ParsePartHeaders(&need_more_data)) {
        // Go to next step.
        step_ = Step::kHeadersParsed;
        LOG_INFO("Part headers just ended");
        continue;
      } else {
        if (need_more_data) {
          // Need more data from next read.
          break;
        } else {
          return false;
        }
      }
    }

    if (step_ == Step::kHeadersParsed) {
      std::size_t off = 0;
      std::size_t count = 0;
      bool ended = false;

      // TODO: Remember last CRLF position.
      if (!GetNextBoundaryLine(&off, &count, &ended)) {
        // No boundary in the pending data yet; wait for the next read.
        break;
      }

      // Next boundary found.
      // NOTE: `off` is a std::size_t; passing it directly to "%u" is
      // undefined behavior where size_t is wider than unsigned int, so it is
      // converted explicitly for logging.
      LOG_INFO("Next boundary found, off=%u", static_cast<unsigned int>(off));

      // This part has ended.
      if (off >= 2) {
        // -2 for excluding the CRLF after the data.
        part_->AppendData(pending_data_.data(), off - 2);

        // Erase the data of this part and the next boundary.
        // +2 for including the CRLF after the boundary.
        pending_data_.erase(0, off + count + 2);
      } else {
        LOG_ERRO("Invalid part data, off=%u", static_cast<unsigned int>(off));
        return false;
      }

      // Save this part
      form_parts_.push_back(part_);

      // Reset for next part.
      part_.reset();

      if (ended) {
        // Go to the end step.
        step_ = Step::kEnded;
        break;
      } else {
        // Go to next step.
        step_ = Step::kBoundaryParsed;
        continue;
      }
    }
  }

  if (step_ == Step::kEnded) {
    LOG_INFO("Multipart data has ended");

    // Create a body and set to the request.
    auto body = std::make_shared<FormBody>(form_parts_,
                                           content_type_.boundary());
    request_->SetBody(body, false);  // TODO: set_length?

    Finish();
  }

  return true;
}
bool RequestParser::ParsePartHeaders(bool* need_more_data) {
std::size_t off = 0;
while (true) {
std::string line;
if (!GetNextLine(off, &line, false)) {
// Need more data from next read.
*need_more_data = true;
return false;
}
off = off + line.size() + 2; // +2 for CRLF
if (line.empty()) {
// Headers finished.
break;
}
Header header;
if (!SplitKV(line, ':', true, &header.first, &header.second)) {
LOG_ERRO("Invalid part header line: %s", line.c_str());
return false;
}
LOG_INFO("Part header (%s: %s)", header.first.c_str(),
header.second.c_str());
// Parse Content-Disposition.
if (boost::iequals(header.first, headers::kContentDisposition)) {
ContentDisposition content_disposition(header.second);
if (!content_disposition.valid()) {
LOG_ERRO("Invalid content-disposition header: %s",
header.second.c_str());
return false;
}
part_->set_name(content_disposition.name());
part_->set_file_name(content_disposition.file_name());
LOG_INFO("Content-Disposition (name=%s; filename=%s)",
part_->name().c_str(), part_->file_name().c_str());
}
// TODO: Parse other headers.
}
// Remove the data which has just been parsed.
pending_data_.erase(0, off);
return true;
}
// Scans `pending_data_` line by line (lines are terminated by kCRLF) for the
// first line that is a multipart boundary.
//
// On success returns true and sets:
//   *b_off  - offset of the boundary line within `pending_data_`;
//   *b_len  - length of the boundary line, excluding the trailing CRLF;
//   *ended  - (if non-null) whether the matched line is the closing boundary.
//
// Returns false, leaving all outputs untouched, if no complete boundary line
// is present in the pending data.
bool RequestParser::GetNextBoundaryLine(std::size_t* b_off, std::size_t* b_len,
                                        bool* ended) {
  std::size_t off = 0;

  while (true) {
    const std::size_t pos = pending_data_.find(kCRLF, off);
    if (pos == std::string::npos) {
      break;
    }

    const std::size_t len = pos - off;

    // Empty lines cannot be boundaries; skip them.
    if (len != 0) {
      // Use a per-line flag so that a rejected candidate line can never leak
      // a stale "closing boundary" indication into the caller's `ended`,
      // which would make a later ordinary boundary look like the final one.
      bool is_closing = false;
      if (IsBoundary(pending_data_, off, len, &is_closing)) {
        *b_off = off;
        *b_len = len;
        if (ended != nullptr) {
          *ended = is_closing;
        }
        return true;
      }
    }

    // Continue with the line after this CRLF.
    off = pos + 2;
  }

  return false;
}
// Checks whether the `count` characters of `str` starting at `off` form a
// multipart boundary line for the boundary string of `content_type_`.
//
// Two forms are accepted:
//   "--" + boundary          (ordinary boundary)
//   "--" + boundary + "--"   (closing boundary)
//
// If the line is a closing boundary and `end` is non-null, `*end` is set to
// true. `*end` is written only when the function returns true; it is never
// touched for lines that are rejected.
bool RequestParser::IsBoundary(const std::string& str, std::size_t off,
                               std::size_t count, bool* end) const {
  const std::string& boundary = content_type_.boundary();

  const bool ordinary_length = (count == boundary.size() + 2);
  const bool closing_length = (count == boundary.size() + 4);
  if (!ordinary_length && !closing_length) {
    return false;
  }

  // Reject ranges that do not lie inside `str` instead of reading past it.
  if (off > str.size() || count > str.size() - off) {
    return false;
  }

  // Leading "--".
  if (str.compare(off, 2, "--") != 0) {
    return false;
  }

  // The boundary text itself.
  if (str.compare(off + 2, boundary.size(), boundary) != 0) {
    return false;
  }

  if (closing_length) {
    // Trailing "--" of the closing boundary.
    if (str.compare(off + count - 2, 2, "--") != 0) {
      return false;
    }

    // Report the closing boundary only now that the whole line is verified.
    if (end != nullptr) {
      *end = true;
    }
  }

  return true;
}
} // namespace webcc