From 05f6af2f4c85cc99323cfff6149c3d74af661b6d Mon Sep 17 00:00:00 2001
From: Amos Jeffries <yadij@users.noreply.github.com>
Date: Fri, 13 Oct 2023 08:44:16 +0000
Subject: [PATCH] RFC 9112: Improve HTTP chunked encoding compliance (#1498)

---
 src/http/one/Parser.cc          |  8 +-------
 src/http/one/Parser.h           |  4 +---
 src/http/one/TeChunkedParser.cc | 23 ++++++++++++++++++-----
 src/parser/Tokenizer.cc         | 12 ++++++++++++
 src/parser/Tokenizer.h          |  7 +++++++
 5 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc
index c78ddd7f0..291ae39f0 100644
--- a/src/http/one/Parser.cc
+++ b/src/http/one/Parser.cc
@@ -65,16 +65,10 @@ Http::One::Parser::DelimiterCharacters()
 void
 Http::One::Parser::skipLineTerminator(Tokenizer &tok) const
 {
-    if (tok.skip(Http1::CrLf()))
-        return;
-
     if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
         return;
 
-    if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
-        throw InsufficientInput();
-
-    throw TexcHere("garbage instead of CRLF line terminator");
+    tok.skipRequired("line-terminating CRLF", Http1::CrLf());
 }
 
 /// all characters except the LF line terminator
diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h
index f83c01a9a..aab895583 100644
--- a/src/http/one/Parser.h
+++ b/src/http/one/Parser.h
@@ -124,9 +124,7 @@ protected:
      * detect and skip the CRLF or (if tolerant) LF line terminator
      * consume from the tokenizer.
      *
-     * \throws exception on bad or InsuffientInput.
-     * \retval true only if line terminator found.
-     * \retval false incomplete or missing line terminator, need more data.
+     * \throws exception on bad or InsufficientInput
      */
     void skipLineTerminator(Tokenizer &) const;
 
diff --git a/src/http/one/TeChunkedParser.cc b/src/http/one/TeChunkedParser.cc
index 1434100b6..8bdb65abb 100644
--- a/src/http/one/TeChunkedParser.cc
+++ b/src/http/one/TeChunkedParser.cc
@@ -91,6 +91,11 @@ Http::One::TeChunkedParser::parseChunkSize(Tokenizer &tok)
 {
     Must(theChunkSize <= 0); // Should(), really
 
+    static const SBuf bannedHexPrefixLower("0x");
+    static const SBuf bannedHexPrefixUpper("0X");
+    if (tok.skip(bannedHexPrefixLower) || tok.skip(bannedHexPrefixUpper))
+        throw TextException("chunk starts with 0x", Here());
+
     int64_t size = -1;
     if (tok.int64(size, 16, false) && !tok.atEnd()) {
         if (size < 0)
@@ -121,7 +126,7 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
     // bad or insufficient input, like in the code below. TODO: Expand up.
     try {
         parseChunkExtensions(tok); // a possibly empty chunk-ext list
-        skipLineTerminator(tok);
+        tok.skipRequired("CRLF after [chunk-ext]", Http1::CrLf());
         buf_ = tok.remaining();
         parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
         return true;
@@ -132,12 +137,14 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
     // other exceptions bubble up to kill message parsing
 }
 
-/// Parses the chunk-ext list (RFC 7230 section 4.1.1 and its Errata #4667):
+/// Parses the chunk-ext list (RFC 9112 section 7.1.1:
 /// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
 void
-Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok)
+Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &callerTok)
 {
     do {
+        auto tok = callerTok;
+
         ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
 
         if (!tok.skip(';'))
@@ -145,6 +152,7 @@ Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok)
 
         parseOneChunkExtension(tok);
         buf_ = tok.remaining(); // got one extension
+        callerTok = tok;
     } while (true);
 }
 
@@ -158,11 +166,14 @@ Http::One::ChunkExtensionValueParser::Ignore(Tokenizer &tok, const SBuf &extName
 /// Parses a single chunk-ext list element:
 /// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
 void
-Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
+Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &callerTok)
 {
+    auto tok = callerTok;
+
     ParseBws(tok); // Bug 4492: ICAP servers send SP before chunk-ext-name
 
     const auto extName = tok.prefix("chunk-ext-name", CharacterSet::TCHAR);
+    callerTok = tok; // in case we determine that this is a valueless chunk-ext
 
     ParseBws(tok);
 
@@ -176,6 +187,8 @@ Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
         customExtensionValueParser->parse(tok, extName);
     else
         ChunkExtensionValueParser::Ignore(tok, extName);
+
+    callerTok = tok;
 }
 
 bool
@@ -209,7 +222,7 @@ Http::One::TeChunkedParser::parseChunkEnd(Tokenizer &tok)
     Must(theLeftBodySize == 0); // Should(), really
 
     try {
-        skipLineTerminator(tok);
+        tok.skipRequired("chunk CRLF", Http1::CrLf());
         buf_ = tok.remaining(); // parse checkpoint
         theChunkSize = 0; // done with the current chunk
         parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc
index edaffd8d3..15df793b8 100644
--- a/src/parser/Tokenizer.cc
+++ b/src/parser/Tokenizer.cc
@@ -147,6 +147,18 @@ Parser::Tokenizer::skipAll(const CharacterSet &tokenChars)
     return success(prefixLen);
 }
 
+void
+Parser::Tokenizer::skipRequired(const char *description, const SBuf &tokenToSkip)
+{
+    if (skip(tokenToSkip) || tokenToSkip.isEmpty())
+        return;
+
+    if (tokenToSkip.startsWith(buf_))
+        throw InsufficientInput();
+
+    throw TextException(ToSBuf("cannot skip ", description), Here());
+}
+
 bool
 Parser::Tokenizer::skipOne(const CharacterSet &chars)
 {
diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h
index 7bae1ccbb..3cfa7dd6c 100644
--- a/src/parser/Tokenizer.h
+++ b/src/parser/Tokenizer.h
@@ -115,6 +115,13 @@ public:
      */
     SBuf::size_type skipAll(const CharacterSet &discardables);
 
+    /** skips a given character sequence (string);
+     * does nothing if the sequence is empty
+     *
+     * \throws exception on mismatching prefix or InsufficientInput
+     */
+    void skipRequired(const char *description, const SBuf &tokenToSkip);
+
     /** Removes a single trailing character from the set.
      *
      * \return whether a character was removed
-- 
2.25.1