You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
127 lines
4.3 KiB
127 lines
4.3 KiB
5 months ago
|
From b3b4d0847c0b22a6f2b12090d8b6b79c4cdea95c Mon Sep 17 00:00:00 2001
|
||
|
From: Bert JW Regeer <bertjw@regeer.org>
|
||
|
Date: Sat, 12 Mar 2022 18:30:30 -0700
|
||
|
Subject: [PATCH 1/8] Add new regular expressions for Chunked Encoding
|
||
|
|
||
|
This also moves some regular expressions for QUOTED_PAIR/QUOTED_STRING
|
||
|
into this module from utilities so that they may be reused.
|
||
|
|
||
|
(cherry picked from commit e75b0d9afbea8a933f8f5f11d279e661cbfd676b)
|
||
|
---
|
||
|
waitress/rfc7230.py | 27 ++++++++++++++++++++++++++-
|
||
|
waitress/utilities.py | 28 +++-------------------------
|
||
|
2 files changed, 29 insertions(+), 26 deletions(-)
|
||
|
|
||
|
diff --git a/waitress/rfc7230.py b/waitress/rfc7230.py
|
||
|
index cd33c90..4c4c0a9 100644
|
||
|
--- a/waitress/rfc7230.py
|
||
|
+++ b/waitress/rfc7230.py
|
||
|
@@ -7,6 +7,9 @@ import re
|
||
|
|
||
|
from .compat import tobytes
|
||
|
|
||
|
+HEXDIG = "[0-9a-fA-F]"
|
||
|
+DIGIT = "[0-9]"
|
||
|
+
|
||
|
WS = "[ \t]"
|
||
|
OWS = WS + "{0,}?"
|
||
|
RWS = WS + "{1,}?"
|
||
|
@@ -27,6 +30,12 @@ TOKEN = TCHAR + "{1,}"
|
||
|
# ; visible (printing) characters
|
||
|
VCHAR = r"\x21-\x7e"
|
||
|
|
||
|
+# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
|
||
|
+QDTEXT = "[\t \x21\x23-\x5b\\\x5d-\x7e" + OBS_TEXT + "]"
|
||
|
+
|
||
|
+QUOTED_PAIR = r"\\" + "([\t " + VCHAR + OBS_TEXT + "])"
|
||
|
+QUOTED_STRING = '"(?:(?:' + QDTEXT + ")|(?:" + QUOTED_PAIR + '))*"'
|
||
|
+
|
||
|
# header-field = field-name ":" OWS field-value OWS
|
||
|
# field-name = token
|
||
|
# field-value = *( field-content / obs-fold )
|
||
|
@@ -45,8 +54,24 @@ FIELD_CONTENT = FIELD_VCHAR + "+(?:[ \t]+" + FIELD_VCHAR + "+)*"
|
||
|
# Which allows the field value here to just see if there is even a value in the first place
|
||
|
FIELD_VALUE = "(?:" + FIELD_CONTENT + ")?"
|
||
|
|
||
|
-HEADER_FIELD = re.compile(
|
||
|
+# chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
|
||
|
+# chunk-ext-name = token
|
||
|
+# chunk-ext-val = token / quoted-string
|
||
|
+
|
||
|
+CHUNK_EXT_NAME = TOKEN
|
||
|
+CHUNK_EXT_VAL = "(?:" + TOKEN + ")|(?:" + QUOTED_STRING + ")"
|
||
|
+CHUNK_EXT = (
|
||
|
+ "(?:;(?P<extension>" + CHUNK_EXT_NAME + ")(?:=(?P<value>" + CHUNK_EXT_VAL + "))?)*"
|
||
|
+)
|
||
|
+
|
||
|
+# Pre-compiled regular expressions for use elsewhere
|
||
|
+ONLY_HEXDIG_RE = re.compile(tobytes("^" + HEXDIG + "+$"))
|
||
|
+ONLY_DIGIT_RE = re.compile(tobytes("^" + DIGIT + "+$"))
|
||
|
+HEADER_FIELD_RE = re.compile(
|
||
|
tobytes(
|
||
|
"^(?P<name>" + TOKEN + "):" + OWS + "(?P<value>" + FIELD_VALUE + ")" + OWS + "$"
|
||
|
)
|
||
|
)
|
||
|
+QUOTED_PAIR_RE = re.compile(QUOTED_PAIR)
|
||
|
+QUOTED_STRING_RE = re.compile(QUOTED_STRING)
|
||
|
+CHUNK_EXT_RE = re.compile(tobytes("^" + CHUNK_EXT + "$"))
|
||
|
diff --git a/waitress/utilities.py b/waitress/utilities.py
|
||
|
index 556bed2..fa59657 100644
|
||
|
--- a/waitress/utilities.py
|
||
|
+++ b/waitress/utilities.py
|
||
|
@@ -22,7 +22,7 @@ import re
|
||
|
import stat
|
||
|
import time
|
||
|
|
||
|
-from .rfc7230 import OBS_TEXT, VCHAR
|
||
|
+from .rfc7230 import QUOTED_PAIR_RE, QUOTED_STRING_RE
|
||
|
|
||
|
logger = logging.getLogger("waitress")
|
||
|
queue_logger = logging.getLogger("waitress.queue")
|
||
|
@@ -216,32 +216,10 @@ def parse_http_date(d):
|
||
|
return retval
|
||
|
|
||
|
|
||
|
-# RFC 5234 Appendix B.1 "Core Rules":
|
||
|
-# VCHAR = %x21-7E
|
||
|
-# ; visible (printing) characters
|
||
|
-vchar_re = VCHAR
|
||
|
-
|
||
|
-# RFC 7230 Section 3.2.6 "Field Value Components":
|
||
|
-# quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
|
||
|
-# qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
|
||
|
-# obs-text = %x80-FF
|
||
|
-# quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
|
||
|
-obs_text_re = OBS_TEXT
|
||
|
-
|
||
|
-# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
|
||
|
-qdtext_re = "[\t \x21\x23-\x5b\\\x5d-\x7e" + obs_text_re + "]"
|
||
|
-
|
||
|
-quoted_pair_re = r"\\" + "([\t " + vchar_re + obs_text_re + "])"
|
||
|
-quoted_string_re = '"(?:(?:' + qdtext_re + ")|(?:" + quoted_pair_re + '))*"'
|
||
|
-
|
||
|
-quoted_string = re.compile(quoted_string_re)
|
||
|
-quoted_pair = re.compile(quoted_pair_re)
|
||
|
-
|
||
|
-
|
||
|
def undquote(value):
|
||
|
if value.startswith('"') and value.endswith('"'):
|
||
|
# So it claims to be DQUOTE'ed, let's validate that
|
||
|
- matches = quoted_string.match(value)
|
||
|
+ matches = QUOTED_STRING_RE.match(value)
|
||
|
|
||
|
if matches and matches.end() == len(value):
|
||
|
# Remove the DQUOTE's from the value
|
||
|
@@ -249,7 +227,7 @@ def undquote(value):
|
||
|
|
||
|
# Remove all backslashes that are followed by a valid vchar or
|
||
|
# obs-text
|
||
|
- value = quoted_pair.sub(r"\1", value)
|
||
|
+ value = QUOTED_PAIR_RE.sub(r"\1", value)
|
||
|
|
||
|
return value
|
||
|
elif not value.startswith('"') and not value.endswith('"'):
|
||
|
--
|
||
|
2.45.2
|
||
|
|