From 9f6aa6b5f06ecfcfea2084d88f377c6e9dba5ce2 Mon Sep 17 00:00:00 2001 From: Ryan Petrello Date: Tue, 30 Apr 2019 12:36:48 -0400 Subject: [PATCH 1/3] prevent CVE-2019-9740 in 1.24.x adapted from https://github.com/python/cpython/pull/12755 --- test/test_util.py | 5 +++++ src/urllib3/util/url.py | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/test/test_util.py b/test/test_util.py index 73d9452..dc6ffd0 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -200,6 +200,11 @@ class TestUtil(object): with pytest.raises(ValueError): parse_url('[::1') + def test_parse_url_contains_control_characters(self): + # see CVE-2019-9740 + with pytest.raises(LocationParseError): + parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:') + def test_Url_str(self): U = Url('http', host='google.com') assert str(U) == U.url diff --git a/src/urllib3/util/url.py b/src/urllib3/util/url.py index 6b6f996..e8e1bd7 100644 --- a/src/urllib3/util/url.py +++ b/src/urllib3/util/url.py @@ -1,5 +1,6 @@ from __future__ import absolute_import from collections import namedtuple +import re from ..exceptions import LocationParseError @@ -10,6 +11,8 @@ url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] # urllib3 infers URLs without a scheme (None) to be http. NORMALIZABLE_SCHEMES = ('http', 'https', None) +_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]') + class Url(namedtuple('Url', url_attrs)): """ @@ -155,6 +158,11 @@ def parse_url(url): # Empty return Url() + # Prevent CVE-2019-9740. + # adapted from https://github.com/python/cpython/pull/12755 + if _contains_disallowed_url_pchar_re.search(url): + raise LocationParseError("URL can't contain control characters. {!r}".format(url)) + scheme = None auth = None host = None -- 2.20.1 From ecc15bd412354ad916712113b0e426f8bc6cf52d Mon Sep 17 00:00:00 2001 From: Ryan Petrello Date: Wed, 1 May 2019 16:46:44 -0400 Subject: [PATCH 2/3] avoid CVE-2019-9740 by percent-encoding invalid path characters this is to avoid breaking changes in downstream libraries like requests --- test/test_util.py | 4 ++-- src/urllib3/util/url.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index dc6ffd0..d139329 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -202,8 +202,8 @@ class TestUtil(object): def test_parse_url_contains_control_characters(self): # see CVE-2019-9740 - with pytest.raises(LocationParseError): - parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:') + url = parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:') + assert url.path == '/%20HTTP/1.1%0D%0AHEADER:%20INJECTED%0D%0AIgnore:' def test_Url_str(self): U = Url('http', host='google.com') diff --git a/src/urllib3/util/url.py b/src/urllib3/util/url.py index e8e1bd7..12b8d55 100644 --- a/src/urllib3/util/url.py +++ b/src/urllib3/util/url.py @@ -3,6 +3,7 @@ from collections import namedtuple import re from ..exceptions import LocationParseError +from ..packages.six.moves.urllib.parse import quote url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] @@ -160,8 +161,7 @@ def parse_url(url): # Prevent CVE-2019-9740. # adapted from https://github.com/python/cpython/pull/12755 - if _contains_disallowed_url_pchar_re.search(url): - raise LocationParseError("URL can't contain control characters. {!r}".format(url)) + url = _contains_disallowed_url_pchar_re.sub(lambda match: quote(match.group()), url) scheme = None auth = None -- 2.20.1 From 6cda449df587fd37135ee76a9253dc8e12e53c05 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Thu, 2 May 2019 09:02:24 -0500 Subject: [PATCH 3/3] Also test unicode and query --- test/test_util.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index d139329..fa53aaf 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -200,10 +200,26 @@ class TestUtil(object): with pytest.raises(ValueError): parse_url('[::1') - def test_parse_url_contains_control_characters(self): + @pytest.mark.parametrize('url, expected_url', [ + ( + 'http://localhost/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:', + Url('http', host='localhost', port=None, + path='/%20HTTP/1.1%0D%0AHEADER:%20INJECTED%0D%0AIgnore:') + ), + ( + u'http://localhost/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:', + Url('http', host='localhost', port=None, + path='/%20HTTP/1.1%0D%0AHEADER:%20INJECTED%0D%0AIgnore:') + ), + ( + 'http://localhost/ ?q=\r\n', + Url('http', host='localhost', path='/%20', query='q=%0D%0A') + ), + ]) + def test_parse_url_contains_control_characters(self, url, expected_url): # see CVE-2019-9740 - url = parse_url('http://localhost:8000/ HTTP/1.1\r\nHEADER: INJECTED\r\nIgnore:') - assert url.path == '/%20HTTP/1.1%0D%0AHEADER:%20INJECTED%0D%0AIgnore:' + url = parse_url(url) + assert url == expected_url def test_Url_str(self): U = Url('http', host='google.com') -- 2.20.1