From bf3e7d8ed57caab9c7f20bc2f4a7b1b6ba176b9f Mon Sep 17 00:00:00 2001 From: Charalampos Stratakis Date: Mon, 12 Dec 2016 19:17:03 +0100 Subject: [PATCH] Rebuild for Python 3.6 Added patch for fixing invalid escape sequences with Python 3.6 --- fix-invalid-escape-sequences.patch | 52 ++++++++++++++++++++++++++++++ python-html5lib.spec | 11 +++++-- 2 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 fix-invalid-escape-sequences.patch diff --git a/fix-invalid-escape-sequences.patch b/fix-invalid-escape-sequences.patch new file mode 100644 index 0000000..a1eb131 --- /dev/null +++ b/fix-invalid-escape-sequences.patch @@ -0,0 +1,52 @@ +diff --git a/html5lib/sanitizer.py b/html5lib/sanitizer.py +index 71dc521..56e3ac7 100644 +--- a/html5lib/sanitizer.py ++++ b/html5lib/sanitizer.py +@@ -185,7 +185,7 @@ class HTMLSanitizerMixin(object): + for attr in self.attr_val_is_uri: + if attr not in attrs: + continue +- val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '', ++ val_unescaped = re.sub(r"[`\000-\040\177-\240\s]+", '', + unescape(attrs[attr])).lower() + # remove replacement characters from unescaped characters + val_unescaped = val_unescaped.replace("\ufffd", "") +@@ -199,7 +199,7 @@ class HTMLSanitizerMixin(object): + ' ', + unescape(attrs[attr])) + if (token["name"] in self.svg_allow_local_href and +- 'xlink:href' in attrs and re.search('^\s*[^#\s].*', ++ 'xlink:href' in attrs and re.search(r'^\s*[^#\s].*', + attrs['xlink:href'])): + del attrs['xlink:href'] + if 'style' in attrs: +@@ -228,16 +228,16 @@ class HTMLSanitizerMixin(object): + + def sanitize_css(self, style): + # disallow urls +- style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) ++ style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) + + # gauntlet +- if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): ++ if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): + return '' +- if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): ++ if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): + return '' + + clean = [] +- for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style): ++ for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style): + if not value: + continue + if prop.lower() in self.allowed_css_properties: +@@ -246,7 +246,7 @@ class HTMLSanitizerMixin(object): + 'padding']: + for keyword in value.split(): + if not keyword in self.acceptable_css_keywords and \ +- not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): ++ not re.match(r"^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): + break + else: + clean.append(prop + ': ' + value + ';') diff --git a/python-html5lib.spec b/python-html5lib.spec index b14fd85..8fad3f4 100644 --- a/python-html5lib.spec +++ b/python-html5lib.spec @@ -6,13 +6,15 @@ Name: python-%{modulename} Summary: A python based HTML parser/tokenizer Version: 0.999 -Release: 9%{?dist} +Release: 10%{?dist} Epoch: 1 Group: Development/Libraries License: MIT URL: https://pypi.python.org/pypi/%{modulename} -Source0: https://pypi.python.org/packages/source/h/%{modulename}/%{modulename}-%{version}.tar.gz +Source0: https://pypi.python.org/packages/source/h/%{modulename}/%{modulename}-%{version}.tar.gz +# Patch for fixing invalid escape sequences with Python 3.6 +Patch0: fix-invalid-escape-sequences.patch BuildArch: noarch Requires: python-six @@ -45,6 +47,7 @@ specification for maximum compatibility with major desktop web browsers. %prep %setup -q -n %{modulename}-%{version} +%patch0 -p1 %if 0%{?with_python3} rm -rf %{py3dir} @@ -94,6 +97,10 @@ popd %changelog +* Mon Dec 12 2016 Charalampos Stratakis - 1:0.999-10 +- Rebuild for Python 3.6 +- Fix invalid escape sequences + * Tue Jul 19 2016 Fedora Release Engineering - 1:0.999-9 - https://fedoraproject.org/wiki/Changes/Automatic_Provides_for_Python_RPM_Packages