You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
53 lines
2.8 KiB
53 lines
2.8 KiB
diff --git a/html5lib/sanitizer.py b/html5lib/sanitizer.py
|
|
index 71dc521..56e3ac7 100644
|
|
--- a/html5lib/sanitizer.py
|
|
+++ b/html5lib/sanitizer.py
|
|
@@ -185,7 +185,7 @@ class HTMLSanitizerMixin(object):
|
|
for attr in self.attr_val_is_uri:
|
|
if attr not in attrs:
|
|
continue
|
|
- val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
|
|
+ val_unescaped = re.sub(r"[`\000-\040\177-\240\s]+", '',
|
|
unescape(attrs[attr])).lower()
|
|
# remove replacement characters from unescaped characters
|
|
val_unescaped = val_unescaped.replace("\ufffd", "")
|
|
@@ -199,7 +199,7 @@ class HTMLSanitizerMixin(object):
|
|
' ',
|
|
unescape(attrs[attr]))
|
|
if (token["name"] in self.svg_allow_local_href and
|
|
- 'xlink:href' in attrs and re.search('^\s*[^#\s].*',
|
|
+ 'xlink:href' in attrs and re.search(r'^\s*[^#\s].*',
|
|
attrs['xlink:href'])):
|
|
del attrs['xlink:href']
|
|
if 'style' in attrs:
|
|
@@ -228,16 +228,16 @@ class HTMLSanitizerMixin(object):
|
|
|
|
def sanitize_css(self, style):
|
|
# disallow urls
|
|
- style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
|
|
+ style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
|
|
|
|
# gauntlet
|
|
- if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
|
|
+ if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
|
|
return ''
|
|
- if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
|
|
+ if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
|
|
return ''
|
|
|
|
clean = []
|
|
- for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
|
|
+ for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
|
|
if not value:
|
|
continue
|
|
if prop.lower() in self.allowed_css_properties:
|
|
@@ -246,7 +246,7 @@ class HTMLSanitizerMixin(object):
|
|
'padding']:
|
|
for keyword in value.split():
|
|
if not keyword in self.acceptable_css_keywords and \
|
|
- not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
|
|
+ not re.match(r"^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
|
|
break
|
|
else:
|
|
clean.append(prop + ': ' + value + ';')
|