parent
8435b04104
commit
4d09487e6e
@ -0,0 +1,365 @@
|
|||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Petr Viktorin <encukou@gmail.com>
|
||||||
|
Date: Wed, 31 Jul 2024 00:19:48 +0200
|
||||||
|
Subject: [PATCH] 00435: gh-121650: Encode newlines in headers, and verify
|
||||||
|
headers are sound (GH-122233)
|
||||||
|
|
||||||
|
Per RFC 2047:
|
||||||
|
|
||||||
|
> [...] these encoding schemes allow the
|
||||||
|
> encoding of arbitrary octet values, mail readers that implement this
|
||||||
|
> decoding should also ensure that display of the decoded data on the
|
||||||
|
> recipient's terminal will not cause unwanted side-effects
|
||||||
|
|
||||||
|
It seems that the "encoded-word" scheme is a valid way to include
|
||||||
|
a newline character in a header value, just like we already allow
|
||||||
|
undecodable bytes or control characters.
|
||||||
|
They do need to be properly quoted when serialized to text, though.
|
||||||
|
|
||||||
|
This should fail for custom fold() implementations that aren't careful
|
||||||
|
about newlines.
|
||||||
|
|
||||||
|
(cherry picked from commit 097633981879b3c9de9a1dd120d3aa585ecc2384)
|
||||||
|
|
||||||
|
Co-authored-by: Petr Viktorin <encukou@gmail.com>
|
||||||
|
Co-authored-by: Bas Bloemsaat <bas@bloemsaat.org>
|
||||||
|
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
|
||||||
|
---
|
||||||
|
Doc/library/email.errors.rst | 7 +++
|
||||||
|
Doc/library/email.policy.rst | 18 ++++++
|
||||||
|
Doc/whatsnew/3.11.rst | 13 ++++
|
||||||
|
Lib/email/_header_value_parser.py | 12 +++-
|
||||||
|
Lib/email/_policybase.py | 8 +++
|
||||||
|
Lib/email/errors.py | 4 ++
|
||||||
|
Lib/email/generator.py | 13 +++-
|
||||||
|
Lib/test/test_email/test_generator.py | 62 +++++++++++++++++++
|
||||||
|
Lib/test/test_email/test_policy.py | 26 ++++++++
|
||||||
|
...-07-27-16-10-41.gh-issue-121650.nf6oc9.rst | 5 ++
|
||||||
|
10 files changed, 164 insertions(+), 4 deletions(-)
|
||||||
|
create mode 100644 Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst
|
||||||
|
|
||||||
|
diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst
|
||||||
|
index 56aea6598b..27b0481a85 100644
|
||||||
|
--- a/Doc/library/email.errors.rst
|
||||||
|
+++ b/Doc/library/email.errors.rst
|
||||||
|
@@ -58,6 +58,13 @@ The following exception classes are defined in the :mod:`email.errors` module:
|
||||||
|
:class:`~email.mime.nonmultipart.MIMENonMultipart` (e.g.
|
||||||
|
:class:`~email.mime.image.MIMEImage`).
|
||||||
|
|
||||||
|
+
|
||||||
|
+.. exception:: HeaderWriteError()
|
||||||
|
+
|
||||||
|
+ Raised when an error occurs when the :mod:`~email.generator` outputs
|
||||||
|
+ headers.
|
||||||
|
+
|
||||||
|
+
|
||||||
|
.. exception:: MessageDefect()
|
||||||
|
|
||||||
|
This is the base class for all defects found when parsing email messages.
|
||||||
|
diff --git a/Doc/library/email.policy.rst b/Doc/library/email.policy.rst
|
||||||
|
index bb406c5a56..3edba4028b 100644
|
||||||
|
--- a/Doc/library/email.policy.rst
|
||||||
|
+++ b/Doc/library/email.policy.rst
|
||||||
|
@@ -228,6 +228,24 @@ added matters. To illustrate::
|
||||||
|
|
||||||
|
.. versionadded:: 3.6
|
||||||
|
|
||||||
|
+
|
||||||
|
+ .. attribute:: verify_generated_headers
|
||||||
|
+
|
||||||
|
+ If ``True`` (the default), the generator will raise
|
||||||
|
+ :exc:`~email.errors.HeaderWriteError` instead of writing a header
|
||||||
|
+ that is improperly folded or delimited, such that it would
|
||||||
|
+ be parsed as multiple headers or joined with adjacent data.
|
||||||
|
+ Such headers can be generated by custom header classes or bugs
|
||||||
|
+ in the ``email`` module.
|
||||||
|
+
|
||||||
|
+ As it's a security feature, this defaults to ``True`` even in the
|
||||||
|
+ :class:`~email.policy.Compat32` policy.
|
||||||
|
+ For backwards compatible, but unsafe, behavior, it must be set to
|
||||||
|
+ ``False`` explicitly.
|
||||||
|
+
|
||||||
|
+ .. versionadded:: 3.11.10
|
||||||
|
+
|
||||||
|
+
|
||||||
|
The following :class:`Policy` method is intended to be called by code using
|
||||||
|
the email library to create policy instances with custom settings:
|
||||||
|
|
||||||
|
diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst
|
||||||
|
index 42b61c75c7..f12c871998 100644
|
||||||
|
--- a/Doc/whatsnew/3.11.rst
|
||||||
|
+++ b/Doc/whatsnew/3.11.rst
|
||||||
|
@@ -2728,6 +2728,7 @@ OpenSSL
|
||||||
|
|
||||||
|
.. _libb2: https://www.blake2.net/
|
||||||
|
|
||||||
|
+
|
||||||
|
Notable changes in 3.11.10
|
||||||
|
==========================
|
||||||
|
|
||||||
|
@@ -2736,3 +2737,15 @@ ipaddress
|
||||||
|
|
||||||
|
* Fixed ``is_global`` and ``is_private`` behavior in ``IPv4Address``,
|
||||||
|
``IPv6Address``, ``IPv4Network`` and ``IPv6Network``.
|
||||||
|
+
|
||||||
|
+email
|
||||||
|
+-----
|
||||||
|
+
|
||||||
|
+* Headers with embedded newlines are now quoted on output.
|
||||||
|
+
|
||||||
|
+ The :mod:`~email.generator` will now refuse to serialize (write) headers
|
||||||
|
+ that are improperly folded or delimited, such that they would be parsed as
|
||||||
|
+ multiple headers or joined with adjacent data.
|
||||||
|
+ If you need to turn this safety feature off,
|
||||||
|
+ set :attr:`~email.policy.Policy.verify_generated_headers` to ``False``.
|
||||||
|
+ (Contributed by Bas Bloemsaat and Petr Viktorin in :gh:`121650`.)
|
||||||
|
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
|
||||||
|
index 8cb8852cf0..255a953092 100644
|
||||||
|
--- a/Lib/email/_header_value_parser.py
|
||||||
|
+++ b/Lib/email/_header_value_parser.py
|
||||||
|
@@ -92,6 +92,8 @@
|
||||||
|
ASPECIALS = TSPECIALS | set("*'%")
|
||||||
|
ATTRIBUTE_ENDS = ASPECIALS | WSP
|
||||||
|
EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%')
|
||||||
|
+NLSET = {'\n', '\r'}
|
||||||
|
+SPECIALSNL = SPECIALS | NLSET
|
||||||
|
|
||||||
|
def quote_string(value):
|
||||||
|
return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
|
||||||
|
@@ -2780,9 +2782,13 @@ def _refold_parse_tree(parse_tree, *, policy):
|
||||||
|
wrap_as_ew_blocked -= 1
|
||||||
|
continue
|
||||||
|
tstr = str(part)
|
||||||
|
- if part.token_type == 'ptext' and set(tstr) & SPECIALS:
|
||||||
|
- # Encode if tstr contains special characters.
|
||||||
|
- want_encoding = True
|
||||||
|
+ if not want_encoding:
|
||||||
|
+ if part.token_type == 'ptext':
|
||||||
|
+ # Encode if tstr contains special characters.
|
||||||
|
+ want_encoding = not SPECIALSNL.isdisjoint(tstr)
|
||||||
|
+ else:
|
||||||
|
+ # Encode if tstr contains newlines.
|
||||||
|
+ want_encoding = not NLSET.isdisjoint(tstr)
|
||||||
|
try:
|
||||||
|
tstr.encode(encoding)
|
||||||
|
charset = encoding
|
||||||
|
diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py
|
||||||
|
index c9cbadd2a8..d1f48211f9 100644
|
||||||
|
--- a/Lib/email/_policybase.py
|
||||||
|
+++ b/Lib/email/_policybase.py
|
||||||
|
@@ -157,6 +157,13 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta):
|
||||||
|
message_factory -- the class to use to create new message objects.
|
||||||
|
If the value is None, the default is Message.
|
||||||
|
|
||||||
|
+ verify_generated_headers
|
||||||
|
+ -- if true, the generator verifies that each header
|
||||||
|
+ is properly folded, so that a parser won't
|
||||||
|
+ treat it as multiple headers, start-of-body, or
|
||||||
|
+ part of another header.
|
||||||
|
+ This is a check against custom Header & fold()
|
||||||
|
+ implementations.
|
||||||
|
"""
|
||||||
|
|
||||||
|
raise_on_defect = False
|
||||||
|
@@ -165,6 +172,7 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta):
|
||||||
|
max_line_length = 78
|
||||||
|
mangle_from_ = False
|
||||||
|
message_factory = None
|
||||||
|
+ verify_generated_headers = True
|
||||||
|
|
||||||
|
def handle_defect(self, obj, defect):
|
||||||
|
"""Based on policy, either raise defect or call register_defect.
|
||||||
|
diff --git a/Lib/email/errors.py b/Lib/email/errors.py
|
||||||
|
index 3ad0056554..02aa5eced6 100644
|
||||||
|
--- a/Lib/email/errors.py
|
||||||
|
+++ b/Lib/email/errors.py
|
||||||
|
@@ -29,6 +29,10 @@ class CharsetError(MessageError):
|
||||||
|
"""An illegal charset was given."""
|
||||||
|
|
||||||
|
|
||||||
|
+class HeaderWriteError(MessageError):
|
||||||
|
+ """Error while writing headers."""
|
||||||
|
+
|
||||||
|
+
|
||||||
|
# These are parsing defects which the parser was able to work around.
|
||||||
|
class MessageDefect(ValueError):
|
||||||
|
"""Base class for a message defect."""
|
||||||
|
diff --git a/Lib/email/generator.py b/Lib/email/generator.py
|
||||||
|
index eb597de76d..563ca17072 100644
|
||||||
|
--- a/Lib/email/generator.py
|
||||||
|
+++ b/Lib/email/generator.py
|
||||||
|
@@ -14,12 +14,14 @@
|
||||||
|
from copy import deepcopy
|
||||||
|
from io import StringIO, BytesIO
|
||||||
|
from email.utils import _has_surrogates
|
||||||
|
+from email.errors import HeaderWriteError
|
||||||
|
|
||||||
|
UNDERSCORE = '_'
|
||||||
|
NL = '\n' # XXX: no longer used by the code below.
|
||||||
|
|
||||||
|
NLCRE = re.compile(r'\r\n|\r|\n')
|
||||||
|
fcre = re.compile(r'^From ', re.MULTILINE)
|
||||||
|
+NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]')
|
||||||
|
|
||||||
|
|
||||||
|
class Generator:
|
||||||
|
@@ -222,7 +224,16 @@ def _dispatch(self, msg):
|
||||||
|
|
||||||
|
def _write_headers(self, msg):
|
||||||
|
for h, v in msg.raw_items():
|
||||||
|
- self.write(self.policy.fold(h, v))
|
||||||
|
+ folded = self.policy.fold(h, v)
|
||||||
|
+ if self.policy.verify_generated_headers:
|
||||||
|
+ linesep = self.policy.linesep
|
||||||
|
+ if not folded.endswith(self.policy.linesep):
|
||||||
|
+ raise HeaderWriteError(
|
||||||
|
+ f'folded header does not end with {linesep!r}: {folded!r}')
|
||||||
|
+ if NEWLINE_WITHOUT_FWSP.search(folded.removesuffix(linesep)):
|
||||||
|
+ raise HeaderWriteError(
|
||||||
|
+ f'folded header contains newline: {folded!r}')
|
||||||
|
+ self.write(folded)
|
||||||
|
# A blank line always separates headers from body
|
||||||
|
self.write(self._NL)
|
||||||
|
|
||||||
|
diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py
|
||||||
|
index 89e7edeb63..d29400f0ed 100644
|
||||||
|
--- a/Lib/test/test_email/test_generator.py
|
||||||
|
+++ b/Lib/test/test_email/test_generator.py
|
||||||
|
@@ -6,6 +6,7 @@
|
||||||
|
from email.generator import Generator, BytesGenerator
|
||||||
|
from email.headerregistry import Address
|
||||||
|
from email import policy
|
||||||
|
+import email.errors
|
||||||
|
from test.test_email import TestEmailBase, parameterize
|
||||||
|
|
||||||
|
|
||||||
|
@@ -216,6 +217,44 @@ def test_rfc2231_wrapping_switches_to_default_len_if_too_narrow(self):
|
||||||
|
g.flatten(msg)
|
||||||
|
self.assertEqual(s.getvalue(), self.typ(expected))
|
||||||
|
|
||||||
|
+ def test_keep_encoded_newlines(self):
|
||||||
|
+ msg = self.msgmaker(self.typ(textwrap.dedent("""\
|
||||||
|
+ To: nobody
|
||||||
|
+ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com
|
||||||
|
+
|
||||||
|
+ None
|
||||||
|
+ """)))
|
||||||
|
+ expected = textwrap.dedent("""\
|
||||||
|
+ To: nobody
|
||||||
|
+ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com
|
||||||
|
+
|
||||||
|
+ None
|
||||||
|
+ """)
|
||||||
|
+ s = self.ioclass()
|
||||||
|
+ g = self.genclass(s, policy=self.policy.clone(max_line_length=80))
|
||||||
|
+ g.flatten(msg)
|
||||||
|
+ self.assertEqual(s.getvalue(), self.typ(expected))
|
||||||
|
+
|
||||||
|
+ def test_keep_long_encoded_newlines(self):
|
||||||
|
+ msg = self.msgmaker(self.typ(textwrap.dedent("""\
|
||||||
|
+ To: nobody
|
||||||
|
+ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com
|
||||||
|
+
|
||||||
|
+ None
|
||||||
|
+ """)))
|
||||||
|
+ expected = textwrap.dedent("""\
|
||||||
|
+ To: nobody
|
||||||
|
+ Subject: Bad subject
|
||||||
|
+ =?utf-8?q?=0A?=Bcc:
|
||||||
|
+ injection@example.com
|
||||||
|
+
|
||||||
|
+ None
|
||||||
|
+ """)
|
||||||
|
+ s = self.ioclass()
|
||||||
|
+ g = self.genclass(s, policy=self.policy.clone(max_line_length=30))
|
||||||
|
+ g.flatten(msg)
|
||||||
|
+ self.assertEqual(s.getvalue(), self.typ(expected))
|
||||||
|
+
|
||||||
|
|
||||||
|
class TestGenerator(TestGeneratorBase, TestEmailBase):
|
||||||
|
|
||||||
|
@@ -224,6 +263,29 @@ class TestGenerator(TestGeneratorBase, TestEmailBase):
|
||||||
|
ioclass = io.StringIO
|
||||||
|
typ = str
|
||||||
|
|
||||||
|
+ def test_verify_generated_headers(self):
|
||||||
|
+ """gh-121650: by default the generator prevents header injection"""
|
||||||
|
+ class LiteralHeader(str):
|
||||||
|
+ name = 'Header'
|
||||||
|
+ def fold(self, **kwargs):
|
||||||
|
+ return self
|
||||||
|
+
|
||||||
|
+ for text in (
|
||||||
|
+ 'Value\r\nBad Injection\r\n',
|
||||||
|
+ 'NoNewLine'
|
||||||
|
+ ):
|
||||||
|
+ with self.subTest(text=text):
|
||||||
|
+ message = message_from_string(
|
||||||
|
+ "Header: Value\r\n\r\nBody",
|
||||||
|
+ policy=self.policy,
|
||||||
|
+ )
|
||||||
|
+
|
||||||
|
+ del message['Header']
|
||||||
|
+ message['Header'] = LiteralHeader(text)
|
||||||
|
+
|
||||||
|
+ with self.assertRaises(email.errors.HeaderWriteError):
|
||||||
|
+ message.as_string()
|
||||||
|
+
|
||||||
|
|
||||||
|
class TestBytesGenerator(TestGeneratorBase, TestEmailBase):
|
||||||
|
|
||||||
|
diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py
|
||||||
|
index c6b9c80efe..baa35fd68e 100644
|
||||||
|
--- a/Lib/test/test_email/test_policy.py
|
||||||
|
+++ b/Lib/test/test_email/test_policy.py
|
||||||
|
@@ -26,6 +26,7 @@ class PolicyAPITests(unittest.TestCase):
|
||||||
|
'raise_on_defect': False,
|
||||||
|
'mangle_from_': True,
|
||||||
|
'message_factory': None,
|
||||||
|
+ 'verify_generated_headers': True,
|
||||||
|
}
|
||||||
|
# These default values are the ones set on email.policy.default.
|
||||||
|
# If any of these defaults change, the docs must be updated.
|
||||||
|
@@ -294,6 +295,31 @@ def test_short_maxlen_error(self):
|
||||||
|
with self.assertRaises(email.errors.HeaderParseError):
|
||||||
|
policy.fold("Subject", subject)
|
||||||
|
|
||||||
|
+ def test_verify_generated_headers(self):
|
||||||
|
+ """Turning protection off allows header injection"""
|
||||||
|
+ policy = email.policy.default.clone(verify_generated_headers=False)
|
||||||
|
+ for text in (
|
||||||
|
+ 'Header: Value\r\nBad: Injection\r\n',
|
||||||
|
+ 'Header: NoNewLine'
|
||||||
|
+ ):
|
||||||
|
+ with self.subTest(text=text):
|
||||||
|
+ message = email.message_from_string(
|
||||||
|
+ "Header: Value\r\n\r\nBody",
|
||||||
|
+ policy=policy,
|
||||||
|
+ )
|
||||||
|
+ class LiteralHeader(str):
|
||||||
|
+ name = 'Header'
|
||||||
|
+ def fold(self, **kwargs):
|
||||||
|
+ return self
|
||||||
|
+
|
||||||
|
+ del message['Header']
|
||||||
|
+ message['Header'] = LiteralHeader(text)
|
||||||
|
+
|
||||||
|
+ self.assertEqual(
|
||||||
|
+ message.as_string(),
|
||||||
|
+ f"{text}\nBody",
|
||||||
|
+ )
|
||||||
|
+
|
||||||
|
# XXX: Need subclassing tests.
|
||||||
|
# For adding subclassed objects, make sure the usual rules apply (subclass
|
||||||
|
# wins), but that the order still works (right overrides left).
|
||||||
|
diff --git a/Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst b/Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..83dd28d4ac
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst
|
||||||
|
@@ -0,0 +1,5 @@
|
||||||
|
+:mod:`email` headers with embedded newlines are now quoted on output. The
|
||||||
|
+:mod:`~email.generator` will now refuse to serialize (write) headers that
|
||||||
|
+are unsafely folded or delimited; see
|
||||||
|
+:attr:`~email.policy.Policy.verify_generated_headers`. (Contributed by Bas
|
||||||
|
+Bloemsaat and Petr Viktorin in :gh:`121650`.)
|
@ -0,0 +1,128 @@
|
|||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Jason R. Coombs" <jaraco@jaraco.com>
|
||||||
|
Date: Mon, 19 Aug 2024 19:28:20 -0400
|
||||||
|
Subject: [PATCH] 00436: [CVE-2024-8088] gh-122905: Sanitize names in
|
||||||
|
zipfile.Path.
|
||||||
|
|
||||||
|
Co-authored-by: Jason R. Coombs <jaraco@jaraco.com>
|
||||||
|
---
|
||||||
|
Lib/test/test_zipfile.py | 17 ++++++
|
||||||
|
Lib/zipfile.py | 61 ++++++++++++++++++-
|
||||||
|
...-08-11-14-08-04.gh-issue-122905.7tDsxA.rst | 1 +
|
||||||
|
3 files changed, 78 insertions(+), 1 deletion(-)
|
||||||
|
create mode 100644 Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst
|
||||||
|
|
||||||
|
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
|
||||||
|
index 4de6f379a4..8bdc7a1b7d 100644
|
||||||
|
--- a/Lib/test/test_zipfile.py
|
||||||
|
+++ b/Lib/test/test_zipfile.py
|
||||||
|
@@ -3651,6 +3651,23 @@ def test_extract_orig_with_implied_dirs(self, alpharep):
|
||||||
|
zipfile.Path(zf)
|
||||||
|
zf.extractall(source_path.parent)
|
||||||
|
|
||||||
|
+ def test_malformed_paths(self):
|
||||||
|
+ """
|
||||||
|
+ Path should handle malformed paths.
|
||||||
|
+ """
|
||||||
|
+ data = io.BytesIO()
|
||||||
|
+ zf = zipfile.ZipFile(data, "w")
|
||||||
|
+ zf.writestr("/one-slash.txt", b"content")
|
||||||
|
+ zf.writestr("//two-slash.txt", b"content")
|
||||||
|
+ zf.writestr("../parent.txt", b"content")
|
||||||
|
+ zf.filename = ''
|
||||||
|
+ root = zipfile.Path(zf)
|
||||||
|
+ assert list(map(str, root.iterdir())) == [
|
||||||
|
+ 'one-slash.txt',
|
||||||
|
+ 'two-slash.txt',
|
||||||
|
+ 'parent.txt',
|
||||||
|
+ ]
|
||||||
|
+
|
||||||
|
|
||||||
|
class EncodedMetadataTests(unittest.TestCase):
|
||||||
|
file_names = ['\u4e00', '\u4e8c', '\u4e09'] # Han 'one', 'two', 'three'
|
||||||
|
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
|
||||||
|
index 86829abce4..b7bf9ef7e3 100644
|
||||||
|
--- a/Lib/zipfile.py
|
||||||
|
+++ b/Lib/zipfile.py
|
||||||
|
@@ -9,6 +9,7 @@
|
||||||
|
import itertools
|
||||||
|
import os
|
||||||
|
import posixpath
|
||||||
|
+import re
|
||||||
|
import shutil
|
||||||
|
import stat
|
||||||
|
import struct
|
||||||
|
@@ -2243,7 +2244,65 @@ def _difference(minuend, subtrahend):
|
||||||
|
return itertools.filterfalse(set(subtrahend).__contains__, minuend)
|
||||||
|
|
||||||
|
|
||||||
|
-class CompleteDirs(ZipFile):
|
||||||
|
+class SanitizedNames:
|
||||||
|
+ """
|
||||||
|
+ ZipFile mix-in to ensure names are sanitized.
|
||||||
|
+ """
|
||||||
|
+
|
||||||
|
+ def namelist(self):
|
||||||
|
+ return list(map(self._sanitize, super().namelist()))
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def _sanitize(name):
|
||||||
|
+ r"""
|
||||||
|
+ Ensure a relative path with posix separators and no dot names.
|
||||||
|
+ Modeled after
|
||||||
|
+ https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
|
||||||
|
+ but provides consistent cross-platform behavior.
|
||||||
|
+ >>> san = SanitizedNames._sanitize
|
||||||
|
+ >>> san('/foo/bar')
|
||||||
|
+ 'foo/bar'
|
||||||
|
+ >>> san('//foo.txt')
|
||||||
|
+ 'foo.txt'
|
||||||
|
+ >>> san('foo/.././bar.txt')
|
||||||
|
+ 'foo/bar.txt'
|
||||||
|
+ >>> san('foo../.bar.txt')
|
||||||
|
+ 'foo../.bar.txt'
|
||||||
|
+ >>> san('\\foo\\bar.txt')
|
||||||
|
+ 'foo/bar.txt'
|
||||||
|
+ >>> san('D:\\foo.txt')
|
||||||
|
+ 'D/foo.txt'
|
||||||
|
+ >>> san('\\\\server\\share\\file.txt')
|
||||||
|
+ 'server/share/file.txt'
|
||||||
|
+ >>> san('\\\\?\\GLOBALROOT\\Volume3')
|
||||||
|
+ '?/GLOBALROOT/Volume3'
|
||||||
|
+ >>> san('\\\\.\\PhysicalDrive1\\root')
|
||||||
|
+ 'PhysicalDrive1/root'
|
||||||
|
+ Retain any trailing slash.
|
||||||
|
+ >>> san('abc/')
|
||||||
|
+ 'abc/'
|
||||||
|
+ Raises a ValueError if the result is empty.
|
||||||
|
+ >>> san('../..')
|
||||||
|
+ Traceback (most recent call last):
|
||||||
|
+ ...
|
||||||
|
+ ValueError: Empty filename
|
||||||
|
+ """
|
||||||
|
+
|
||||||
|
+ def allowed(part):
|
||||||
|
+ return part and part not in {'..', '.'}
|
||||||
|
+
|
||||||
|
+ # Drop the colon after a drive letter (the letter itself is kept).
|
||||||
|
+ # Don't use ntpath.splitdrive, because that also strips UNC paths
|
||||||
|
+ bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
|
||||||
|
+ clean = bare.replace('\\', '/')
|
||||||
|
+ parts = clean.split('/')
|
||||||
|
+ joined = '/'.join(filter(allowed, parts))
|
||||||
|
+ if not joined:
|
||||||
|
+ raise ValueError("Empty filename")
|
||||||
|
+ return joined + '/' * name.endswith('/')
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+class CompleteDirs(SanitizedNames, ZipFile):
|
||||||
|
"""
|
||||||
|
A ZipFile subclass that ensures that implied directories
|
||||||
|
are always included in the namelist.
|
||||||
|
diff --git a/Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst b/Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..1be44c906c
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst
|
||||||
|
@@ -0,0 +1 @@
|
||||||
|
+:class:`zipfile.Path` objects now sanitize names from the zipfile.
|
Loading…
Reference in new issue