import python3-3.6.8-59.el8

i8c-beta changed/i8c-beta/python3-3.6.8-59.el8
MSVSphere Packaging Team 10 months ago
commit 1accafc0b9

1
.gitignore vendored

@ -0,0 +1 @@
SOURCES/Python-3.6.8-noexe.tar.xz

@ -0,0 +1 @@
a39802ac8f0c61645c6a50fbdd32e3ca92862ff5 SOURCES/Python-3.6.8-noexe.tar.xz

@ -0,0 +1,19 @@
diff -up Python-3.1.1/Lib/distutils/unixccompiler.py.rpath Python-3.1.1/Lib/distutils/unixccompiler.py
--- Python-3.1.1/Lib/distutils/unixccompiler.py.rpath 2009-09-04 17:29:34.000000000 -0400
+++ Python-3.1.1/Lib/distutils/unixccompiler.py 2009-09-04 17:49:54.000000000 -0400
@@ -141,6 +141,15 @@ class UnixCCompiler(CCompiler):
if sys.platform == "cygwin":
exe_extension = ".exe"
+ def _fix_lib_args(self, libraries, library_dirs, runtime_library_dirs):
+ """Remove standard library path from rpath"""
+ libraries, library_dirs, runtime_library_dirs = super()._fix_lib_args(
+ libraries, library_dirs, runtime_library_dirs)
+ libdir = sysconfig.get_config_var('LIBDIR')
+ if runtime_library_dirs and (libdir in runtime_library_dirs):
+ runtime_library_dirs.remove(libdir)
+ return libraries, library_dirs, runtime_library_dirs
+
def preprocess(self, source, output_file=None, macros=None,
include_dirs=None, extra_preargs=None, extra_postargs=None):
fixed_args = self._fix_compile_args(None, macros, include_dirs)

@ -0,0 +1,218 @@
diff --git a/Lib/distutils/command/install.py b/Lib/distutils/command/install.py
index 9474e9c..c0ce4c6 100644
--- a/Lib/distutils/command/install.py
+++ b/Lib/distutils/command/install.py
@@ -30,14 +30,14 @@ WINDOWS_SCHEME = {
INSTALL_SCHEMES = {
'unix_prefix': {
'purelib': '$base/lib/python$py_version_short/site-packages',
- 'platlib': '$platbase/lib/python$py_version_short/site-packages',
+ 'platlib': '$platbase/lib64/python$py_version_short/site-packages',
'headers': '$base/include/python$py_version_short$abiflags/$dist_name',
'scripts': '$base/bin',
'data' : '$base',
},
'unix_home': {
'purelib': '$base/lib/python',
- 'platlib': '$base/lib/python',
+ 'platlib': '$base/lib64/python',
'headers': '$base/include/python/$dist_name',
'scripts': '$base/bin',
'data' : '$base',
diff --git a/Lib/distutils/sysconfig.py b/Lib/distutils/sysconfig.py
index 026cca7..6d3e077 100644
--- a/Lib/distutils/sysconfig.py
+++ b/Lib/distutils/sysconfig.py
@@ -132,8 +132,12 @@ def get_python_lib(plat_specific=0, standard_lib=0, prefix=None):
prefix = plat_specific and EXEC_PREFIX or PREFIX
if os.name == "posix":
+ if plat_specific or standard_lib:
+ lib = "lib64"
+ else:
+ lib = "lib"
libpython = os.path.join(prefix,
- "lib", "python" + get_python_version())
+ lib, "python" + get_python_version())
if standard_lib:
return libpython
else:
diff a/Lib/distutils/tests/test_install.py b/Lib/distutils/tests/test_install.py
--- a/Lib/distutils/tests/test_install.py
+++ b/Lib/distutils/tests/test_install.py
@@ -57,8 +57,9 @@
self.assertEqual(got, expected)
libdir = os.path.join(destination, "lib", "python")
+ platlibdir = os.path.join(destination, "lib64", "python")
check_path(cmd.install_lib, libdir)
- check_path(cmd.install_platlib, libdir)
+ check_path(cmd.install_platlib, platlibdir)
check_path(cmd.install_purelib, libdir)
check_path(cmd.install_headers,
os.path.join(destination, "include", "python", "foopkg"))
diff --git a/Lib/site.py b/Lib/site.py
index a84e3bb..ba0d3ea 100644
--- a/Lib/site.py
+++ b/Lib/site.py
@@ -303,11 +303,15 @@ def getsitepackages(prefixes=None):
seen.add(prefix)
if os.sep == '/':
+ sitepackages.append(os.path.join(prefix, "lib64",
+ "python" + sys.version[:3],
+ "site-packages"))
sitepackages.append(os.path.join(prefix, "lib",
"python%d.%d" % sys.version_info[:2],
"site-packages"))
else:
sitepackages.append(prefix)
+ sitepackages.append(os.path.join(prefix, "lib64", "site-packages"))
sitepackages.append(os.path.join(prefix, "lib", "site-packages"))
if sys.platform == "darwin":
# for framework builds *only* we add the standard Apple
diff --git a/Lib/sysconfig.py b/Lib/sysconfig.py
index b9bbfe5..2a5f29c 100644
--- a/Lib/sysconfig.py
+++ b/Lib/sysconfig.py
@@ -20,10 +20,10 @@ __all__ = [
_INSTALL_SCHEMES = {
'posix_prefix': {
- 'stdlib': '{installed_base}/lib/python{py_version_short}',
- 'platstdlib': '{platbase}/lib/python{py_version_short}',
+ 'stdlib': '{installed_base}/lib64/python{py_version_short}',
+ 'platstdlib': '{platbase}/lib64/python{py_version_short}',
'purelib': '{base}/lib/python{py_version_short}/site-packages',
- 'platlib': '{platbase}/lib/python{py_version_short}/site-packages',
+ 'platlib': '{platbase}/lib64/python{py_version_short}/site-packages',
'include':
'{installed_base}/include/python{py_version_short}{abiflags}',
'platinclude':
@@ -61,10 +61,10 @@ _INSTALL_SCHEMES = {
'data': '{userbase}',
},
'posix_user': {
- 'stdlib': '{userbase}/lib/python{py_version_short}',
- 'platstdlib': '{userbase}/lib/python{py_version_short}',
+ 'stdlib': '{userbase}/lib64/python{py_version_short}',
+ 'platstdlib': '{userbase}/lib64/python{py_version_short}',
'purelib': '{userbase}/lib/python{py_version_short}/site-packages',
- 'platlib': '{userbase}/lib/python{py_version_short}/site-packages',
+ 'platlib': '{userbase}/lib64/python{py_version_short}/site-packages',
'include': '{userbase}/include/python{py_version_short}',
'scripts': '{userbase}/bin',
'data': '{userbase}',
diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py
index f698927..bc977b5 100644
--- a/Lib/test/test_site.py
+++ b/Lib/test/test_site.py
@@ -248,8 +248,8 @@ class HelperFunctionsTests(unittest.TestCase):
self.assertEqual(dirs[1], wanted)
elif os.sep == '/':
# OS X non-framework builds, Linux, FreeBSD, etc
- self.assertEqual(len(dirs), 1)
- wanted = os.path.join('xoxo', 'lib',
+ self.assertEqual(len(dirs), 2)
+ wanted = os.path.join('xoxo', 'lib64',
'python%d.%d' % sys.version_info[:2],
'site-packages')
self.assertEqual(dirs[0], wanted)
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 8fa7934..a693917 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -126,7 +126,7 @@ LIBDIR= @libdir@
MANDIR= @mandir@
INCLUDEDIR= @includedir@
CONFINCLUDEDIR= $(exec_prefix)/include
-SCRIPTDIR= $(prefix)/lib
+SCRIPTDIR= $(prefix)/lib64
ABIFLAGS= @ABIFLAGS@
# Detailed destination directories
diff --git a/Modules/getpath.c b/Modules/getpath.c
index 65b47a3..eaa756c 100644
--- a/Modules/getpath.c
+++ b/Modules/getpath.c
@@ -494,7 +494,7 @@ calculate_path(void)
_pythonpath = Py_DecodeLocale(PYTHONPATH, NULL);
_prefix = Py_DecodeLocale(PREFIX, NULL);
_exec_prefix = Py_DecodeLocale(EXEC_PREFIX, NULL);
- lib_python = Py_DecodeLocale("lib/python" VERSION, NULL);
+ lib_python = Py_DecodeLocale("lib64/python" VERSION, NULL);
if (!_pythonpath || !_prefix || !_exec_prefix || !lib_python) {
Py_FatalError(
@@ -683,7 +683,7 @@ calculate_path(void)
}
else
wcsncpy(zip_path, _prefix, MAXPATHLEN);
- joinpath(zip_path, L"lib/python00.zip");
+ joinpath(zip_path, L"lib64/python00.zip");
bufsz = wcslen(zip_path); /* Replace "00" with version */
zip_path[bufsz - 6] = VERSION[0];
zip_path[bufsz - 5] = VERSION[2];
@@ -695,7 +695,7 @@ calculate_path(void)
fprintf(stderr,
"Could not find platform dependent libraries <exec_prefix>\n");
wcsncpy(exec_prefix, _exec_prefix, MAXPATHLEN);
- joinpath(exec_prefix, L"lib/lib-dynload");
+ joinpath(exec_prefix, L"lib64/lib-dynload");
}
/* If we found EXEC_PREFIX do *not* reduce it! (Yet.) */
diff --git a/setup.py b/setup.py
index 0f2dfc4..da37896 100644
--- a/setup.py
+++ b/setup.py
@@ -492,7 +492,7 @@ class PyBuildExt(build_ext):
# directories (i.e. '.' and 'Include') must be first. See issue
# 10520.
if not cross_compiling:
- add_dir_to_list(self.compiler.library_dirs, '/usr/local/lib')
+ add_dir_to_list(self.compiler.library_dirs, '/usr/local/lib64')
add_dir_to_list(self.compiler.include_dirs, '/usr/local/include')
# only change this for cross builds for 3.3, issues on Mageia
if cross_compiling:
@@ -780,11 +780,11 @@ class PyBuildExt(build_ext):
elif curses_library:
readline_libs.append(curses_library)
elif self.compiler.find_library_file(lib_dirs +
- ['/usr/lib/termcap'],
+ ['/usr/lib64/termcap'],
'termcap'):
readline_libs.append('termcap')
exts.append( Extension('readline', ['readline.c'],
- library_dirs=['/usr/lib/termcap'],
+ library_dirs=['/usr/lib64/termcap'],
extra_link_args=readline_extra_link_args,
libraries=readline_libs) )
else:
@@ -821,8 +821,8 @@ class PyBuildExt(build_ext):
if krb5_h:
ssl_incs += krb5_h
ssl_libs = find_library_file(self.compiler, 'ssl',lib_dirs,
- ['/usr/local/ssl/lib',
- '/usr/contrib/ssl/lib/'
+ ['/usr/local/ssl/lib64',
+ '/usr/contrib/ssl/lib64/'
] )
if (ssl_incs is not None and
diff --git a/configure.ac b/configure.ac
index 01c66fe..1e6d515 100644
--- a/configure.ac
+++ b/configure.ac
@@ -4772,9 +4772,9 @@ AC_MSG_RESULT($LDVERSION)
dnl define LIBPL after ABIFLAGS and LDVERSION is defined.
AC_SUBST(PY_ENABLE_SHARED)
if test x$PLATFORM_TRIPLET = x; then
- LIBPL='$(prefix)'"/lib/python${VERSION}/config-${LDVERSION}"
+ LIBPL='$(prefix)'"/lib64/python${VERSION}/config-${LDVERSION}"
else
- LIBPL='$(prefix)'"/lib/python${VERSION}/config-${LDVERSION}-${PLATFORM_TRIPLET}"
+ LIBPL='$(prefix)'"/lib64/python${VERSION}/config-${LDVERSION}-${PLATFORM_TRIPLET}"
fi
AC_SUBST(LIBPL)

@ -0,0 +1,53 @@
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 9cd482f..b074b26 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -549,7 +549,7 @@ clinic: check-clean-src $(srcdir)/Modules/_blake2/blake2s_impl.c
$(PYTHON_FOR_REGEN) ./Tools/clinic/clinic.py --make
# Build the interpreter
-$(BUILDPYTHON): Programs/python.o $(LIBRARY) $(LDLIBRARY) $(PY3LIBRARY)
+$(BUILDPYTHON): Programs/python.o $(LDLIBRARY) $(PY3LIBRARY)
$(LINKCC) $(PY_CORE_LDFLAGS) $(LINKFORSHARED) -o $@ Programs/python.o $(BLDLIBRARY) $(LIBS) $(MODLIBS) $(SYSLIBS) $(LDLAST)
platform: $(BUILDPYTHON) pybuilddir.txt
@@ -597,12 +597,6 @@ sharedmods: $(BUILDPYTHON) pybuilddir.txt Modules/_math.o
_TCLTK_INCLUDES='$(TCLTK_INCLUDES)' _TCLTK_LIBS='$(TCLTK_LIBS)' \
$(PYTHON_FOR_BUILD) $(srcdir)/setup.py $$quiet build
-
-# Build static library
-$(LIBRARY): $(LIBRARY_OBJS)
- -rm -f $@
- $(AR) $(ARFLAGS) $@ $(LIBRARY_OBJS)
-
libpython$(LDVERSION).so: $(LIBRARY_OBJS)
if test $(INSTSONAME) != $(LDLIBRARY); then \
$(BLDSHARED) -Wl,-h$(INSTSONAME) -o $(INSTSONAME) $(LIBRARY_OBJS) $(MODLIBS) $(SHLIBS) $(LIBC) $(LIBM) $(LDLAST); \
@@ -692,7 +686,7 @@ Modules/Setup: $(srcdir)/Modules/Setup.dist
echo "-----------------------------------------------"; \
fi
-Programs/_testembed: Programs/_testembed.o $(LIBRARY) $(LDLIBRARY) $(PY3LIBRARY)
+Programs/_testembed: Programs/_testembed.o $(LDLIBRARY) $(PY3LIBRARY)
$(LINKCC) $(PY_CORE_LDFLAGS) $(LINKFORSHARED) -o $@ Programs/_testembed.o $(BLDLIBRARY) $(LIBS) $(MODLIBS) $(SYSLIBS) $(LDLAST)
############################################################################
@@ -1428,17 +1422,6 @@ libainstall: @DEF_MAKE_RULE@ python-config
else true; \
fi; \
done
- @if test -d $(LIBRARY); then :; else \
- if test "$(PYTHONFRAMEWORKDIR)" = no-framework; then \
- if test "$(SHLIB_SUFFIX)" = .dll; then \
- $(INSTALL_DATA) $(LDLIBRARY) $(DESTDIR)$(LIBPL) ; \
- else \
- $(INSTALL_DATA) $(LIBRARY) $(DESTDIR)$(LIBPL)/$(LIBRARY) ; \
- fi; \
- else \
- echo Skip install of $(LIBRARY) - use make frameworkinstall; \
- fi; \
- fi
$(INSTALL_DATA) Modules/config.c $(DESTDIR)$(LIBPL)/config.c
$(INSTALL_DATA) Programs/python.o $(DESTDIR)$(LIBPL)/python.o
$(INSTALL_DATA) $(srcdir)/Modules/config.c.in $(DESTDIR)$(LIBPL)/config.c.in

@ -0,0 +1,46 @@
diff -up Python-3.2.2/Lib/unittest/case.py.add-rpmbuild-hooks-to-unittest Python-3.2.2/Lib/unittest/case.py
--- Python-3.2.2/Lib/unittest/case.py.add-rpmbuild-hooks-to-unittest 2011-09-03 12:16:44.000000000 -0400
+++ Python-3.2.2/Lib/unittest/case.py 2011-09-09 06:35:16.365568382 -0400
@@ -3,6 +3,7 @@
import sys
import functools
import difflib
+import os
import logging
import pprint
import re
@@ -101,5 +102,21 @@ def expectedFailure(func):
raise self.test_case.failureException(msg)
+# Non-standard/downstream-only hooks for handling issues with specific test
+# cases:
+
+def _skipInRpmBuild(reason):
+ """
+ Non-standard/downstream-only decorator for marking a specific unit test
+ to be skipped when run within the %check of an rpmbuild.
+
+ Specifically, this takes effect when WITHIN_PYTHON_RPM_BUILD is set within
+ the environment, and has no effect otherwise.
+ """
+ if 'WITHIN_PYTHON_RPM_BUILD' in os.environ:
+ return skip(reason)
+ else:
+ return _id
+
class _AssertRaisesBaseContext(_BaseTestCaseContext):
def __init__(self, expected, test_case, expected_regex=None):
diff -up Python-3.2.2/Lib/unittest/__init__.py.add-rpmbuild-hooks-to-unittest Python-3.2.2/Lib/unittest/__init__.py
--- Python-3.2.2/Lib/unittest/__init__.py.add-rpmbuild-hooks-to-unittest 2011-09-03 12:16:44.000000000 -0400
+++ Python-3.2.2/Lib/unittest/__init__.py 2011-09-09 06:35:16.366568382 -0400
@@ -57,7 +57,8 @@ __unittest = True
from .result import TestResult
from .case import (TestCase, FunctionTestCase, SkipTest, skip, skipIf,
- skipUnless, expectedFailure)
+ skipUnless, expectedFailure,
+ _skipInRpmBuild)
from .suite import BaseTestSuite, TestSuite
from .loader import (TestLoader, defaultTestLoader, makeSuite, getTestCaseNames,
findTestCases)

@ -0,0 +1,15 @@
diff -up Python-3.2.3/Lib/ctypes/__init__.py.rhbz814391 Python-3.2.3/Lib/ctypes/__init__.py
--- Python-3.2.3/Lib/ctypes/__init__.py.rhbz814391 2012-04-20 15:12:49.017867692 -0400
+++ Python-3.2.3/Lib/ctypes/__init__.py 2012-04-20 15:15:09.501111408 -0400
@@ -275,11 +275,6 @@ def _reset_cache():
# _SimpleCData.c_char_p_from_param
POINTER(c_char).from_param = c_char_p.from_param
_pointer_type_cache[None] = c_void_p
- # XXX for whatever reasons, creating the first instance of a callback
- # function is needed for the unittests on Win64 to succeed. This MAY
- # be a compiler bug, since the problem occurs only when _ctypes is
- # compiled with the MS SDK compiler. Or an uninitialized variable?
- CFUNCTYPE(c_int)(lambda: None)
def create_unicode_buffer(init, size=None):
"""create_unicode_buffer(aString) -> character array

@ -0,0 +1,11 @@
diff -up cpython-59223da36dec/Lib/test/test_posix.py.disable-test_fs_holes-in-rpm-build cpython-59223da36dec/Lib/test/test_posix.py
--- cpython-59223da36dec/Lib/test/test_posix.py.disable-test_fs_holes-in-rpm-build 2012-08-07 17:15:59.000000000 -0400
+++ cpython-59223da36dec/Lib/test/test_posix.py 2012-08-07 17:16:53.528330330 -0400
@@ -973,6 +973,7 @@ class PosixTester(unittest.TestCase):
posix.RTLD_GLOBAL
posix.RTLD_LOCAL
+ @unittest._skipInRpmBuild('running kernel may not match kernel in chroot')
@unittest.skipUnless(hasattr(os, 'SEEK_HOLE'),
"test needs an OS that reports file holes")
def test_fs_holes(self):

@ -0,0 +1,11 @@
diff -up Python-3.3.0b1/Lib/test/test_socket.py.disable-test_socket-in-rpm-builds Python-3.3.0b1/Lib/test/test_socket.py
--- Python-3.3.0b1/Lib/test/test_socket.py.disable-test_socket-in-rpm-builds 2012-07-24 15:02:30.823355067 -0400
+++ Python-3.3.0b1/Lib/test/test_socket.py 2012-07-24 15:08:13.021354999 -0400
@@ -2188,6 +2188,7 @@ class RecvmsgGenericStreamTests(RecvmsgG
# Tests which require a stream socket and can use either recvmsg()
# or recvmsg_into().
+ @unittest._skipInRpmBuild('fails intermittently when run within Koji')
def testRecvmsgEOF(self):
# Receive end-of-stream indicator (b"", peer socket closed).
msg, ancdata, flags, addr = self.doRecvmsg(self.serv_sock, 1024)

@ -0,0 +1,309 @@
diff --git a/Include/object.h b/Include/object.h
index 63e37b8..613b26c 100644
--- a/Include/object.h
+++ b/Include/object.h
@@ -1071,6 +1071,49 @@ PyAPI_FUNC(void)
_PyObject_DebugTypeStats(FILE *out);
#endif /* ifndef Py_LIMITED_API */
+/*
+ Define a pair of assertion macros.
+
+ These work like the regular C assert(), in that they will abort the
+ process with a message on stderr if the given condition fails to hold,
+ but compile away to nothing if NDEBUG is defined.
+
+ However, before aborting, Python will also try to call _PyObject_Dump() on
+ the given object. This may be of use when investigating bugs in which a
+ particular object is corrupt (e.g. buggy a tp_visit method in an extension
+ module breaking the garbage collector), to help locate the broken objects.
+
+ The WITH_MSG variant allows you to supply an additional message that Python
+ will attempt to print to stderr, after the object dump.
+*/
+#ifdef NDEBUG
+/* No debugging: compile away the assertions: */
+#define PyObject_ASSERT_WITH_MSG(obj, expr, msg) ((void)0)
+#else
+/* With debugging: generate checks: */
+#define PyObject_ASSERT_WITH_MSG(obj, expr, msg) \
+ ((expr) \
+ ? (void)(0) \
+ : _PyObject_AssertFailed((obj), \
+ (msg), \
+ (__STRING(expr)), \
+ (__FILE__), \
+ (__LINE__), \
+ (__PRETTY_FUNCTION__)))
+#endif
+
+#define PyObject_ASSERT(obj, expr) \
+ PyObject_ASSERT_WITH_MSG(obj, expr, NULL)
+
+/*
+ Declare and define the entrypoint even when NDEBUG is defined, to avoid
+ causing compiler/linker errors when building extensions without NDEBUG
+ against a Python built with NDEBUG defined
+*/
+PyAPI_FUNC(void) _PyObject_AssertFailed(PyObject *, const char *,
+ const char *, const char *, int,
+ const char *);
+
#ifdef __cplusplus
}
#endif
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index 7e82b24..8ecc3d9 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -2,9 +2,11 @@ import unittest
from test.support import (verbose, refcount_test, run_unittest,
strip_python_stderr, cpython_only, start_threads,
temp_dir, requires_type_collecting, TESTFN, unlink)
+from test.support import import_module
from test.support.script_helper import assert_python_ok, make_script
import sys
+import sysconfig
import time
import gc
import weakref
@@ -50,6 +52,8 @@ class GC_Detector(object):
# gc collects it.
self.wr = weakref.ref(C1055820(666), it_happened)
+BUILD_WITH_NDEBUG = ('-DNDEBUG' in sysconfig.get_config_vars()['PY_CFLAGS'])
+
@with_tp_del
class Uncollectable(object):
"""Create a reference cycle with multiple __del__ methods.
@@ -877,6 +881,50 @@ class GCCallbackTests(unittest.TestCase):
self.assertEqual(len(gc.garbage), 0)
+ @unittest.skipIf(BUILD_WITH_NDEBUG,
+ 'built with -NDEBUG')
+ def test_refcount_errors(self):
+ self.preclean()
+ # Verify the "handling" of objects with broken refcounts
+ import_module("ctypes") #skip if not supported
+
+ import subprocess
+ code = '''if 1:
+ a = []
+ b = [a]
+
+ # Simulate the refcount of "a" being too low (compared to the
+ # references held on it by live data), but keeping it above zero
+ # (to avoid deallocating it):
+ import ctypes
+ ctypes.pythonapi.Py_DecRef(ctypes.py_object(a))
+
+ # The garbage collector should now have a fatal error when it reaches
+ # the broken object:
+ import gc
+ gc.collect()
+ '''
+ p = subprocess.Popen([sys.executable, "-c", code],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ stdout, stderr = p.communicate()
+ p.stdout.close()
+ p.stderr.close()
+ # Verify that stderr has a useful error message:
+ self.assertRegex(stderr,
+ b'Modules/gcmodule.c:[0-9]+: visit_decref: Assertion "\(\(gc\)->gc.gc_refs >> \(1\)\) != 0" failed.')
+ self.assertRegex(stderr,
+ b'refcount was too small')
+ self.assertRegex(stderr,
+ b'object : \[\]')
+ self.assertRegex(stderr,
+ b'type : list')
+ self.assertRegex(stderr,
+ b'refcount: 1')
+ self.assertRegex(stderr,
+ b'address : 0x[0-9a-f]+')
+
+
class GCTogglingTests(unittest.TestCase):
def setUp(self):
gc.enable()
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 3bddc40..0cc24f7 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -342,7 +342,8 @@ update_refs(PyGC_Head *containers)
{
PyGC_Head *gc = containers->gc.gc_next;
for (; gc != containers; gc = gc->gc.gc_next) {
- assert(_PyGCHead_REFS(gc) == GC_REACHABLE);
+ PyObject_ASSERT(FROM_GC(gc),
+ _PyGCHead_REFS(gc) == GC_REACHABLE);
_PyGCHead_SET_REFS(gc, Py_REFCNT(FROM_GC(gc)));
/* Python's cyclic gc should never see an incoming refcount
* of 0: if something decref'ed to 0, it should have been
@@ -362,7 +363,8 @@ update_refs(PyGC_Head *containers)
* so serious that maybe this should be a release-build
* check instead of an assert?
*/
- assert(_PyGCHead_REFS(gc) != 0);
+ PyObject_ASSERT(FROM_GC(gc),
+ _PyGCHead_REFS(gc) != 0);
}
}
@@ -377,7 +379,9 @@ visit_decref(PyObject *op, void *data)
* generation being collected, which can be recognized
* because only they have positive gc_refs.
*/
- assert(_PyGCHead_REFS(gc) != 0); /* else refcount was too small */
+ PyObject_ASSERT_WITH_MSG(FROM_GC(gc),
+ _PyGCHead_REFS(gc) != 0,
+ "refcount was too small"); /* else refcount was too small */
if (_PyGCHead_REFS(gc) > 0)
_PyGCHead_DECREF(gc);
}
@@ -437,9 +441,10 @@ visit_reachable(PyObject *op, PyGC_Head *reachable)
* If gc_refs == GC_UNTRACKED, it must be ignored.
*/
else {
- assert(gc_refs > 0
- || gc_refs == GC_REACHABLE
- || gc_refs == GC_UNTRACKED);
+ PyObject_ASSERT(FROM_GC(gc),
+ gc_refs > 0
+ || gc_refs == GC_REACHABLE
+ || gc_refs == GC_UNTRACKED);
}
}
return 0;
@@ -481,7 +486,7 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
*/
PyObject *op = FROM_GC(gc);
traverseproc traverse = Py_TYPE(op)->tp_traverse;
- assert(_PyGCHead_REFS(gc) > 0);
+ PyObject_ASSERT(op, _PyGCHead_REFS(gc) > 0);
_PyGCHead_SET_REFS(gc, GC_REACHABLE);
(void) traverse(op,
(visitproc)visit_reachable,
@@ -544,7 +549,7 @@ move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
PyObject *op = FROM_GC(gc);
- assert(IS_TENTATIVELY_UNREACHABLE(op));
+ PyObject_ASSERT(op, IS_TENTATIVELY_UNREACHABLE(op));
next = gc->gc.gc_next;
if (has_legacy_finalizer(op)) {
@@ -620,7 +625,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
PyWeakReference **wrlist;
op = FROM_GC(gc);
- assert(IS_TENTATIVELY_UNREACHABLE(op));
+ PyObject_ASSERT(op, IS_TENTATIVELY_UNREACHABLE(op));
next = gc->gc.gc_next;
if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)))
@@ -641,9 +646,9 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
* the callback pointer intact. Obscure: it also
* changes *wrlist.
*/
- assert(wr->wr_object == op);
+ PyObject_ASSERT(wr->wr_object, wr->wr_object == op);
_PyWeakref_ClearRef(wr);
- assert(wr->wr_object == Py_None);
+ PyObject_ASSERT(wr->wr_object, wr->wr_object == Py_None);
if (wr->wr_callback == NULL)
continue; /* no callback */
@@ -677,7 +682,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
*/
if (IS_TENTATIVELY_UNREACHABLE(wr))
continue;
- assert(IS_REACHABLE(wr));
+ PyObject_ASSERT(op, IS_REACHABLE(wr));
/* Create a new reference so that wr can't go away
* before we can process it again.
@@ -686,7 +691,8 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
/* Move wr to wrcb_to_call, for the next pass. */
wrasgc = AS_GC(wr);
- assert(wrasgc != next); /* wrasgc is reachable, but
+ PyObject_ASSERT(op, wrasgc != next);
+ /* wrasgc is reachable, but
next isn't, so they can't
be the same */
gc_list_move(wrasgc, &wrcb_to_call);
@@ -702,11 +708,11 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
gc = wrcb_to_call.gc.gc_next;
op = FROM_GC(gc);
- assert(IS_REACHABLE(op));
- assert(PyWeakref_Check(op));
+ PyObject_ASSERT(op, IS_REACHABLE(op));
+ PyObject_ASSERT(op, PyWeakref_Check(op));
wr = (PyWeakReference *)op;
callback = wr->wr_callback;
- assert(callback != NULL);
+ PyObject_ASSERT(op, callback != NULL);
/* copy-paste of weakrefobject.c's handle_callback() */
temp = PyObject_CallFunctionObjArgs(callback, wr, NULL);
@@ -820,12 +826,14 @@ check_garbage(PyGC_Head *collectable)
for (gc = collectable->gc.gc_next; gc != collectable;
gc = gc->gc.gc_next) {
_PyGCHead_SET_REFS(gc, Py_REFCNT(FROM_GC(gc)));
- assert(_PyGCHead_REFS(gc) != 0);
+ PyObject_ASSERT(FROM_GC(gc),
+ _PyGCHead_REFS(gc) != 0);
}
subtract_refs(collectable);
for (gc = collectable->gc.gc_next; gc != collectable;
gc = gc->gc.gc_next) {
- assert(_PyGCHead_REFS(gc) >= 0);
+ PyObject_ASSERT(FROM_GC(gc),
+ _PyGCHead_REFS(gc) >= 0);
if (_PyGCHead_REFS(gc) != 0)
return -1;
}
diff --git a/Objects/object.c b/Objects/object.c
index fdd41a6..bfe806c 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -2031,6 +2031,35 @@ _PyTrash_thread_destroy_chain(void)
}
}
+PyAPI_FUNC(void)
+_PyObject_AssertFailed(PyObject *obj, const char *msg, const char *expr,
+ const char *file, int line, const char *function)
+{
+ fprintf(stderr,
+ "%s:%d: %s: Assertion \"%s\" failed.\n",
+ file, line, function, expr);
+ if (msg) {
+ fprintf(stderr, "%s\n", msg);
+ }
+
+ fflush(stderr);
+
+ if (obj) {
+ /* This might succeed or fail, but we're about to abort, so at least
+ try to provide any extra info we can: */
+ _PyObject_Dump(obj);
+ }
+ else {
+ fprintf(stderr, "NULL object\n");
+ }
+
+ fflush(stdout);
+ fflush(stderr);
+
+ /* Terminate the process: */
+ abort();
+}
+
#ifndef Py_TRACE_REFS
/* For Py_LIMITED_API, we need an out-of-line version of _Py_Dealloc.
Define this here, so we can undefine the macro. */

@ -0,0 +1,70 @@
diff --git a/Lib/ensurepip/__init__.py b/Lib/ensurepip/__init__.py
index 09c572d..167d27b 100644
--- a/Lib/ensurepip/__init__.py
+++ b/Lib/ensurepip/__init__.py
@@ -1,16 +1,27 @@
+import distutils.version
+import glob
import os
import os.path
-import pkgutil
import sys
import tempfile
__all__ = ["version", "bootstrap"]
+_WHEEL_DIR = "/usr/share/python{}-wheels/".format(sys.version_info[0])
-_SETUPTOOLS_VERSION = "40.6.2"
+def _get_most_recent_wheel_version(pkg):
+ prefix = os.path.join(_WHEEL_DIR, "{}-".format(pkg))
+ suffix = "-py2.py3-none-any.whl"
+ pattern = "{}*{}".format(prefix, suffix)
+ versions = (p[len(prefix):-len(suffix)] for p in glob.glob(pattern))
+ return str(max(versions, key=distutils.version.LooseVersion))
+
+
+_SETUPTOOLS_VERSION = _get_most_recent_wheel_version("setuptools")
+
+_PIP_VERSION = _get_most_recent_wheel_version("pip")
-_PIP_VERSION = "18.1"
_PROJECTS = [
("setuptools", _SETUPTOOLS_VERSION),
@@ -23,9 +34,15 @@ def _run_pip(args, additional_paths=None):
if additional_paths is not None:
sys.path = additional_paths + sys.path
- # Install the bundled software
- import pip._internal
- return pip._internal.main(args)
+ try:
+ # pip 10
+ from pip._internal import main
+ except ImportError:
+ # pip 9
+ from pip import main
+ if args[0] in ["install", "list", "wheel"]:
+ args.append('--pre')
+ return main(args)
def version():
@@ -94,12 +111,9 @@ def _bootstrap(*, root=None, upgrade=False, user=False,
additional_paths = []
for project, version in _PROJECTS:
wheel_name = "{}-{}-py2.py3-none-any.whl".format(project, version)
- whl = pkgutil.get_data(
- "ensurepip",
- "_bundled/{}".format(wheel_name),
- )
- with open(os.path.join(tmpdir, wheel_name), "wb") as fp:
- fp.write(whl)
+ with open(os.path.join(_WHEEL_DIR, wheel_name), "rb") as sfp:
+ with open(os.path.join(tmpdir, wheel_name), "wb") as fp:
+ fp.write(sfp.read())
additional_paths.append(os.path.join(tmpdir, wheel_name))

@ -0,0 +1,46 @@
diff --git a/Lib/distutils/command/install.py b/Lib/distutils/command/install.py
index 0258d3d..4ebf50a 100644
--- a/Lib/distutils/command/install.py
+++ b/Lib/distutils/command/install.py
@@ -418,8 +418,19 @@ class install(Command):
raise DistutilsOptionError(
"must not supply exec-prefix without prefix")
- self.prefix = os.path.normpath(sys.prefix)
- self.exec_prefix = os.path.normpath(sys.exec_prefix)
+ # self.prefix is set to sys.prefix + /local/
+ # if neither RPM build nor virtual environment is
+ # detected to make pip and distutils install packages
+ # into the separate location.
+ if (not (hasattr(sys, 'real_prefix') or
+ sys.prefix != sys.base_prefix) and
+ 'RPM_BUILD_ROOT' not in os.environ):
+ addition = "/local"
+ else:
+ addition = ""
+
+ self.prefix = os.path.normpath(sys.prefix) + addition
+ self.exec_prefix = os.path.normpath(sys.exec_prefix) + addition
else:
if self.exec_prefix is None:
diff --git a/Lib/site.py b/Lib/site.py
index 0fc9200..c95202e 100644
--- a/Lib/site.py
+++ b/Lib/site.py
@@ -322,7 +322,14 @@ def getsitepackages(prefixes=None):
return sitepackages
def addsitepackages(known_paths, prefixes=None):
- """Add site-packages to sys.path"""
+ """Add site-packages to sys.path
+
+ '/usr/local' is included in PREFIXES if RPM build is not detected
+ to make packages installed into this location visible.
+
+ """
+ if ENABLE_USER_SITE and 'RPM_BUILD_ROOT' not in os.environ:
+ PREFIXES.insert(0, "/usr/local")
for sitedir in getsitepackages(prefixes):
if os.path.isdir(sitedir):
addsitedir(sitedir, known_paths)

@ -0,0 +1,40 @@
diff --git a/Lib/threading.py b/Lib/threading.py
index 7ab9ad8..dcedd3b 100644
--- a/Lib/threading.py
+++ b/Lib/threading.py
@@ -3,7 +3,7 @@
import sys as _sys
import _thread
-from time import monotonic as _time
+from time import monotonic as _time, sleep as _sleep
from traceback import format_exc as _format_exc
from _weakrefset import WeakSet
from itertools import islice as _islice, count as _count
@@ -296,7 +296,25 @@ class Condition:
gotit = True
else:
if timeout > 0:
- gotit = waiter.acquire(True, timeout)
+ # rhbz#2003758: Avoid waiter.acquire(True, timeout) since
+ # it uses the system clock internally.
+ #
+ # Balancing act: We can't afford a pure busy loop, so we
+ # have to sleep; but if we sleep the whole timeout time,
+ # we'll be unresponsive. The scheme here sleeps very
+ # little at first, longer as time goes on, but never longer
+ # than 20 times per second (or the timeout time remaining).
+ endtime = _time() + timeout
+ delay = 0.0005 # 500 us -> initial delay of 1 ms
+ while True:
+ gotit = waiter.acquire(0)
+ if gotit:
+ break
+ remaining = min(endtime - _time(), timeout)
+ if remaining <= 0:
+ break
+ delay = min(delay * 2, remaining, .05)
+ _sleep(delay)
else:
gotit = waiter.acquire(False)
return gotit

@ -0,0 +1,901 @@
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index d14793a..65aa3ad 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -728,6 +728,45 @@ conflict.
.. versionadded:: 3.6
+
+.. envvar:: PYTHONCOERCECLOCALE
+
+ If set to the value ``0``, causes the main Python command line application
+ to skip coercing the legacy ASCII-based C locale to a more capable UTF-8
+ based alternative. Note that this setting is checked even when the
+ :option:`-E` or :option:`-I` options are used, as it is handled prior to
+ the processing of command line options.
+
+ If this variable is *not* set, or is set to a value other than ``0``, and
+ the current locale reported for the ``LC_CTYPE`` category is the default
+ ``C`` locale, then the Python CLI will attempt to configure one of the
+ following locales for the given locale categories before loading the
+ interpreter runtime:
+
+ * ``C.UTF-8`` (``LC_ALL``)
+ * ``C.utf8`` (``LC_ALL``)
+ * ``UTF-8`` (``LC_CTYPE``)
+
+ If setting one of these locale categories succeeds, then the matching
+ environment variables will be set (both ``LC_ALL`` and ``LANG`` for the
+ ``LC_ALL`` category, and ``LC_CTYPE`` for the ``LC_CTYPE`` category) in
+ the current process environment before the Python runtime is initialized.
+
+ Configuring one of these locales (either explicitly or via the above
+ implicit locale coercion) will automatically set the error handler for
+ :data:`sys.stdin` and :data:`sys.stdout` to ``surrogateescape``. This
+ behavior can be overridden using :envvar:`PYTHONIOENCODING` as usual.
+
+ For debugging purposes, setting ``PYTHONCOERCECLOCALE=warn`` will cause
+ Python to emit warning messages on ``stderr`` if either the locale coercion
+ activates, or else if a locale that *would* have triggered coercion is
+ still active when the Python runtime is initialized.
+
+ Availability: \*nix
+
+ .. versionadded:: 3.7
+ See :pep:`538` for more details.
+
Debug-mode variables
~~~~~~~~~~~~~~~~~~~~
diff --git a/Lib/test/support/script_helper.py b/Lib/test/support/script_helper.py
index 507dc48..c3cb720 100644
--- a/Lib/test/support/script_helper.py
+++ b/Lib/test/support/script_helper.py
@@ -56,8 +56,35 @@ def interpreter_requires_environment():
return __cached_interp_requires_environment
-_PythonRunResult = collections.namedtuple("_PythonRunResult",
- ("rc", "out", "err"))
+class _PythonRunResult(collections.namedtuple("_PythonRunResult",
+ ("rc", "out", "err"))):
+ """Helper for reporting Python subprocess run results"""
+ def fail(self, cmd_line):
+ """Provide helpful details about failed subcommand runs"""
+ # Limit to 80 lines to ASCII characters
+ maxlen = 80 * 100
+ out, err = self.out, self.err
+ if len(out) > maxlen:
+ out = b'(... truncated stdout ...)' + out[-maxlen:]
+ if len(err) > maxlen:
+ err = b'(... truncated stderr ...)' + err[-maxlen:]
+ out = out.decode('ascii', 'replace').rstrip()
+ err = err.decode('ascii', 'replace').rstrip()
+ raise AssertionError("Process return code is %d\n"
+ "command line: %r\n"
+ "\n"
+ "stdout:\n"
+ "---\n"
+ "%s\n"
+ "---\n"
+ "\n"
+ "stderr:\n"
+ "---\n"
+ "%s\n"
+ "---"
+ % (self.rc, cmd_line,
+ out,
+ err))
# Executing the interpreter in a subprocess
@@ -115,30 +142,7 @@ def run_python_until_end(*args, **env_vars):
def _assert_python(expected_success, *args, **env_vars):
res, cmd_line = run_python_until_end(*args, **env_vars)
if (res.rc and expected_success) or (not res.rc and not expected_success):
- # Limit to 80 lines to ASCII characters
- maxlen = 80 * 100
- out, err = res.out, res.err
- if len(out) > maxlen:
- out = b'(... truncated stdout ...)' + out[-maxlen:]
- if len(err) > maxlen:
- err = b'(... truncated stderr ...)' + err[-maxlen:]
- out = out.decode('ascii', 'replace').rstrip()
- err = err.decode('ascii', 'replace').rstrip()
- raise AssertionError("Process return code is %d\n"
- "command line: %r\n"
- "\n"
- "stdout:\n"
- "---\n"
- "%s\n"
- "---\n"
- "\n"
- "stderr:\n"
- "---\n"
- "%s\n"
- "---"
- % (res.rc, cmd_line,
- out,
- err))
+ res.fail(cmd_line)
return res
def assert_python_ok(*args, **env_vars):
diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py
new file mode 100644
index 0000000..635c98f
--- /dev/null
+++ b/Lib/test/test_c_locale_coercion.py
@@ -0,0 +1,371 @@
+# Tests the attempted automatic coercion of the C locale to a UTF-8 locale
+
+import unittest
+import locale
+import os
+import sys
+import sysconfig
+import shutil
+import subprocess
+from collections import namedtuple
+
+import test.support
+from test.support.script_helper import (
+ run_python_until_end,
+ interpreter_requires_environment,
+)
+
+# Set our expectation for the default encoding used in the C locale
+# for the filesystem encoding and the standard streams
+
+# AIX uses iso8859-1 in the C locale, other *nix platforms use ASCII
+if sys.platform.startswith("aix"):
+ C_LOCALE_STREAM_ENCODING = "iso8859-1"
+else:
+ C_LOCALE_STREAM_ENCODING = "ascii"
+
+# FS encoding is UTF-8 on macOS, other *nix platforms use the locale encoding
+if sys.platform == "darwin":
+ C_LOCALE_FS_ENCODING = "utf-8"
+else:
+ C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING
+
+# Note that the above is probably still wrong in some cases, such as:
+# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set
+# * AIX and any other platforms that use latin-1 in the C locale
+#
+# Options for dealing with this:
+# * Don't set PYTHON_COERCE_C_LOCALE on such platforms (e.g. Windows doesn't)
+# * Fix the test expectations to match the actual platform behaviour
+
+# In order to get the warning messages to match up as expected, the candidate
+# order here must much the target locale order in Python/pylifecycle.c
+_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8")
+
+# There's no reliable cross-platform way of checking locale alias
+# lists, so the only way of knowing which of these locales will work
+# is to try them with locale.setlocale(). We do that in a subprocess
+# to avoid altering the locale of the test runner.
+#
+# If the relevant locale module attributes exist, and we're not on a platform
+# where we expect it to always succeed, we also check that
+# `locale.nl_langinfo(locale.CODESET)` works, as if it fails, the interpreter
+# will skip locale coercion for that particular target locale
+_check_nl_langinfo_CODESET = bool(
+ sys.platform not in ("darwin", "linux") and
+ hasattr(locale, "nl_langinfo") and
+ hasattr(locale, "CODESET")
+)
+
+def _set_locale_in_subprocess(locale_name):
+ cmd_fmt = "import locale; print(locale.setlocale(locale.LC_CTYPE, '{}'))"
+ if _check_nl_langinfo_CODESET:
+ # If there's no valid CODESET, we expect coercion to be skipped
+ cmd_fmt += "; import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))"
+ cmd = cmd_fmt.format(locale_name)
+ result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)
+ return result.rc == 0
+
+
+
+_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
+_EncodingDetails = namedtuple("EncodingDetails", _fields)
+
+class EncodingDetails(_EncodingDetails):
+ # XXX (ncoghlan): Using JSON for child state reporting may be less fragile
+ CHILD_PROCESS_SCRIPT = ";".join([
+ "import sys, os",
+ "print(sys.getfilesystemencoding())",
+ "print(sys.stdin.encoding + ':' + sys.stdin.errors)",
+ "print(sys.stdout.encoding + ':' + sys.stdout.errors)",
+ "print(sys.stderr.encoding + ':' + sys.stderr.errors)",
+ "print(os.environ.get('LANG', 'not set'))",
+ "print(os.environ.get('LC_CTYPE', 'not set'))",
+ "print(os.environ.get('LC_ALL', 'not set'))",
+ ])
+
+ @classmethod
+ def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, env_vars):
+ """Returns expected child process details for a given encoding"""
+ _stream = stream_encoding + ":{}"
+ # stdin and stdout should use surrogateescape either because the
+ # coercion triggered, or because the C locale was detected
+ stream_info = 2*[_stream.format("surrogateescape")]
+ # stderr should always use backslashreplace
+ stream_info.append(_stream.format("backslashreplace"))
+ expected_lang = env_vars.get("LANG", "not set").lower()
+ if coercion_expected:
+ expected_lc_ctype = CLI_COERCION_TARGET.lower()
+ else:
+ expected_lc_ctype = env_vars.get("LC_CTYPE", "not set").lower()
+ expected_lc_all = env_vars.get("LC_ALL", "not set").lower()
+ env_info = expected_lang, expected_lc_ctype, expected_lc_all
+ return dict(cls(fs_encoding, *stream_info, *env_info)._asdict())
+
+ @staticmethod
+ def _handle_output_variations(data):
+ """Adjust the output to handle platform specific idiosyncrasies
+
+ * Some platforms report ASCII as ANSI_X3.4-1968
+ * Some platforms report ASCII as US-ASCII
+ * Some platforms report UTF-8 instead of utf-8
+ """
+ data = data.replace(b"ANSI_X3.4-1968", b"ascii")
+ data = data.replace(b"US-ASCII", b"ascii")
+ data = data.lower()
+ return data
+
+ @classmethod
+ def get_child_details(cls, env_vars):
+ """Retrieves fsencoding and standard stream details from a child process
+
+ Returns (encoding_details, stderr_lines):
+
+ - encoding_details: EncodingDetails for eager decoding
+ - stderr_lines: result of calling splitlines() on the stderr output
+
+ The child is run in isolated mode if the current interpreter supports
+ that.
+ """
+ result, py_cmd = run_python_until_end(
+ "-c", cls.CHILD_PROCESS_SCRIPT,
+ __isolated=True,
+ **env_vars
+ )
+ if not result.rc == 0:
+ result.fail(py_cmd)
+ # All subprocess outputs in this test case should be pure ASCII
+ adjusted_output = cls._handle_output_variations(result.out)
+ stdout_lines = adjusted_output.decode("ascii").splitlines()
+ child_encoding_details = dict(cls(*stdout_lines)._asdict())
+ stderr_lines = result.err.decode("ascii").rstrip().splitlines()
+ return child_encoding_details, stderr_lines
+
+
+# Details of the shared library warning emitted at runtime
+LEGACY_LOCALE_WARNING = (
+ "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
+ "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
+ "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
+ "locales is recommended."
+)
+
+# Details of the CLI locale coercion warning emitted at runtime
+CLI_COERCION_WARNING_FMT = (
+ "Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale "
+ "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior)."
+)
+
+
+AVAILABLE_TARGETS = None
+CLI_COERCION_TARGET = None
+CLI_COERCION_WARNING = None
+
+def setUpModule():
+ global AVAILABLE_TARGETS
+ global CLI_COERCION_TARGET
+ global CLI_COERCION_WARNING
+
+ if AVAILABLE_TARGETS is not None:
+ # initialization already done
+ return
+ AVAILABLE_TARGETS = []
+
+ # Find the target locales available in the current system
+ for target_locale in _C_UTF8_LOCALES:
+ if _set_locale_in_subprocess(target_locale):
+ AVAILABLE_TARGETS.append(target_locale)
+
+ if AVAILABLE_TARGETS:
+ # Coercion is expected to use the first available target locale
+ CLI_COERCION_TARGET = AVAILABLE_TARGETS[0]
+ CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET)
+
+
+class _LocaleHandlingTestCase(unittest.TestCase):
+ # Base class to check expected locale handling behaviour
+
+ def _check_child_encoding_details(self,
+ env_vars,
+ expected_fs_encoding,
+ expected_stream_encoding,
+ expected_warnings,
+ coercion_expected):
+ """Check the C locale handling for the given process environment
+
+ Parameters:
+ expected_fs_encoding: expected sys.getfilesystemencoding() result
+ expected_stream_encoding: expected encoding for standard streams
+ expected_warning: stderr output to expect (if any)
+ """
+ result = EncodingDetails.get_child_details(env_vars)
+ encoding_details, stderr_lines = result
+ expected_details = EncodingDetails.get_expected_details(
+ coercion_expected,
+ expected_fs_encoding,
+ expected_stream_encoding,
+ env_vars
+ )
+ self.assertEqual(encoding_details, expected_details)
+ if expected_warnings is None:
+ expected_warnings = []
+ self.assertEqual(stderr_lines, expected_warnings)
+
+
+class LocaleConfigurationTests(_LocaleHandlingTestCase):
+ # Test explicit external configuration via the process environment
+
+ def setUpClass():
+ # This relies on setupModule() having been run, so it can't be
+ # handled via the @unittest.skipUnless decorator
+ if not AVAILABLE_TARGETS:
+ raise unittest.SkipTest("No C-with-UTF-8 locale available")
+
+ def test_external_target_locale_configuration(self):
+
+ # Explicitly setting a target locale should give the same behaviour as
+ # is seen when implicitly coercing to that target locale
+ self.maxDiff = None
+
+ expected_fs_encoding = "utf-8"
+ expected_stream_encoding = "utf-8"
+
+ base_var_dict = {
+ "LANG": "",
+ "LC_CTYPE": "",
+ "LC_ALL": "",
+ }
+ for env_var in ("LANG", "LC_CTYPE"):
+ for locale_to_set in AVAILABLE_TARGETS:
+ # XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as
+ # expected, so skip that combination for now
+ # See https://bugs.python.org/issue30672 for discussion
+ if env_var == "LANG" and locale_to_set == "UTF-8":
+ continue
+
+ with self.subTest(env_var=env_var,
+ configured_locale=locale_to_set):
+ var_dict = base_var_dict.copy()
+ var_dict[env_var] = locale_to_set
+ self._check_child_encoding_details(var_dict,
+ expected_fs_encoding,
+ expected_stream_encoding,
+ expected_warnings=None,
+ coercion_expected=False)
+
+
+
+@test.support.cpython_only
+@unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"),
+ "C locale coercion disabled at build time")
+class LocaleCoercionTests(_LocaleHandlingTestCase):
+ # Test implicit reconfiguration of the environment during CLI startup
+
+ def _check_c_locale_coercion(self,
+ fs_encoding, stream_encoding,
+ coerce_c_locale,
+ expected_warnings=None,
+ coercion_expected=True,
+ **extra_vars):
+ """Check the C locale handling for various configurations
+
+ Parameters:
+ fs_encoding: expected sys.getfilesystemencoding() result
+ stream_encoding: expected encoding for standard streams
+ coerce_c_locale: setting to use for PYTHONCOERCECLOCALE
+ None: don't set the variable at all
+ str: the value set in the child's environment
+ expected_warnings: expected warning lines on stderr
+ extra_vars: additional environment variables to set in subprocess
+ """
+ self.maxDiff = None
+
+ if not AVAILABLE_TARGETS:
+ # Locale coercion is disabled when there aren't any target locales
+ fs_encoding = C_LOCALE_FS_ENCODING
+ stream_encoding = C_LOCALE_STREAM_ENCODING
+ coercion_expected = False
+ if expected_warnings:
+ expected_warnings = [LEGACY_LOCALE_WARNING]
+
+ base_var_dict = {
+ "LANG": "",
+ "LC_CTYPE": "",
+ "LC_ALL": "",
+ }
+ base_var_dict.update(extra_vars)
+ for env_var in ("LANG", "LC_CTYPE"):
+ for locale_to_set in ("", "C", "POSIX", "invalid.ascii"):
+ # XXX (ncoghlan): *BSD platforms don't behave as expected in the
+ # POSIX locale, so we skip that for now
+ # See https://bugs.python.org/issue30672 for discussion
+ if locale_to_set == "POSIX":
+ continue
+ with self.subTest(env_var=env_var,
+ nominal_locale=locale_to_set,
+ PYTHONCOERCECLOCALE=coerce_c_locale):
+ var_dict = base_var_dict.copy()
+ var_dict[env_var] = locale_to_set
+ if coerce_c_locale is not None:
+ var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
+ # Check behaviour on successful coercion
+ self._check_child_encoding_details(var_dict,
+ fs_encoding,
+ stream_encoding,
+ expected_warnings,
+ coercion_expected)
+
+ def test_test_PYTHONCOERCECLOCALE_not_set(self):
+ # This should coerce to the first available target locale by default
+ self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None)
+
+ def test_PYTHONCOERCECLOCALE_not_zero(self):
+ # *Any* string other than "0" is considered "set" for our purposes
+ # and hence should result in the locale coercion being enabled
+ for setting in ("", "1", "true", "false"):
+ self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting)
+
+ def test_PYTHONCOERCECLOCALE_set_to_warn(self):
+ # PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales
+ self._check_c_locale_coercion("utf-8", "utf-8",
+ coerce_c_locale="warn",
+ expected_warnings=[CLI_COERCION_WARNING])
+
+
+ def test_PYTHONCOERCECLOCALE_set_to_zero(self):
+ # The setting "0" should result in the locale coercion being disabled
+ self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
+ C_LOCALE_STREAM_ENCODING,
+ coerce_c_locale="0",
+ coercion_expected=False)
+ # Setting LC_ALL=C shouldn't make any difference to the behaviour
+ self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
+ C_LOCALE_STREAM_ENCODING,
+ coerce_c_locale="0",
+ LC_ALL="C",
+ coercion_expected=False)
+
+ def test_LC_ALL_set_to_C(self):
+ # Setting LC_ALL should render the locale coercion ineffective
+ self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
+ C_LOCALE_STREAM_ENCODING,
+ coerce_c_locale=None,
+ LC_ALL="C",
+ coercion_expected=False)
+ # And result in a warning about a lack of locale compatibility
+ self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
+ C_LOCALE_STREAM_ENCODING,
+ coerce_c_locale="warn",
+ LC_ALL="C",
+ expected_warnings=[LEGACY_LOCALE_WARNING],
+ coercion_expected=False)
+
+def test_main():
+ test.support.run_unittest(
+ LocaleConfigurationTests,
+ LocaleCoercionTests
+ )
+ test.support.reap_children()
+
+if __name__ == "__main__":
+ test_main()
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index 38156b4..5922ed9 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -153,6 +153,7 @@ class CmdLineTest(unittest.TestCase):
env = os.environ.copy()
# Use C locale to get ascii for the locale encoding
env['LC_ALL'] = 'C'
+ env['PYTHONCOERCECLOCALE'] = '0'
code = (
b'import locale; '
b'print(ascii("' + undecodable + b'"), '
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 7866a5c..b41239a 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -680,6 +680,7 @@ class SysModuleTest(unittest.TestCase):
# Force the POSIX locale
env = os.environ.copy()
env["LC_ALL"] = "C"
+ env["PYTHONCOERCECLOCALE"] = "0"
code = '\n'.join((
'import sys',
'def dump(name):',
diff --git a/Modules/main.c b/Modules/main.c
index 585d696..96d8be4 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -107,7 +107,11 @@ static const char usage_6[] =
" predictable seed.\n"
"PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n"
" on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n"
-" hooks.\n";
+" hooks.\n"
+
+"PYTHONCOERCECLOCALE: if this variable is set to 0, it disables the locale\n"
+" coercion behavior. Use PYTHONCOERCECLOCALE=warn to request display of\n"
+" locale coercion and locale compatibility warnings on stderr.\n";
static int
usage(int exitcode, const wchar_t* program)
diff --git a/Programs/_testembed.c b/Programs/_testembed.c
index 813cf30..2a64092 100644
--- a/Programs/_testembed.c
+++ b/Programs/_testembed.c
@@ -1,4 +1,5 @@
#include <Python.h>
+#include "pyconfig.h"
#include "pythread.h"
#include <stdio.h>
diff --git a/Programs/python.c b/Programs/python.c
index a7afbc7..03f8295 100644
--- a/Programs/python.c
+++ b/Programs/python.c
@@ -15,6 +15,21 @@ wmain(int argc, wchar_t **argv)
}
#else
+/* Access private pylifecycle helper API to better handle the legacy C locale
+ *
+ * The legacy C locale assumes ASCII as the default text encoding, which
+ * causes problems not only for the CPython runtime, but also other
+ * components like GNU readline.
+ *
+ * Accordingly, when the CLI detects it, it attempts to coerce it to a
+ * more capable UTF-8 based alternative.
+ *
+ * See the documentation of the PYTHONCOERCECLOCALE setting for more details.
+ *
+ */
+extern int _Py_LegacyLocaleDetected(void);
+extern void _Py_CoerceLegacyLocale(void);
+
int
main(int argc, char **argv)
{
@@ -25,7 +40,11 @@ main(int argc, char **argv)
char *oldloc;
/* Force malloc() allocator to bootstrap Python */
+#ifdef Py_DEBUG
+ (void)_PyMem_SetupAllocators("malloc_debug");
+# else
(void)_PyMem_SetupAllocators("malloc");
+# endif
argv_copy = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1));
argv_copy2 = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1));
@@ -49,7 +68,21 @@ main(int argc, char **argv)
return 1;
}
+#ifdef __ANDROID__
+ /* Passing "" to setlocale() on Android requests the C locale rather
+ * than checking environment variables, so request C.UTF-8 explicitly
+ */
+ setlocale(LC_ALL, "C.UTF-8");
+#else
+ /* Reconfigure the locale to the default for this process */
setlocale(LC_ALL, "");
+#endif
+
+ if (_Py_LegacyLocaleDetected()) {
+ _Py_CoerceLegacyLocale();
+ }
+
+ /* Convert from char to wchar_t based on the locale settings */
for (i = 0; i < argc; i++) {
argv_copy[i] = Py_DecodeLocale(argv[i], NULL);
if (!argv_copy[i]) {
@@ -70,7 +103,11 @@ main(int argc, char **argv)
/* Force again malloc() allocator to release memory blocks allocated
before Py_Main() */
+#ifdef Py_DEBUG
+ (void)_PyMem_SetupAllocators("malloc_debug");
+# else
(void)_PyMem_SetupAllocators("malloc");
+# endif
for (i = 0; i < argc; i++) {
PyMem_RawFree(argv_copy2[i]);
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index ecfdfee..4fee178 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -167,6 +167,7 @@ Py_SetStandardStreamEncoding(const char *encoding, const char *errors)
return 0;
}
+
/* Global initializations. Can be undone by Py_FinalizeEx(). Don't
call this twice without an intervening Py_FinalizeEx() call. When
initializations fail, a fatal error is issued and the function does
@@ -301,6 +302,183 @@ import_init(PyInterpreterState *interp, PyObject *sysmod)
}
+/* Helper functions to better handle the legacy C locale
+ *
+ * The legacy C locale assumes ASCII as the default text encoding, which
+ * causes problems not only for the CPython runtime, but also other
+ * components like GNU readline.
+ *
+ * Accordingly, when the CLI detects it, it attempts to coerce it to a
+ * more capable UTF-8 based alternative as follows:
+ *
+ * if (_Py_LegacyLocaleDetected()) {
+ * _Py_CoerceLegacyLocale();
+ * }
+ *
+ * See the documentation of the PYTHONCOERCECLOCALE setting for more details.
+ *
+ * Locale coercion also impacts the default error handler for the standard
+ * streams: while the usual default is "strict", the default for the legacy
+ * C locale and for any of the coercion target locales is "surrogateescape".
+ */
+
+int
+_Py_LegacyLocaleDetected(void)
+{
+#ifndef MS_WINDOWS
+ /* On non-Windows systems, the C locale is considered a legacy locale */
+ /* XXX (ncoghlan): some platforms (notably Mac OS X) don't appear to treat
+ * the POSIX locale as a simple alias for the C locale, so
+ * we may also want to check for that explicitly.
+ */
+ const char *ctype_loc = setlocale(LC_CTYPE, NULL);
+ return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0;
+#else
+ /* Windows uses code pages instead of locales, so no locale is legacy */
+ return 0;
+#endif
+}
+
+
+static const char *_C_LOCALE_WARNING =
+ "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
+ "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
+ "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
+ "locales is recommended.\n";
+
+static int
+_legacy_locale_warnings_enabled(void)
+{
+ const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
+ return (coerce_c_locale != NULL &&
+ strncmp(coerce_c_locale, "warn", 5) == 0);
+}
+
+static void
+_emit_stderr_warning_for_legacy_locale(void)
+{
+ if (_legacy_locale_warnings_enabled()) {
+ if (_Py_LegacyLocaleDetected()) {
+ fprintf(stderr, "%s", _C_LOCALE_WARNING);
+ }
+ }
+}
+
+typedef struct _CandidateLocale {
+ const char *locale_name; /* The locale to try as a coercion target */
+} _LocaleCoercionTarget;
+
+static _LocaleCoercionTarget _TARGET_LOCALES[] = {
+ {"C.UTF-8"},
+ {"C.utf8"},
+ {"UTF-8"},
+ {NULL}
+};
+
+static char *
+get_default_standard_stream_error_handler(void)
+{
+ const char *ctype_loc = setlocale(LC_CTYPE, NULL);
+ if (ctype_loc != NULL) {
+ /* "surrogateescape" is the default in the legacy C locale */
+ if (strcmp(ctype_loc, "C") == 0) {
+ return "surrogateescape";
+ }
+
+#ifdef PY_COERCE_C_LOCALE
+ /* "surrogateescape" is the default in locale coercion target locales */
+ const _LocaleCoercionTarget *target = NULL;
+ for (target = _TARGET_LOCALES; target->locale_name; target++) {
+ if (strcmp(ctype_loc, target->locale_name) == 0) {
+ return "surrogateescape";
+ }
+ }
+#endif
+ }
+
+ /* Otherwise return NULL to request the typical default error handler */
+ return NULL;
+}
+
+#ifdef PY_COERCE_C_LOCALE
+static const char *_C_LOCALE_COERCION_WARNING =
+ "Python detected LC_CTYPE=C: LC_CTYPE coerced to %.20s (set another locale "
+ "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n";
+
+static void
+_coerce_default_locale_settings(const _LocaleCoercionTarget *target)
+{
+
+ const char *newloc = target->locale_name;
+
+ /* Reset locale back to currently configured defaults */
+ setlocale(LC_ALL, "");
+
+ /* Set the relevant locale environment variable */
+ if (setenv("LC_CTYPE", newloc, 1)) {
+ fprintf(stderr,
+ "Error setting LC_CTYPE, skipping C locale coercion\n");
+ return;
+ }
+ if (_legacy_locale_warnings_enabled()) {
+ fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc);
+ }
+
+ /* Reconfigure with the overridden environment variables */
+ setlocale(LC_ALL, "");
+}
+#endif
+
+
+void
+_Py_CoerceLegacyLocale(void)
+{
+#ifdef PY_COERCE_C_LOCALE
+ /* We ignore the Python -E and -I flags here, as the CLI needs to sort out
+ * the locale settings *before* we try to do anything with the command
+ * line arguments. For cross-platform debugging purposes, we also need
+ * to give end users a way to force even scripts that are otherwise
+ * isolated from their environment to use the legacy ASCII-centric C
+ * locale.
+ *
+ * Ignoring -E and -I is safe from a security perspective, as we only use
+ * the setting to turn *off* the implicit locale coercion, and anyone with
+ * access to the process environment already has the ability to set
+ * `LC_ALL=C` to override the C level locale settings anyway.
+ */
+ const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
+ if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) {
+ /* PYTHONCOERCECLOCALE is not set, or is set to something other than "0" */
+ const char *locale_override = getenv("LC_ALL");
+ if (locale_override == NULL || *locale_override == '\0') {
+ /* LC_ALL is also not set (or is set to an empty string) */
+ const _LocaleCoercionTarget *target = NULL;
+ for (target = _TARGET_LOCALES; target->locale_name; target++) {
+ const char *new_locale = setlocale(LC_CTYPE,
+ target->locale_name);
+ if (new_locale != NULL) {
+#if !defined(__APPLE__) && defined(HAVE_LANGINFO_H) && defined(CODESET)
+ /* Also ensure that nl_langinfo works in this locale */
+ char *codeset = nl_langinfo(CODESET);
+ if (!codeset || *codeset == '\0') {
+ /* CODESET is not set or empty, so skip coercion */
+ new_locale = NULL;
+ setlocale(LC_CTYPE, "");
+ continue;
+ }
+#endif
+ /* Successfully configured locale, so make it the default */
+ _coerce_default_locale_settings(target);
+ return;
+ }
+ }
+ }
+ }
+ /* No C locale warning here, as Py_Initialize will emit one later */
+#endif
+}
+
+
void
_Py_InitializeEx_Private(int install_sigs, int install_importlib)
{
@@ -315,11 +493,19 @@ _Py_InitializeEx_Private(int install_sigs, int install_importlib)
initialized = 1;
_Py_Finalizing = NULL;
-#ifdef HAVE_SETLOCALE
+#ifdef __ANDROID__
+ /* Passing "" to setlocale() on Android requests the C locale rather
+ * than checking environment variables, so request C.UTF-8 explicitly
+ */
+ setlocale(LC_CTYPE, "C.UTF-8");
+#else
+#ifndef MS_WINDOWS
/* Set up the LC_CTYPE locale, so we can obtain
the locale's charset without having to switch
locales. */
setlocale(LC_CTYPE, "");
+ _emit_stderr_warning_for_legacy_locale();
+#endif
#endif
if ((p = Py_GETENV("PYTHONDEBUG")) && *p != '\0')
@@ -1247,12 +1433,8 @@ initstdio(void)
}
}
if (!errors && !(pythonioencoding && *pythonioencoding)) {
- /* When the LC_CTYPE locale is the POSIX locale ("C locale"),
- stdin and stdout use the surrogateescape error handler by
- default, instead of the strict error handler. */
- char *loc = setlocale(LC_CTYPE, NULL);
- if (loc != NULL && strcmp(loc, "C") == 0)
- errors = "surrogateescape";
+ /* Choose the default error handler based on the current locale */
+ errors = get_default_standard_stream_error_handler();
}
}
diff --git a/configure.ac b/configure.ac
index 3f2459a..7444486 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3360,6 +3360,40 @@ then
fi
AC_MSG_RESULT($with_pymalloc)
+# Check for --with-c-locale-coercion
+AC_MSG_CHECKING(for --with-c-locale-coercion)
+AC_ARG_WITH(c-locale-coercion,
+ AS_HELP_STRING([--with(out)-c-locale-coercion],
+ [disable/enable C locale coercion to a UTF-8 based locale]))
+
+if test -z "$with_c_locale_coercion"
+then
+ with_c_locale_coercion="yes"
+fi
+if test "$with_c_locale_coercion" != "no"
+then
+ AC_DEFINE(PY_COERCE_C_LOCALE, 1,
+ [Define if you want to coerce the C locale to a UTF-8 based locale])
+fi
+AC_MSG_RESULT($with_c_locale_coercion)
+
+# Check for --with-c-locale-warning
+AC_MSG_CHECKING(for --with-c-locale-warning)
+AC_ARG_WITH(c-locale-warning,
+ AS_HELP_STRING([--with(out)-c-locale-warning],
+ [disable/enable locale compatibility warning in the C locale]))
+
+if test -z "$with_c_locale_warning"
+then
+ with_c_locale_warning="yes"
+fi
+if test "$with_c_locale_warning" != "no"
+then
+ AC_DEFINE(PY_WARN_ON_C_LOCALE, 1,
+ [Define to emit a locale compatibility warning in the C locale])
+fi
+AC_MSG_RESULT($with_c_locale_warning)
+
# Check for Valgrind support
AC_MSG_CHECKING([for --with-valgrind])
AC_ARG_WITH([valgrind],

@ -0,0 +1,228 @@
diff --git a/Lib/ssl.py b/Lib/ssl.py
index 1f3a31a..b54a684 100644
--- a/Lib/ssl.py
+++ b/Lib/ssl.py
@@ -116,6 +116,7 @@ except ImportError:
from _ssl import HAS_SNI, HAS_ECDH, HAS_NPN, HAS_ALPN, HAS_TLSv1_3
+from _ssl import _DEFAULT_CIPHERS
from _ssl import _OPENSSL_API_VERSION
@@ -174,48 +175,7 @@ else:
CHANNEL_BINDING_TYPES = []
-# Disable weak or insecure ciphers by default
-# (OpenSSL's default setting is 'DEFAULT:!aNULL:!eNULL')
-# Enable a better set of ciphers by default
-# This list has been explicitly chosen to:
-# * TLS 1.3 ChaCha20 and AES-GCM cipher suites
-# * Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE)
-# * Prefer ECDHE over DHE for better performance
-# * Prefer AEAD over CBC for better performance and security
-# * Prefer AES-GCM over ChaCha20 because most platforms have AES-NI
-# (ChaCha20 needs OpenSSL 1.1.0 or patched 1.0.2)
-# * Prefer any AES-GCM and ChaCha20 over any AES-CBC for better
-# performance and security
-# * Then Use HIGH cipher suites as a fallback
-# * Disable NULL authentication, NULL encryption, 3DES and MD5 MACs
-# for security reasons
-_DEFAULT_CIPHERS = (
- 'TLS13-AES-256-GCM-SHA384:TLS13-CHACHA20-POLY1305-SHA256:'
- 'TLS13-AES-128-GCM-SHA256:'
- 'ECDH+AESGCM:ECDH+CHACHA20:DH+AESGCM:DH+CHACHA20:ECDH+AES256:DH+AES256:'
- 'ECDH+AES128:DH+AES:ECDH+HIGH:DH+HIGH:RSA+AESGCM:RSA+AES:RSA+HIGH:'
- '!aNULL:!eNULL:!MD5:!3DES'
- )
-
-# Restricted and more secure ciphers for the server side
-# This list has been explicitly chosen to:
-# * TLS 1.3 ChaCha20 and AES-GCM cipher suites
-# * Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE)
-# * Prefer ECDHE over DHE for better performance
-# * Prefer AEAD over CBC for better performance and security
-# * Prefer AES-GCM over ChaCha20 because most platforms have AES-NI
-# * Prefer any AES-GCM and ChaCha20 over any AES-CBC for better
-# performance and security
-# * Then Use HIGH cipher suites as a fallback
-# * Disable NULL authentication, NULL encryption, MD5 MACs, DSS, RC4, and
-# 3DES for security reasons
-_RESTRICTED_SERVER_CIPHERS = (
- 'TLS13-AES-256-GCM-SHA384:TLS13-CHACHA20-POLY1305-SHA256:'
- 'TLS13-AES-128-GCM-SHA256:'
- 'ECDH+AESGCM:ECDH+CHACHA20:DH+AESGCM:DH+CHACHA20:ECDH+AES256:DH+AES256:'
- 'ECDH+AES128:DH+AES:ECDH+HIGH:DH+HIGH:RSA+AESGCM:RSA+AES:RSA+HIGH:'
- '!aNULL:!eNULL:!MD5:!DSS:!RC4:!3DES'
-)
+_RESTRICTED_SERVER_CIPHERS = _DEFAULT_CIPHERS
class CertificateError(ValueError):
@@ -389,8 +349,6 @@ class SSLContext(_SSLContext):
def __new__(cls, protocol=PROTOCOL_TLS, *args, **kwargs):
self = _SSLContext.__new__(cls, protocol)
- if protocol != _SSLv2_IF_EXISTS:
- self.set_ciphers(_DEFAULT_CIPHERS)
return self
def __init__(self, protocol=PROTOCOL_TLS):
@@ -505,8 +463,6 @@ def create_default_context(purpose=Purpose.SERVER_AUTH, *, cafile=None,
# verify certs and host name in client mode
context.verify_mode = CERT_REQUIRED
context.check_hostname = True
- elif purpose == Purpose.CLIENT_AUTH:
- context.set_ciphers(_RESTRICTED_SERVER_CIPHERS)
if cafile or capath or cadata:
context.load_verify_locations(cafile, capath, cadata)
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
index 9785a59..34a7ec2 100644
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -18,6 +18,7 @@ import asyncore
import weakref
import platform
import functools
+import sysconfig
try:
import ctypes
except ImportError:
@@ -36,7 +37,7 @@ PROTOCOLS = sorted(ssl._PROTOCOL_NAMES)
HOST = support.HOST
IS_LIBRESSL = ssl.OPENSSL_VERSION.startswith('LibreSSL')
IS_OPENSSL_1_1 = not IS_LIBRESSL and ssl.OPENSSL_VERSION_INFO >= (1, 1, 0)
-
+PY_SSL_DEFAULT_CIPHERS = sysconfig.get_config_var('PY_SSL_DEFAULT_CIPHERS')
def data_file(*name):
return os.path.join(os.path.dirname(__file__), *name)
@@ -889,6 +890,19 @@ class ContextTests(unittest.TestCase):
with self.assertRaisesRegex(ssl.SSLError, "No cipher can be selected"):
ctx.set_ciphers("^$:,;?*'dorothyx")
+ @unittest.skipUnless(PY_SSL_DEFAULT_CIPHERS == 1,
+ "Test applies only to Python default ciphers")
+ def test_python_ciphers(self):
+ ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+ ciphers = ctx.get_ciphers()
+ for suite in ciphers:
+ name = suite['name']
+ self.assertNotIn("PSK", name)
+ self.assertNotIn("SRP", name)
+ self.assertNotIn("MD5", name)
+ self.assertNotIn("RC4", name)
+ self.assertNotIn("3DES", name)
+
@unittest.skipIf(ssl.OPENSSL_VERSION_INFO < (1, 0, 2, 0, 0), 'OpenSSL too old')
def test_get_ciphers(self):
ctx = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
diff --git a/Modules/_ssl.c b/Modules/_ssl.c
index 5e007da..130f006 100644
--- a/Modules/_ssl.c
+++ b/Modules/_ssl.c
@@ -237,6 +237,31 @@ SSL_SESSION_get_ticket_lifetime_hint(const SSL_SESSION *s)
#endif /* OpenSSL < 1.1.0 or LibreSSL < 2.7.0 */
+/* Default cipher suites */
+#ifndef PY_SSL_DEFAULT_CIPHERS
+#define PY_SSL_DEFAULT_CIPHERS 1
+#endif
+
+#if PY_SSL_DEFAULT_CIPHERS == 0
+ #ifndef PY_SSL_DEFAULT_CIPHER_STRING
+ #error "Py_SSL_DEFAULT_CIPHERS 0 needs Py_SSL_DEFAULT_CIPHER_STRING"
+ #endif
+#elif PY_SSL_DEFAULT_CIPHERS == 1
+/* Python custom selection of sensible ciper suites
+ * DEFAULT: OpenSSL's default cipher list. Since 1.0.2 the list is in sensible order.
+ * !aNULL:!eNULL: really no NULL ciphers
+ * !MD5:!3DES:!DES:!RC4:!IDEA:!SEED: no weak or broken algorithms on old OpenSSL versions.
+ * !aDSS: no authentication with discrete logarithm DSA algorithm
+ * !SRP:!PSK: no secure remote password or pre-shared key authentication
+ */
+ #define PY_SSL_DEFAULT_CIPHER_STRING "DEFAULT:!aNULL:!eNULL:!MD5:!3DES:!DES:!RC4:!IDEA:!SEED:!aDSS:!SRP:!PSK"
+#elif PY_SSL_DEFAULT_CIPHERS == 2
+/* Ignored in SSLContext constructor, only used to as _ssl.DEFAULT_CIPHER_STRING */
+ #define PY_SSL_DEFAULT_CIPHER_STRING SSL_DEFAULT_CIPHER_LIST
+#else
+ #error "Unsupported PY_SSL_DEFAULT_CIPHERS"
+#endif
+
enum py_ssl_error {
/* these mirror ssl.h */
@@ -2803,7 +2828,12 @@ _ssl__SSLContext_impl(PyTypeObject *type, int proto_version)
/* A bare minimum cipher list without completely broken cipher suites.
* It's far from perfect but gives users a better head start. */
if (proto_version != PY_SSL_VERSION_SSL2) {
- result = SSL_CTX_set_cipher_list(ctx, "HIGH:!aNULL:!eNULL:!MD5");
+#if PY_SSL_DEFAULT_CIPHERS == 2
+ /* stick to OpenSSL's default settings */
+ result = 1;
+#else
+ result = SSL_CTX_set_cipher_list(ctx, PY_SSL_DEFAULT_CIPHER_STRING);
+#endif
} else {
/* SSLv2 needs MD5 */
result = SSL_CTX_set_cipher_list(ctx, "HIGH:!aNULL:!eNULL");
@@ -5343,6 +5373,9 @@ PyInit__ssl(void)
(PyObject *)&PySSLSession_Type) != 0)
return NULL;
+ PyModule_AddStringConstant(m, "_DEFAULT_CIPHERS",
+ PY_SSL_DEFAULT_CIPHER_STRING);
+
PyModule_AddIntConstant(m, "SSL_ERROR_ZERO_RETURN",
PY_SSL_ERROR_ZERO_RETURN);
PyModule_AddIntConstant(m, "SSL_ERROR_WANT_READ",
diff --git a/configure.ac b/configure.ac
index 3703701..2eff514 100644
--- a/configure.ac
+++ b/configure.ac
@@ -5598,6 +5598,42 @@ if test "$have_getrandom" = yes; then
[Define to 1 if the getrandom() function is available])
fi
+# ssl module default cipher suite string
+AH_TEMPLATE(PY_SSL_DEFAULT_CIPHERS,
+ [Default cipher suites list for ssl module.
+ 1: Python's preferred selection, 2: leave OpenSSL defaults untouched, 0: custom string])
+AH_TEMPLATE(PY_SSL_DEFAULT_CIPHER_STRING,
+ [Cipher suite string for PY_SSL_DEFAULT_CIPHERS=0]
+)
+AC_MSG_CHECKING(for --with-ssl-default-suites)
+AC_ARG_WITH(ssl-default-suites,
+ AS_HELP_STRING([--with-ssl-default-suites=@<:@python|openssl|STRING@:>@],
+ [Override default cipher suites string,
+ python: use Python's preferred selection (default),
+ openssl: leave OpenSSL's defaults untouched,
+ STRING: use a custom string,
+ PROTOCOL_SSLv2 ignores the setting]),
+[
+AC_MSG_RESULT($withval)
+case "$withval" in
+ python)
+ AC_DEFINE(PY_SSL_DEFAULT_CIPHERS, 1)
+ ;;
+ openssl)
+ AC_DEFINE(PY_SSL_DEFAULT_CIPHERS, 2)
+ ;;
+ *)
+ AC_DEFINE(PY_SSL_DEFAULT_CIPHERS, 0)
+ AC_DEFINE_UNQUOTED(PY_SSL_DEFAULT_CIPHER_STRING, "$withval")
+ ;;
+esac
+],
+[
+AC_MSG_RESULT(python)
+AC_DEFINE(PY_SSL_DEFAULT_CIPHERS, 1)
+])
+
+
# generate output files
AC_CONFIG_FILES(Makefile.pre Modules/Setup.config Misc/python.pc Misc/python-config.sh)
AC_CONFIG_FILES([Modules/ld_so_aix], [chmod +x Modules/ld_so_aix])

@ -0,0 +1,14 @@
diff --git a/Lib/distutils/command/bdist_wininst.py b/Lib/distutils/command/bdist_wininst.py
index fde5675..15434c3 100644
--- a/Lib/distutils/command/bdist_wininst.py
+++ b/Lib/distutils/command/bdist_wininst.py
@@ -55,6 +55,9 @@ class bdist_wininst(Command):
boolean_options = ['keep-temp', 'no-target-compile', 'no-target-optimize',
'skip-build']
+ # bpo-10945: bdist_wininst requires mbcs encoding only available on Windows
+ _unsupported = (sys.platform != "win32")
+
def initialize_options(self):
self.bdist_dir = None
self.plat_name = None

@ -0,0 +1,111 @@
From c660debb97f4f422255a82fef2d77804552c043a Mon Sep 17 00:00:00 2001
From: Christian Heimes <christian@python.org>
Date: Tue, 15 Jan 2019 18:16:30 +0100
Subject: [PATCH] bpo-35746: Fix segfault in ssl's cert parser
CVE-2019-5010, Fix a NULL pointer deref in ssl module. The cert parser did
not handle CRL distribution points with empty DP or URI correctly. A
malicious or buggy certificate can result into segfault.
Signed-off-by: Christian Heimes <christian@python.org>
---
Lib/test/talos-2019-0758.pem | 22 +++++++++++++++++++
Lib/test/test_ssl.py | 22 +++++++++++++++++++
.../2019-01-15-18-16-05.bpo-35746.nMSd0j.rst | 3 +++
Modules/_ssl.c | 4 ++++
4 files changed, 51 insertions(+)
create mode 100644 Lib/test/talos-2019-0758.pem
create mode 100644 Misc/NEWS.d/next/Security/2019-01-15-18-16-05.bpo-35746.nMSd0j.rst
diff --git a/Lib/test/talos-2019-0758.pem b/Lib/test/talos-2019-0758.pem
new file mode 100644
index 000000000000..13b95a77fd8a
--- /dev/null
+++ b/Lib/test/talos-2019-0758.pem
@@ -0,0 +1,22 @@
+-----BEGIN CERTIFICATE-----
+MIIDqDCCApKgAwIBAgIBAjALBgkqhkiG9w0BAQswHzELMAkGA1UEBhMCVUsxEDAO
+BgNVBAMTB2NvZHktY2EwHhcNMTgwNjE4MTgwMDU4WhcNMjgwNjE0MTgwMDU4WjA7
+MQswCQYDVQQGEwJVSzEsMCoGA1UEAxMjY29kZW5vbWljb24tdm0tMi50ZXN0Lmxh
+bC5jaXNjby5jb20wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC63fGB
+J80A9Av1GB0bptslKRIUtJm8EeEu34HkDWbL6AJY0P8WfDtlXjlPaLqFa6sqH6ES
+V48prSm1ZUbDSVL8R6BYVYpOlK8/48xk4pGTgRzv69gf5SGtQLwHy8UPBKgjSZoD
+5a5k5wJXGswhKFFNqyyxqCvWmMnJWxXTt2XDCiWc4g4YAWi4O4+6SeeHVAV9rV7C
+1wxqjzKovVe2uZOHjKEzJbbIU6JBPb6TRfMdRdYOw98n1VXDcKVgdX2DuuqjCzHP
+WhU4Tw050M9NaK3eXp4Mh69VuiKoBGOLSOcS8reqHIU46Reg0hqeL8LIL6OhFHIF
+j7HR6V1X6F+BfRS/AgMBAAGjgdYwgdMwCQYDVR0TBAIwADAdBgNVHQ4EFgQUOktp
+HQjxDXXUg8prleY9jeLKeQ4wTwYDVR0jBEgwRoAUx6zgPygZ0ZErF9sPC4+5e2Io
+UU+hI6QhMB8xCzAJBgNVBAYTAlVLMRAwDgYDVQQDEwdjb2R5LWNhggkA1QEAuwb7
+2s0wCQYDVR0SBAIwADAuBgNVHREEJzAlgiNjb2Rlbm9taWNvbi12bS0yLnRlc3Qu
+bGFsLmNpc2NvLmNvbTAOBgNVHQ8BAf8EBAMCBaAwCwYDVR0fBAQwAjAAMAsGCSqG
+SIb3DQEBCwOCAQEAvqantx2yBlM11RoFiCfi+AfSblXPdrIrHvccepV4pYc/yO6p
+t1f2dxHQb8rWH3i6cWag/EgIZx+HJQvo0rgPY1BFJsX1WnYf1/znZpkUBGbVmlJr
+t/dW1gSkNS6sPsM0Q+7HPgEv8CPDNK5eo7vU2seE0iWOkxSyVUuiCEY9ZVGaLVit
+p0C78nZ35Pdv4I+1cosmHl28+es1WI22rrnmdBpH8J1eY6WvUw2xuZHLeNVN0TzV
+Q3qq53AaCWuLOD1AjESWuUCxMZTK9DPS4JKXTK8RLyDeqOvJGjsSWp3kL0y3GaQ+
+10T1rfkKJub2+m9A9duin1fn6tHc2wSvB7m3DA==
+-----END CERTIFICATE-----
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
index 7f6b93148f45..1fc657f4d867 100644
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -115,6 +115,7 @@ def data_file(*name):
BADKEY = data_file("badkey.pem")
NOKIACERT = data_file("nokia.pem")
NULLBYTECERT = data_file("nullbytecert.pem")
+TALOS_INVALID_CRLDP = data_file("talos-2019-0758.pem")
DHFILE = data_file("ffdh3072.pem")
BYTES_DHFILE = os.fsencode(DHFILE)
@@ -348,6 +349,27 @@ def test_parse_cert(self):
self.assertEqual(p['crlDistributionPoints'],
('http://SVRIntl-G3-crl.verisign.com/SVRIntlG3.crl',))
+ def test_parse_cert_CVE_2019_5010(self):
+ p = ssl._ssl._test_decode_cert(TALOS_INVALID_CRLDP)
+ if support.verbose:
+ sys.stdout.write("\n" + pprint.pformat(p) + "\n")
+ self.assertEqual(
+ p,
+ {
+ 'issuer': (
+ (('countryName', 'UK'),), (('commonName', 'cody-ca'),)),
+ 'notAfter': 'Jun 14 18:00:58 2028 GMT',
+ 'notBefore': 'Jun 18 18:00:58 2018 GMT',
+ 'serialNumber': '02',
+ 'subject': ((('countryName', 'UK'),),
+ (('commonName',
+ 'codenomicon-vm-2.test.lal.cisco.com'),)),
+ 'subjectAltName': (
+ ('DNS', 'codenomicon-vm-2.test.lal.cisco.com'),),
+ 'version': 3
+ }
+ )
+
def test_parse_cert_CVE_2013_4238(self):
p = ssl._ssl._test_decode_cert(NULLBYTECERT)
if support.verbose:
diff --git a/Misc/NEWS.d/next/Security/2019-01-15-18-16-05.bpo-35746.nMSd0j.rst b/Misc/NEWS.d/next/Security/2019-01-15-18-16-05.bpo-35746.nMSd0j.rst
new file mode 100644
index 000000000000..dffe347eec84
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2019-01-15-18-16-05.bpo-35746.nMSd0j.rst
@@ -0,0 +1,3 @@
+[CVE-2019-5010] Fix a NULL pointer deref in ssl module. The cert parser did
+not handle CRL distribution points with empty DP or URI correctly. A
+malicious or buggy certificate can result into segfault.
diff --git a/Modules/_ssl.c b/Modules/_ssl.c
index 4e3352d9e661..0e720e268d93 100644
--- a/Modules/_ssl.c
+++ b/Modules/_ssl.c
@@ -1515,6 +1515,10 @@ _get_crl_dp(X509 *certificate) {
STACK_OF(GENERAL_NAME) *gns;
dp = sk_DIST_POINT_value(dps, i);
+ if (dp->distpoint == NULL) {
+ /* Ignore empty DP value, CVE-2019-5010 */
+ continue;
+ }
gns = dp->distpoint->name.fullname;
for (j=0; j < sk_GENERAL_NAME_num(gns); j++) {

@ -0,0 +1,949 @@
From 412ccf4c6f8c417006c0a93392a8274a425074c0 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@redhat.com>
Date: Wed, 29 May 2019 04:04:54 +0200
Subject: [PATCH 1/5] bpo-32947: test_ssl fixes for TLS 1.3 and OpenSSL 1.1.1
(GH-11612)
Backport partially commit 529525fb5a8fd9b96ab4021311a598c77588b918:
complete the previous partial backport (commit
2a4ee8aa01d61b6a9c8e9c65c211e61bdb471826.
Co-Authored-By: Christian Heimes <christian@python.org>
---
Lib/test/test_ssl.py | 15 +++++++++++++++
.../2019-01-18-17-46-10.bpo-32947.Hk0KnM.rst | 1 +
2 files changed, 16 insertions(+)
create mode 100644 Misc/NEWS.d/next/Tests/2019-01-18-17-46-10.bpo-32947.Hk0KnM.rst
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
index cb0acda..639109f 100644
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -2043,6 +2043,16 @@ if _have_threads:
sys.stdout.write(" server: read %r (%s), sending back %r (%s)...\n"
% (msg, ctype, msg.lower(), ctype))
self.write(msg.lower())
+ except ConnectionResetError:
+ # XXX: OpenSSL 1.1.1 sometimes raises ConnectionResetError
+ # when connection is not shut down gracefully.
+ if self.server.chatty and support.verbose:
+ sys.stdout.write(
+ " Connection reset by peer: {}\n".format(
+ self.addr)
+ )
+ self.close()
+ self.running = False
except OSError:
if self.server.chatty:
handle_error("Test server failure:\n")
@@ -2122,6 +2132,11 @@ if _have_threads:
pass
except KeyboardInterrupt:
self.stop()
+ except BaseException as e:
+ if support.verbose and self.chatty:
+ sys.stdout.write(
+ ' connection handling failed: ' + repr(e) + '\n')
+
self.sock.close()
def stop(self):
diff --git a/Misc/NEWS.d/next/Tests/2019-01-18-17-46-10.bpo-32947.Hk0KnM.rst b/Misc/NEWS.d/next/Tests/2019-01-18-17-46-10.bpo-32947.Hk0KnM.rst
new file mode 100644
index 0000000..f508504
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2019-01-18-17-46-10.bpo-32947.Hk0KnM.rst
@@ -0,0 +1 @@
+test_ssl fixes for TLS 1.3 and OpenSSL 1.1.1.
--
2.21.0
From 6b728ec778067849dd1f0d9b73cf1ac47dafa270 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Wed, 25 Sep 2019 09:12:59 -0700
Subject: [PATCH 2/5] bpo-38271: encrypt private key test files with AES256
(GH-16385)
The private keys for test_ssl were encrypted with 3DES in traditional
PKCSGH-5 format. 3DES and the digest algorithm of PKCSGH-5 are blocked by
some strict crypto policies. Use PKCSGH-8 format with AES256 encryption
instead.
Signed-off-by: Christian Heimes <christian@python.org>
https://bugs.python.org/issue38271
Automerge-Triggered-By: @tiran
(cherry picked from commit bfd0c963d88f3df69489ee250655e2b8f3d235bd)
Co-authored-by: Christian Heimes <christian@python.org>
---
Lib/test/keycert.passwd.pem | 85 ++++++++++---------
Lib/test/make_ssl_certs.py | 4 +-
Lib/test/ssl_key.passwd.pem | 84 +++++++++---------
.../2019-09-25-13-11-29.bpo-38271.iHXNIg.rst | 4 +
4 files changed, 91 insertions(+), 86 deletions(-)
create mode 100644 Misc/NEWS.d/next/Tests/2019-09-25-13-11-29.bpo-38271.iHXNIg.rst
diff --git a/Lib/test/keycert.passwd.pem b/Lib/test/keycert.passwd.pem
index cbb3c3b..c330c36 100644
--- a/Lib/test/keycert.passwd.pem
+++ b/Lib/test/keycert.passwd.pem
@@ -1,45 +1,45 @@
------BEGIN RSA PRIVATE KEY-----
-Proc-Type: 4,ENCRYPTED
-DEK-Info: DES-EDE3-CBC,D134E931C96D9DEC
-
-nuGFEej7vIjkYWSMz5OJeVTNntDRQi6ZM4DBm3g8T7i/0odr3WFqGMMKZcIhLYQf
-rgRq7RSKtrJ1y5taVucMV+EuCjyfzDo0TsYt+ZrXv/D08eZhjRmkhoHnGVF0TqQm
-nQEXM/ERT4J2RM78dnG+homMkI76qOqxgGbRqQqJo6AiVRcAZ45y8s96bru2TAB8
-+pWjO/v0Je7AFVdwSU52N8OOY6uoSAygW+0UY1WVxbVGJF2XfRsNpPX+YQHYl6e+
-3xM5XBVCgr6kmdAyub5qUJ38X3TpdVGoR0i+CVS9GTr2pSRib1zURAeeHnlqiUZM
-4m0Gn9s72nJevU1wxED8pwOhR8fnHEmMKGD2HPhKoOCbzDhwwBZO27TNa1uWeM3f
-M5oixKDi2PqMn3y2cDx1NjJtP661688EcJ5a2Ih9BgO9xpnhSyzBWEKcAn0tJB0H
-/56M0FW6cdOOIzMveGGL7sHW5E+iOdI1n5e7C6KJUzew78Y9qJnhS53EdI6qTz9R
-wsIsj1i070Fk6RbPo6zpLlF6w7Zj8GlZaZA7OZZv9wo5VEV/0ST8gmiiBOBc4C6Y
-u9hyLIIu4dFEBKyQHRvBnQSLNpKx6or1OGFDVBay2In9Yh2BHh1+vOj/OIz/wq48
-EHOIV27fRJxLu4jeK5LIGDhuPnMJ8AJYQ0bQOUP6fd7p+TxWkAQZPB/Dx/cs3hxr
-nFEdzx+eO+IAsObx/b1EGZyEJyETBslu4GwYX7/KK3HsJhDJ1bdZ//28jOCaoir6
-ZOMT72GRwmVoQTJ0XpccfjHfKJDRLT7C1xvzo4Eibth0hpTZkA75IUYUp6qK/PuJ
-kH/qdiC7QIkRKtsrawW4vEDna3YtxIYhQqz9+KwO6u/0gzooZtv1RU4U3ifMDB5u
-5P5GAzACRqlY8QYBkM869lvWqzQPHvybC4ak9Yx6/heMO9ddjdIW9BaK8BLxvN/6
-UCD936Y4fWltt09jHZIoxWFykouBwmd7bXooNYXmDRNmjTdVhKJuOEOQw8hDzx7e
-pWFJ9Z/V4Qm1tvXbCD7QFqMCDoY3qFvVG8DBqXpmxe1yPfz21FWrT7IuqDXAD3ns
-vxfN/2a+Cy04U9FBNVCvWqWIs5AgNpdCMJC2FlXKTy+H3/7rIjNyFyvbX0vxIXtK
-liOVNXiyVM++KZXqktqMUDlsJENmIHV9B046luqbgW018fHkyEYlL3iRZGbYegwr
-XO9VVIKVPw1BEvJ8VNdGFGuZGepd8qX2ezfYADrNR+4t85HDm8inbjTobSjWuljs
-ftUNkOeCHqAvWCFQTLCfdykvV08EJfVY79y7yFPtfRV2gxYokXFifjo3su9sVQr1
-UiIS5ZAsIC1hBXWeXoBN7QVTkFi7Yto6E1q2k10LiT3obpUUUQ/oclhrJOCJVjrS
-oRcj2QBy8OT4T9slJr5maTWdgd7Lt6+I6cGQXPaDvjGOJl0eBYM14vhx4rRQWytJ
-k07hhHFO4+9CGCuHS8AAy2gR6acYFWt2ZiiNZ0z/iPIHNK4YEyy9aLf6uZH/KQjE
-jmHToo7XD6QvCAEC5qTHby3o3LfHIhyZi/4L+AhS4FKUHF6M0peeyYt4z3HaK2d2
-N6mHLPdjwNjra7GOmcns4gzcrdfoF+R293KpPal4PjknvR3dZL4kKP/ougTAM5zv
-qDIvRbkHzjP8ChTpoLcJsNVXykNcNkjcSi0GHtIpYjh6QX6P2uvR/S4+Bbb9p9rn
-hIy/ovu9tWN2hiPxGPe6torF6BulAxsTYlDercC204AyzsrdA0pr6HBgJH9C6ML1
-TchwodbFJqn9rSv91i1liusAGoOvE81AGBdrXY7LxfSNhYY1IK6yR/POJPTd53sA
-uX2/j6Rtoksd/2BHPM6AUnI/2B9slhuzWX2aCtWLeuwvXDS6rYuTigaQmLkzTRfM
-dlMI3s9KLXxgi5YVumUZleJWXwBNP7KiKajd+VTSD+7WAhyhM5FIG5wVOaxmy4G2
-TyqZ/Ax9d2VEjTQHWvQlLPQ4Mp0EIz0aEl94K/S8CK8bJRH6+PRkar+dJi1xqlL+
-BYb42At9mEJ8odLlFikvNi1+t7jqXk5jRi5C0xFKx3nTtzoH2zNUeuA3R6vSocVK
-45jnze9IkKmxMlJ4loR5sgszdpDCD3kXqjtCcbMTmcrGyzJek3HSOTpiEORoTFOe
-Rhg6jH5lm+QcC263oipojS0qEQcnsWJP2CylNYMYHR9O/9NQxT3o2lsRHqZTMELV
-uQa/SFH+paQNbZOj8MRwPSqqiIxJFuLswKte1R+W7LKn1yBSM7Pp39lNbzGvJD2E
-YRfnCwFpJ54voVAuQ4jXJvigCW2qeCjXlxeD6K2j4eGJEEOmIjIW1wjubyBY6OI3
------END RSA PRIVATE KEY-----
+-----BEGIN ENCRYPTED PRIVATE KEY-----
+MIIHbTBXBgkqhkiG9w0BBQ0wSjApBgkqhkiG9w0BBQwwHAQIhD+rJdxqb6ECAggA
+MAwGCCqGSIb3DQIJBQAwHQYJYIZIAWUDBAEqBBDTdyjCP3riOSUfxix4aXEvBIIH
+ECGkbsFabrcFMZcplw5jHMaOlG7rYjUzwDJ80JM8uzbv2Jb8SvNlns2+xmnEvH/M
+mNvRmnXmplbVjH3XBMK8o2Psnr2V/a0j7/pgqpRxHykG+koOY4gzdt3MAg8JPbS2
+hymSl+Y5EpciO3xLfz4aFL1ZNqspQbO/TD13Ij7DUIy7xIRBMp4taoZCrP0cEBAZ
++wgu9m23I4dh3E8RUBzWyFFNic2MVVHrui6JbHc4dIHfyKLtXJDhUcS0vIC9PvcV
+jhorh3UZC4lM+/jjXV5AhzQ0VrJ2tXAUX2dA144XHzkSH2QmwfnajPsci7BL2CGC
+rjyTy4NfB/lDwU+55dqJZQSKXMxAapJMrtgw7LD5CKQcN6zmfhXGssJ7HQUXKkaX
+I1YOFzuUD7oo56BVCnVswv0jX9RxrE5QYNreMlOP9cS+kIYH65N+PAhlURuQC14K
+PgDkHn5knSa2UQA5tc5f7zdHOZhGRUfcjLP+KAWA3nh+/2OKw/X3zuPx75YT/FKe
+tACPw5hjEpl62m9Xa0eWepZXwqkIOkzHMmCyNCsbC0mmRoEjmvfnslfsmnh4Dg/c
+4YsTYMOLLIeCa+WIc38aA5W2lNO9lW0LwLhX1rP+GRVPv+TVHXlfoyaI+jp0iXrJ
+t3xxT0gaiIR/VznyS7Py68QV/zB7VdqbsNzS7LdquHK1k8+7OYiWjY3gqyU40Iu2
+d1eSnIoDvQJwyYp7XYXbOlXNLY+s1Qb7yxcW3vXm0Bg3gKT8r1XHWJ9rj+CxAn5r
+ysfkPs1JsesxzzQjwTiDNvHnBnZnwxuxfBr26ektEHmuAXSl8V6dzLN/aaPjpTj4
+CkE7KyqX3U9bLkp+ztl4xWKEmW44nskzm0+iqrtrxMyTfvvID4QrABjZL4zmWIqc
+e3ZfA3AYk9VDIegk/YKGC5VZ8YS7ZXQ0ASK652XqJ7QlMKTxxV7zda6Fp4uW6/qN
+ezt5wgbGGhZQXj2wDQmWNQYyG/juIgYTpCUA54U5XBIjuR6pg+Ytm0UrvNjsUoAC
+wGelyqaLDq8U8jdIFYVTJy9aJjQOYXjsUJ0dZN2aGHSlju0ZGIZc49cTIVQ9BTC5
+Yc0Vlwzpl+LuA25DzKZNSb/ci0lO/cQGJ2uXQQgaNgdsHlu8nukENGJhnIzx4fzK
+wEh3yHxhTRCzPPwDfXmx0IHXrPqJhSpAgaXBVIm8OjvmMxO+W75W4uLfNY/B7e2H
+3cjklGuvkofOf7sEOrGUYf4cb6Obg8FpvHgpKo5Twwmoh/qvEKckBFqNhZXDDl88
+GbGlSEgyaAV1Ig8s1NJKBolWFa0juyPAwJ8vT1T4iwW7kQ7KXKt2UNn96K/HxkLu
+pikvukz8oRHMlfVHa0R48UB1fFHwZLzPmwkpu6ancIxk3uO3yfhf6iDk3bmnyMlz
+g3k/b6MrLYaOVByRxay85jH3Vvgqfgn6wa6BJ7xQ81eZ8B45gFuTH0J5JtLL7SH8
+darRPLCYfA+Ums9/H6pU5EXfd3yfjMIbvhCXHkJrrljkZ+th3p8dyto6wmYqIY6I
+qR9sU+o6DhRaiP8tCICuhHxQpXylUM6WeJkJwduTJ8KWIvzsj4mReIKOl/oC2jSd
+gIdKhb9Q3zj9ce4N5m6v66tyvjxGZ+xf3BvUPDD+LwZeXgf7OBsNVbXzQbzto594
+nbCzPocFi3gERE50ru4K70eQCy08TPG5NpOz+DDdO5vpAuMLYEuI7O3L+3GjW40Q
+G5bu7H5/i7o/RWR67qhG/7p9kPw3nkUtYgnvnWaPMIuTfb4c2d069kjlfgWjIbbI
+tpSKmm5DHlqTE4/ECAbIEDtSaw9dXHCdL3nh5+n428xDdGbjN4lT86tfu17EYKzl
+ydH1RJ1LX3o3TEj9UkmDPt7LnftvwybMFEcP7hM2xD4lC++wKQs7Alg6dTkBnJV4
+5xU78WRntJkJTU7kFkpPKA0QfyCuSF1fAMoukDBkqUdOj6jE0BlJQlHk5iwgnJlt
+uEdkTjHZEjIUxWC6llPcAzaPNlmnD45AgfEW+Jn21IvutmJiQAz5lm9Z9PXaR0C8
+hXB6owRY67C0YKQwXhoNf6xQun2xGBGYy5rPEEezX1S1tUH5GR/KW1Lh+FzFqHXI
+ZEb5avfDqHKehGAjPON+Br7akuQ125M9LLjKuSyPaQzeeCAy356Xd7XzVwbPddbm
+9S9WSPqzaPgh10chIHoNoC8HMd33dB5j9/Q6jrbU/oPlptu/GlorWblvJdcTuBGI
+IVn45RFnkG8hCz0GJSNzW7+70YdESQbfJW79vssWMaiSjFE0pMyFXrFR5lBywBTx
+PiGEUWtvrKG94X1TMlGUzDzDJOQNZ9dT94bonNe9pVmP5BP4/DzwwiWh6qrzWk6p
+j8OE4cfCSh2WvHnhJbH7/N0v+JKjtxeIeJ16jx/K2oK5
+-----END ENCRYPTED PRIVATE KEY-----
-----BEGIN CERTIFICATE-----
MIIEWTCCAsGgAwIBAgIJAJinz4jHSjLtMA0GCSqGSIb3DQEBCwUAMF8xCzAJBgNV
BAYTAlhZMRcwFQYDVQQHDA5DYXN0bGUgQW50aHJheDEjMCEGA1UECgwaUHl0aG9u
@@ -66,3 +66,4 @@ jMqTFlmO7kpf/jpCSmamp3/JSEE1BJKHwQ6Ql4nzRA2N1mnvWH7Zxcv043gkHeAu
9Wc2uXpw9xF8itV4Uvcdr3dwqByvIqn7iI/gB+4l41e0u8OmH2MKOx4Nxlly5TNW
HcVKQHyOeyvnINuBAQ==
-----END CERTIFICATE-----
+
diff --git a/Lib/test/make_ssl_certs.py b/Lib/test/make_ssl_certs.py
index 3622765..41b5f46 100644
--- a/Lib/test/make_ssl_certs.py
+++ b/Lib/test/make_ssl_certs.py
@@ -206,8 +206,8 @@ if __name__ == '__main__':
with open('ssl_key.pem', 'w') as f:
f.write(key)
print("password protecting ssl_key.pem in ssl_key.passwd.pem")
- check_call(['openssl','rsa','-in','ssl_key.pem','-out','ssl_key.passwd.pem','-des3','-passout','pass:somepass'])
- check_call(['openssl','rsa','-in','ssl_key.pem','-out','keycert.passwd.pem','-des3','-passout','pass:somepass'])
+ check_call(['openssl','pkey','-in','ssl_key.pem','-out','ssl_key.passwd.pem','-aes256','-passout','pass:somepass'])
+ check_call(['openssl','pkey','-in','ssl_key.pem','-out','keycert.passwd.pem','-aes256','-passout','pass:somepass'])
with open('keycert.pem', 'w') as f:
f.write(key)
diff --git a/Lib/test/ssl_key.passwd.pem b/Lib/test/ssl_key.passwd.pem
index e4f1370..46de61a 100644
--- a/Lib/test/ssl_key.passwd.pem
+++ b/Lib/test/ssl_key.passwd.pem
@@ -1,42 +1,42 @@
------BEGIN RSA PRIVATE KEY-----
-Proc-Type: 4,ENCRYPTED
-DEK-Info: DES-EDE3-CBC,8064BE1494B24B13
-
-KJrffOMbo8M0I3PzcYxRZGMpKD1yB3Ii4+bT5XoanxjIJ+4fdx6LfZ0Rsx+riyzs
-tymsQu/iYY9j+4rCvN9+eetsL1X6iZpiimKsLexcid9M3fb0vxED5Sgw0dvunCUA
-xhqjLIKR92MKbODHf6KrDKCpsiPbjq4gZ7P+uCGXAMHL3MXIJSC0hW9rK7Ce6oyO
-CjpIcgB8x+GUWZZZhAFdlzIHMZrteNP2P5HK6QcaT71P034Dz1hhqoj4Q0t+Fta2
-4tfsM/bnTR/l6hwlhPa1e3Uj322tDTDWBScgWANn5+sEWldLmozMaWhZsn22pfk2
-KjRMGXG024JVheV882nbdOBvG7oq+lxkZ/ZP+vvqJqnvYtf7WtM8UivzYpe5Hz5b
-kVvWzPjBLUSZ9whM9rDLqSSqMPyPvDTuEmLkuq+xm7pYJmsLqIMP2klZLqRxLX6K
-uqwplb8UG440qauxgnQ905PId1l2fJEnRtV+7vXprA0L0QotgXLVHBhLmTFM+3PH
-9H3onf31dionUAPrn3nfVE36HhvVgRyvDBnBzJSIMighgq21Qx/d1dk0DRYi1hUI
-nCHl0YJPXheVcXR7JiSF2XQCAaFuS1Mr7NCXfWZOZQC/0dkvmHnl9DUAhuqq9BNZ
-1cKhZXcKHadg2/r0Zup/oDzmHPUEfTAXT0xbqoWlhkdwbF2veWQ96A/ncx3ISTb4
-PkXBlX9rdia8nmtyQDQRn4NuvchbaGkj4WKFC8pF8Hn7naHqwjpHaDUimBc0CoQW
-edNJqruKWwtSVLuwKHCC2gZFX9AXSKJXJz/QRSUlhFGOhuF/J6yKaXj6n5lxWNiQ
-54J+OP/hz2aS95CD2+Zf1SKpxdWiLZSIQqESpmmUrXROixNJZ/Z7gI74Dd9dSJOH
-W+3AU03vrrFZVrJVZhjcINHoH1Skh6JKscH18L6x4U868nSr4SrRLX8BhHllOQyD
-bmU+PZAjF8ZBIaCtTGulDXD29F73MeAZeTSsgQjFu0iKLj1wPiphbx8i/SUtR4YP
-X6PVA04g66r1NBw+3RQASVorZ3g1MSFvITHXcbKkBDeJH2z1+c6t/VVyTONnQhM5
-lLgRSk6HCbetvT9PKxWrWutA12pdBYEHdZhMHVf2+xclky7l09w8hg2/qqcdGRGe
-oAOZ72t0l5ObNyaruDKUS6f4AjOyWq/Xj5xuFtf1n3tQHyslSyCTPcAbQhDfTHUx
-vixb/V9qvYPt7OCn8py7v1M69NH42QVFAvwveDIFjZdqfIKBoJK2V4qPoevJI6uj
-Q5ByMt8OXOjSXNpHXpYQWUiWeCwOEBXJX8rzCHdMtg37jJ0zCmeErR1NTdg+EujM
-TWYgd06jlT67tURST0aB2kg4ijKgUJefD313LW1zC6gVsTbjSZxYyRbPfSP6flQB
-yCi1C19E2OsgleqbkBVC5GlYUzaJT7SGjCRmGx1eqtbrALu+LVH24Wceexlpjydl
-+s2nf/DZlKun/tlPh6YioifPCJjByZMQOCEfIox6BkemZETz8uYA4TTWimG13Z03
-gyDGC2jdpEW414J2qcQDvrdUgJ+HlhrAAHaWpMQDbXYxBGoZ+3+ORvQV4kAsCwL8
-k3EIrVpePdik+1xgOWsyLj6QxFXlTMvL6Wc5pnArFPORsgHEolJvxSPTf9aAHNPn
-V2WBvxiLBtYpGrujAUM40Syx/aN2RPtcXYPAusHUBw+S8/p+/8Kg8GZmnIXG3F89
-45Eepl2quZYIrou7a1fwIpIIZ0hFiBQ1mlHVMFtxwVHS1bQb3SU2GeO+JcGjdVXc
-04qeGuQ5M164eQ5C0T7ZQ1ULiUlFWKD30m+cjqmZzt3d7Q0mKpMKuESIuZJo/wpD
-Nas432aLKUhcNx/pOYLkKJRpGZKOupQoD5iUj/j44o8JoFkDK33v2S57XB5QGz28
-9Zuhx49b3W8mbM6EBanlQKLWJGCxXqc/jhYhFWn+b0MhidynFgA0oeWvf6ZDyt6H
-Yi5Etxsar09xp0Do3NxtQXLuSUu0ji2pQzSIKuoqQWKqldm6VrpwojiqJhy4WQBQ
-aVVyFeWBC7G3Zj76dO+yp2sfJ0itJUQ8AIB9Cg0f34rEZu+r9luPmqBoUeL95Tk7
-YvCOU3Jl8Iqysv8aNpVXT8sa8rrSbruWCByEePZ37RIdHLMVBwVY0eVaFQjrjU7E
-mXmM9eaoYLfXOllsQ+M2+qPFUITr/GU3Qig13DhK/+yC1R6V2a0l0WRhMltIPYKW
-Ztvvr4hK5LcYCeS113BLiMbDIMMZZYGDZGMdC8DnnVbT2loF0Rfmp80Af31KmMQ4
-6XvMatW9UDjBoY5a/YMpdm7SRwm+MgV2KNPpc2kST87/yi9oprGAb8qiarHiHTM0
------END RSA PRIVATE KEY-----
+-----BEGIN ENCRYPTED PRIVATE KEY-----
+MIIHbTBXBgkqhkiG9w0BBQ0wSjApBgkqhkiG9w0BBQwwHAQI072N7W+PDDMCAggA
+MAwGCCqGSIb3DQIJBQAwHQYJYIZIAWUDBAEqBBA/AuaRNi4vE4KGqI4In+70BIIH
+ENGS5Vex5NID873frmd1UZEHZ+O/Bd0wDb+NUpIqesHkRYf7kKi6Gnr+nKQ/oVVn
+Lm3JjE7c8ECP0OkOOXmiXuWL1SkzBBWqCI4stSGUPvBiHsGwNnvJAaGjUffgMlcC
+aJOA2+dnejLkzblq4CB2LQdm06N3Xoe9tyqtQaUHxfzJAf5Ydd8uj7vpKN2MMhY7
+icIPJwSyh0N7S6XWVtHEokr9Kp4y2hS5a+BgCWV1/1z0aF7agnSVndmT1VR+nWmc
+lM14k+lethmHMB+fsNSjnqeJ7XOPlOTHqhiZ9bBSTgF/xr5Bck/NiKRzHjdovBox
+TKg+xchaBhpRh7wBPBIlNJeHmIjv+8obOKjKU98Ig/7R9+IryZaNcKAH0PuOT+Sw
+QHXiCGQbOiYHB9UyhDTWiB7YVjd8KHefOFxfHzOQb/iBhbv1x3bTl3DgepvRN6VO
+dIsPLoIZe42sdf9GeMsk8mGJyZUQ6AzsfhWk3grb/XscizPSvrNsJ2VL1R7YTyT3
+3WA4ZXR1EqvXnWL7N/raemQjy62iOG6t7fcF5IdP9CMbWP+Plpsz4cQW7FtesCTq
+a5ZXraochQz361ODFNIeBEGU+0qqXUtZDlmos/EySkZykSeU/L0bImS62VGE3afo
+YXBmznTTT9kkFkqv7H0MerfJsrE/wF8puP3GM01DW2JRgXRpSWlvbPV/2LnMtRuD
+II7iH4rWDtTjCN6BWKAgDOnPkc9sZ4XulqT32lcUeV6LTdMBfq8kMEc8eDij1vUT
+maVCRpuwaq8EIT3lVgNLufHiG96ojlyYtj3orzw22IjkgC/9ee8UDik9CqbMVmFf
+fVHhsw8LNSg8Q4bmwm5Eg2w2it2gtI68+mwr75oCxuJ/8OMjW21Prj8XDh5reie2
+c0lDKQOFZ9UnLU1bXR/6qUM+JFKR4DMq+fOCuoQSVoyVUEOsJpvBOYnYZN9cxsZm
+vh9dKafMEcKZ8flsbr+gOmOw7+Py2ifSlf25E/Frb1W4gtbTb0LQVHb6+drutrZj
+8HEu4CnHYFCD4ZnOJb26XlZCb8GFBddW86yJYyUqMMV6Q1aJfAOAglsTo1LjIMOZ
+byo0BTAmwUevU/iuOXQ4qRBXXcoidDcTCrxfUSPG9wdt9l+m5SdQpWqfQ+fx5O7m
+SLlrHyZCiPSFMtC9DxqjIklHjf5W3wslGLgaD30YXa4VDYkRihf3CNsxGQ+tVvef
+l0ZjoAitF7Gaua06IESmKnpHe23dkr1cjYq+u2IV+xGH8LeExdwsQ9kpuTeXPnQs
+JOA99SsFx1ct32RrwjxnDDsiNkaViTKo9GDkV3jQTfoFgAVqfSgg9wGXpqUqhNG7
+TiSIHCowllLny2zn4XrXCy2niD3VDt0skb3l/PaegHE2z7S5YY85nQtYwpLiwB9M
+SQ08DYKxPBZYKtS2iZ/fsA1gjSRQDPg/SIxMhUC3M3qH8iWny1Lzl25F2Uq7VVEX
+LdTUtaby49jRTT3CQGr5n6z7bMbUegiY7h8WmOekuThGDH+4xZp6+rDP4GFk4FeK
+JcF70vMQYIjQZhadic6olv+9VtUP42ltGG/yP9a3eWRkzfAf2eCh6B1rYdgEWwE8
+rlcZzwM+y6eUmeNF2FVWB8iWtTMQHy+dYNPM+Jtus1KQKxiiq/yCRs7nWvzWRFWA
+HRyqV0J6/lqgm4FvfktFt1T0W+mDoLJOR2/zIwMy2lgL5zeHuR3SaMJnCikJbqKS
+HB3UvrhAWUcZqdH29+FhVWeM7ybyF1Wccmf+IIC/ePLa6gjtqPV8lG/5kbpcpnB6
+UQY8WWaKMxyr3jJ9bAX5QKshchp04cDecOLZrpFGNNQngR8RxSEkiIgAqNxWunIu
+KrdBDrupv/XAgEOclmgToY3iywLJSV5gHAyHWDUhRH4cFCLiGPl4XIcnXOuTze3H
+3j+EYSiS3v3DhHjp33YU2pXlJDjiYsKzAXejEh66++Y8qaQdCAad3ruWRCzW3kgk
+Md0A1VGzntTnQsewvExQEMZH2LtYIsPv3KCYGeSAuLabX4tbGk79PswjnjLLEOr0
+Ghf6RF6qf5/iFyJoG4vrbKT8kx6ywh0InILCdjUunuDskIBxX6tEcr9XwajoIvb2
+kcmGdjam5kKLS7QOWQTl8/r/cuFes0dj34cX5Qpq+Gd7tRq/D+b0207926Cxvftv
+qQ1cVn8HiLxKkZzd3tpf2xnoV1zkTL0oHrNg+qzxoxXUTUcwtIf1d/HRbYEAhi/d
+bBBoFeftEHWNq+sJgS9bH+XNzo/yK4u04B5miOq8v4CSkJdzu+ZdF22d4cjiGmtQ
+8BTmcn0Unzm+u5H0+QSZe54QBHJGNXXOIKMTkgnOdW27g4DbI1y7fCqJiSMbRW6L
+oHmMfbdB3GWqGbsUkhY8i6h9op0MU6WOX7ea2Rxyt4t6
+-----END ENCRYPTED PRIVATE KEY-----
diff --git a/Misc/NEWS.d/next/Tests/2019-09-25-13-11-29.bpo-38271.iHXNIg.rst b/Misc/NEWS.d/next/Tests/2019-09-25-13-11-29.bpo-38271.iHXNIg.rst
new file mode 100644
index 0000000..8f43d32
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2019-09-25-13-11-29.bpo-38271.iHXNIg.rst
@@ -0,0 +1,4 @@
+The private keys for test_ssl were encrypted with 3DES in traditional
+PKCS#5 format. 3DES and the digest algorithm of PKCS#5 are blocked by
+some strict crypto policies. Use PKCS#8 format with AES256 encryption
+instead.
--
2.21.0
From d8584f9bb3fb841a1b21ed25abc2237ea8bbc206 Mon Sep 17 00:00:00 2001
From: Charalampos Stratakis <cstratak@redhat.com>
Date: Tue, 26 Nov 2019 23:57:21 +0100
Subject: [PATCH 3/5] Use PROTOCOL_TLS_CLIENT/SERVER
Replaces PROTOCOL_TLSv* and PROTOCOL_SSLv23 with PROTOCOL_TLS_CLIENT and
PROTOCOL_TLS_SERVER.
Partially backports a170fa162dc03f0a014373349e548954fff2e567
---
Lib/ssl.py | 7 +-
Lib/test/test_logging.py | 2 +-
Lib/test/test_ssl.py | 169 +++++++++++++++++++--------------------
3 files changed, 87 insertions(+), 91 deletions(-)
diff --git a/Lib/ssl.py b/Lib/ssl.py
index 0114387..c5c5529 100644
--- a/Lib/ssl.py
+++ b/Lib/ssl.py
@@ -473,7 +473,7 @@ def create_default_context(purpose=Purpose.SERVER_AUTH, *, cafile=None,
context.load_default_certs(purpose)
return context
-def _create_unverified_context(protocol=PROTOCOL_TLS, *, cert_reqs=None,
+def _create_unverified_context(protocol=PROTOCOL_TLS, *, cert_reqs=CERT_NONE,
check_hostname=False, purpose=Purpose.SERVER_AUTH,
certfile=None, keyfile=None,
cafile=None, capath=None, cadata=None):
@@ -492,9 +492,12 @@ def _create_unverified_context(protocol=PROTOCOL_TLS, *, cert_reqs=None,
# by default.
context = SSLContext(protocol)
+ if not check_hostname:
+ context.check_hostname = False
if cert_reqs is not None:
context.verify_mode = cert_reqs
- context.check_hostname = check_hostname
+ if check_hostname:
+ context.check_hostname = True
if keyfile and not certfile:
raise ValueError("certfile must be specified")
diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py
index 763a5d1..d5c63b4 100644
--- a/Lib/test/test_logging.py
+++ b/Lib/test/test_logging.py
@@ -1830,7 +1830,7 @@ class HTTPHandlerTest(BaseTest):
else:
here = os.path.dirname(__file__)
localhost_cert = os.path.join(here, "keycert.pem")
- sslctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+ sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
sslctx.load_cert_chain(localhost_cert)
context = ssl.create_default_context(cafile=localhost_cert)
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
index 639109f..a7bf2f7 100644
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -155,6 +155,8 @@ def test_wrap_socket(sock, ssl_version=ssl.PROTOCOL_TLS, *,
**kwargs):
context = ssl.SSLContext(ssl_version)
if cert_reqs is not None:
+ if cert_reqs == ssl.CERT_NONE:
+ context.check_hostname = False
context.verify_mode = cert_reqs
if ca_certs is not None:
context.load_verify_locations(ca_certs)
@@ -1377,7 +1379,7 @@ class ContextTests(unittest.TestCase):
self._assert_context_options(ctx)
def test_check_hostname(self):
- ctx = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
+ ctx = ssl.SSLContext(ssl.PROTOCOL_TLS)
self.assertFalse(ctx.check_hostname)
# Requires CERT_REQUIRED or CERT_OPTIONAL
@@ -2386,17 +2388,13 @@ if _have_threads:
server_params_test(context, context,
chatty=True, connectionchatty=True)
- client_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
- client_context.load_verify_locations(SIGNING_CA)
- server_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
- # server_context.load_verify_locations(SIGNING_CA)
- server_context.load_cert_chain(SIGNED_CERTFILE2)
+ client_context, server_context, hostname = testing_context()
with self.subTest(client=ssl.PROTOCOL_TLS_CLIENT, server=ssl.PROTOCOL_TLS_SERVER):
server_params_test(client_context=client_context,
server_context=server_context,
chatty=True, connectionchatty=True,
- sni_name='fakehostname')
+ sni_name='localhost')
client_context.check_hostname = False
with self.subTest(client=ssl.PROTOCOL_TLS_SERVER, server=ssl.PROTOCOL_TLS_CLIENT):
@@ -2404,7 +2402,7 @@ if _have_threads:
server_params_test(client_context=server_context,
server_context=client_context,
chatty=True, connectionchatty=True,
- sni_name='fakehostname')
+ sni_name='localhost')
self.assertIn('called a function you should not call',
str(e.exception))
@@ -2469,39 +2467,38 @@ if _have_threads:
if support.verbose:
sys.stdout.write("\n")
- server_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- server_context.load_cert_chain(SIGNED_CERTFILE)
+ client_context, server_context, hostname = testing_context()
- context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- context.verify_mode = ssl.CERT_REQUIRED
- context.load_verify_locations(SIGNING_CA)
tf = getattr(ssl, "VERIFY_X509_TRUSTED_FIRST", 0)
- self.assertEqual(context.verify_flags, ssl.VERIFY_DEFAULT | tf)
+ self.assertEqual(client_context.verify_flags, ssl.VERIFY_DEFAULT | tf)
# VERIFY_DEFAULT should pass
server = ThreadedEchoServer(context=server_context, chatty=True)
with server:
- with context.wrap_socket(socket.socket()) as s:
+ with client_context.wrap_socket(socket.socket(),
+ server_hostname=hostname) as s:
s.connect((HOST, server.port))
cert = s.getpeercert()
self.assertTrue(cert, "Can't get peer certificate.")
# VERIFY_CRL_CHECK_LEAF without a loaded CRL file fails
- context.verify_flags |= ssl.VERIFY_CRL_CHECK_LEAF
+ client_context.verify_flags |= ssl.VERIFY_CRL_CHECK_LEAF
server = ThreadedEchoServer(context=server_context, chatty=True)
with server:
- with context.wrap_socket(socket.socket()) as s:
+ with client_context.wrap_socket(socket.socket(),
+ server_hostname=hostname) as s:
with self.assertRaisesRegex(ssl.SSLError,
"certificate verify failed"):
s.connect((HOST, server.port))
# now load a CRL file. The CRL file is signed by the CA.
- context.load_verify_locations(CRLFILE)
+ client_context.load_verify_locations(CRLFILE)
server = ThreadedEchoServer(context=server_context, chatty=True)
with server:
- with context.wrap_socket(socket.socket()) as s:
+ with client_context.wrap_socket(socket.socket(),
+ server_hostname=hostname) as s:
s.connect((HOST, server.port))
cert = s.getpeercert()
self.assertTrue(cert, "Can't get peer certificate.")
@@ -2510,19 +2507,13 @@ if _have_threads:
if support.verbose:
sys.stdout.write("\n")
- server_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- server_context.load_cert_chain(SIGNED_CERTFILE)
-
- context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- context.verify_mode = ssl.CERT_REQUIRED
- context.check_hostname = True
- context.load_verify_locations(SIGNING_CA)
+ client_context, server_context, hostname = testing_context()
# correct hostname should verify
server = ThreadedEchoServer(context=server_context, chatty=True)
with server:
- with context.wrap_socket(socket.socket(),
- server_hostname="localhost") as s:
+ with client_context.wrap_socket(socket.socket(),
+ server_hostname=hostname) as s:
s.connect((HOST, server.port))
cert = s.getpeercert()
self.assertTrue(cert, "Can't get peer certificate.")
@@ -2530,7 +2521,7 @@ if _have_threads:
# incorrect hostname should raise an exception
server = ThreadedEchoServer(context=server_context, chatty=True)
with server:
- with context.wrap_socket(socket.socket(),
+ with client_context.wrap_socket(socket.socket(),
server_hostname="invalid") as s:
with self.assertRaisesRegex(ssl.CertificateError,
"hostname 'invalid' doesn't match 'localhost'"):
@@ -2542,7 +2533,7 @@ if _have_threads:
with socket.socket() as s:
with self.assertRaisesRegex(ValueError,
"check_hostname requires server_hostname"):
- context.wrap_socket(s)
+ client_context.wrap_socket(s)
def test_wrong_cert(self):
"""Connecting when the server rejects the client's certificate
@@ -2767,7 +2758,6 @@ if _have_threads:
msgs = (b"msg 1", b"MSG 2", b"STARTTLS", b"MSG 3", b"msg 4", b"ENDTLS", b"msg 5", b"msg 6")
server = ThreadedEchoServer(CERTFILE,
- ssl_version=ssl.PROTOCOL_TLSv1,
starttls_server=True,
chatty=True,
connectionchatty=True)
@@ -2795,7 +2785,7 @@ if _have_threads:
sys.stdout.write(
" client: read %r from server, starting TLS...\n"
% msg)
- conn = test_wrap_socket(s, ssl_version=ssl.PROTOCOL_TLSv1)
+ conn = test_wrap_socket(s)
wrapped = True
elif indata == b"ENDTLS" and msg.startswith(b"ok"):
# ENDTLS ok, switch back to clear text
@@ -2882,7 +2872,7 @@ if _have_threads:
server = ThreadedEchoServer(CERTFILE,
certreqs=ssl.CERT_NONE,
- ssl_version=ssl.PROTOCOL_TLSv1,
+ ssl_version=ssl.PROTOCOL_TLS_SERVER,
cacerts=CERTFILE,
chatty=True,
connectionchatty=False)
@@ -2892,7 +2882,7 @@ if _have_threads:
certfile=CERTFILE,
ca_certs=CERTFILE,
cert_reqs=ssl.CERT_NONE,
- ssl_version=ssl.PROTOCOL_TLSv1)
+ ssl_version=ssl.PROTOCOL_TLS_CLIENT)
s.connect((HOST, server.port))
# helper methods for standardising recv* method signatures
def _recv_into():
@@ -3034,7 +3024,7 @@ if _have_threads:
def test_nonblocking_send(self):
server = ThreadedEchoServer(CERTFILE,
certreqs=ssl.CERT_NONE,
- ssl_version=ssl.PROTOCOL_TLSv1,
+ ssl_version=ssl.PROTOCOL_TLS_SERVER,
cacerts=CERTFILE,
chatty=True,
connectionchatty=False)
@@ -3044,7 +3034,7 @@ if _have_threads:
certfile=CERTFILE,
ca_certs=CERTFILE,
cert_reqs=ssl.CERT_NONE,
- ssl_version=ssl.PROTOCOL_TLSv1)
+ ssl_version=ssl.PROTOCOL_TLS_CLIENT)
s.connect((HOST, server.port))
s.setblocking(False)
@@ -3190,9 +3180,11 @@ if _have_threads:
Basic tests for SSLSocket.version().
More tests are done in the test_protocol_*() methods.
"""
- context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
+ context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+ context.check_hostname = False
+ context.verify_mode = ssl.CERT_NONE
with ThreadedEchoServer(CERTFILE,
- ssl_version=ssl.PROTOCOL_TLSv1,
+ ssl_version=ssl.PROTOCOL_TLS_SERVER,
chatty=False) as server:
with context.wrap_socket(socket.socket()) as s:
self.assertIs(s.version(), None)
@@ -3247,7 +3239,7 @@ if _have_threads:
server = ThreadedEchoServer(CERTFILE,
certreqs=ssl.CERT_NONE,
- ssl_version=ssl.PROTOCOL_TLSv1,
+ ssl_version=ssl.PROTOCOL_TLS_SERVER,
cacerts=CERTFILE,
chatty=True,
connectionchatty=False)
@@ -3257,7 +3249,7 @@ if _have_threads:
certfile=CERTFILE,
ca_certs=CERTFILE,
cert_reqs=ssl.CERT_NONE,
- ssl_version=ssl.PROTOCOL_TLSv1)
+ ssl_version=ssl.PROTOCOL_TLS_CLIENT)
s.connect((HOST, server.port))
# get the data
cb_data = s.get_channel_binding("tls-unique")
@@ -3282,7 +3274,7 @@ if _have_threads:
certfile=CERTFILE,
ca_certs=CERTFILE,
cert_reqs=ssl.CERT_NONE,
- ssl_version=ssl.PROTOCOL_TLSv1)
+ ssl_version=ssl.PROTOCOL_TLS_CLIENT)
s.connect((HOST, server.port))
new_cb_data = s.get_channel_binding("tls-unique")
if support.verbose:
@@ -3299,32 +3291,35 @@ if _have_threads:
s.close()
def test_compression(self):
- context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- context.load_cert_chain(CERTFILE)
- stats = server_params_test(context, context,
- chatty=True, connectionchatty=True)
+ client_context, server_context, hostname = testing_context()
+ stats = server_params_test(client_context, server_context,
+ chatty=True, connectionchatty=True,
+ sni_name=hostname)
if support.verbose:
sys.stdout.write(" got compression: {!r}\n".format(stats['compression']))
self.assertIn(stats['compression'], { None, 'ZLIB', 'RLE' })
+
@unittest.skipUnless(hasattr(ssl, 'OP_NO_COMPRESSION'),
"ssl.OP_NO_COMPRESSION needed for this test")
def test_compression_disabled(self):
- context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- context.load_cert_chain(CERTFILE)
- context.options |= ssl.OP_NO_COMPRESSION
- stats = server_params_test(context, context,
- chatty=True, connectionchatty=True)
+ client_context, server_context, hostname = testing_context()
+ client_context.options |= ssl.OP_NO_COMPRESSION
+ server_context.options |= ssl.OP_NO_COMPRESSION
+ stats = server_params_test(client_context, server_context,
+ chatty=True, connectionchatty=True,
+ sni_name=hostname)
self.assertIs(stats['compression'], None)
def test_dh_params(self):
# Check we can get a connection with ephemeral Diffie-Hellman
- context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- context.load_cert_chain(CERTFILE)
- context.load_dh_params(DHFILE)
- context.set_ciphers("kEDH")
- stats = server_params_test(context, context,
- chatty=True, connectionchatty=True)
+ client_context, server_context, hostname = testing_context()
+ server_context.load_dh_params(DHFILE)
+ server_context.set_ciphers("kEDH")
+ server_context.options |= ssl.OP_NO_TLSv1_3
+ stats = server_params_test(client_context, server_context,
+ chatty=True, connectionchatty=True,
+ sni_name=hostname)
cipher = stats["cipher"][0]
parts = cipher.split("-")
if "ADH" not in parts and "EDH" not in parts and "DHE" not in parts:
@@ -3332,22 +3327,20 @@ if _have_threads:
def test_selected_alpn_protocol(self):
# selected_alpn_protocol() is None unless ALPN is used.
- context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- context.load_cert_chain(CERTFILE)
- stats = server_params_test(context, context,
- chatty=True, connectionchatty=True)
+ client_context, server_context, hostname = testing_context()
+ stats = server_params_test(client_context, server_context,
+ chatty=True, connectionchatty=True,
+ sni_name=hostname)
self.assertIs(stats['client_alpn_protocol'], None)
@unittest.skipUnless(ssl.HAS_ALPN, "ALPN support required")
def test_selected_alpn_protocol_if_server_uses_alpn(self):
# selected_alpn_protocol() is None unless ALPN is used by the client.
- client_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- client_context.load_verify_locations(CERTFILE)
- server_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- server_context.load_cert_chain(CERTFILE)
+ client_context, server_context, hostname = testing_context()
server_context.set_alpn_protocols(['foo', 'bar'])
stats = server_params_test(client_context, server_context,
- chatty=True, connectionchatty=True)
+ chatty=True, connectionchatty=True,
+ sni_name=hostname)
self.assertIs(stats['client_alpn_protocol'], None)
@unittest.skipUnless(ssl.HAS_ALPN, "ALPN support needed for this test")
@@ -3394,10 +3387,10 @@ if _have_threads:
def test_selected_npn_protocol(self):
# selected_npn_protocol() is None unless NPN is used
- context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- context.load_cert_chain(CERTFILE)
- stats = server_params_test(context, context,
- chatty=True, connectionchatty=True)
+ client_context, server_context, hostname = testing_context()
+ stats = server_params_test(client_context, server_context,
+ chatty=True, connectionchatty=True,
+ sni_name=hostname)
self.assertIs(stats['client_npn_protocol'], None)
@unittest.skipUnless(ssl.HAS_NPN, "NPN support needed for this test")
@@ -3430,12 +3423,11 @@ if _have_threads:
self.assertEqual(server_result, expected, msg % (server_result, "server"))
def sni_contexts(self):
- server_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
+ server_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
server_context.load_cert_chain(SIGNED_CERTFILE)
- other_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
+ other_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
other_context.load_cert_chain(SIGNED_CERTFILE2)
- client_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- client_context.verify_mode = ssl.CERT_REQUIRED
+ client_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
client_context.load_verify_locations(SIGNING_CA)
return server_context, other_context, client_context
@@ -3448,6 +3440,8 @@ if _have_threads:
calls = []
server_context, other_context, client_context = self.sni_contexts()
+ client_context.check_hostname = False
+
def servername_cb(ssl_sock, server_name, initial_context):
calls.append((server_name, initial_context))
if server_name is not None:
@@ -3533,11 +3527,7 @@ if _have_threads:
self.assertIn("TypeError", stderr.getvalue())
def test_shared_ciphers(self):
- server_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- server_context.load_cert_chain(SIGNED_CERTFILE)
- client_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- client_context.verify_mode = ssl.CERT_REQUIRED
- client_context.load_verify_locations(SIGNING_CA)
+ client_context, server_context, hostname = testing_context()
if ssl.OPENSSL_VERSION_INFO >= (1, 0, 2):
client_context.set_ciphers("AES128:AES256")
server_context.set_ciphers("AES256")
@@ -3555,7 +3545,8 @@ if _have_threads:
# TLS 1.3 ciphers are always enabled
expected_algs.extend(["TLS_CHACHA20", "TLS_AES"])
- stats = server_params_test(client_context, server_context)
+ stats = server_params_test(client_context, server_context,
+ sni_name=hostname)
ciphers = stats['server_shared_ciphers'][0]
self.assertGreater(len(ciphers), 0)
for name, tls_version, bits in ciphers:
@@ -3595,14 +3586,13 @@ if _have_threads:
self.assertEqual(s.recv(1024), TEST_DATA)
def test_session(self):
- server_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- server_context.load_cert_chain(SIGNED_CERTFILE)
- client_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- client_context.verify_mode = ssl.CERT_REQUIRED
- client_context.load_verify_locations(SIGNING_CA)
+ client_context, server_context, hostname = testing_context()
+ # TODO: sessions aren't compatible with TLSv1.3 yet
+ client_context.options |= ssl.OP_NO_TLSv1_3
# first connection without session
- stats = server_params_test(client_context, server_context)
+ stats = server_params_test(client_context, server_context,
+ sni_name=hostname)
session = stats['session']
self.assertTrue(session.id)
self.assertGreater(session.time, 0)
@@ -3616,7 +3606,8 @@ if _have_threads:
self.assertEqual(sess_stat['hits'], 0)
# reuse session
- stats = server_params_test(client_context, server_context, session=session)
+ stats = server_params_test(client_context, server_context,
+ session=session, sni_name=hostname)
sess_stat = server_context.session_stats()
self.assertEqual(sess_stat['accept'], 2)
self.assertEqual(sess_stat['hits'], 1)
@@ -3629,7 +3620,8 @@ if _have_threads:
self.assertGreaterEqual(session2.timeout, session.timeout)
# another one without session
- stats = server_params_test(client_context, server_context)
+ stats = server_params_test(client_context, server_context,
+ sni_name=hostname)
self.assertFalse(stats['session_reused'])
session3 = stats['session']
self.assertNotEqual(session3.id, session.id)
@@ -3639,7 +3631,8 @@ if _have_threads:
self.assertEqual(sess_stat['hits'], 1)
# reuse session again
- stats = server_params_test(client_context, server_context, session=session)
+ stats = server_params_test(client_context, server_context,
+ session=session, sni_name=hostname)
self.assertTrue(stats['session_reused'])
session4 = stats['session']
self.assertEqual(session4.id, session.id)
--
2.21.0
From 743c3e09b485092b51a982ab9859ffc79cbb7791 Mon Sep 17 00:00:00 2001
From: Charalampos Stratakis <cstratak@redhat.com>
Date: Wed, 27 Nov 2019 00:01:17 +0100
Subject: [PATCH 4/5] Adjust some tests for TLS 1.3 compatibility
Partially backports some changes from 529525fb5a8fd9b96ab4021311a598c77588b918
and 2614ed4c6e4b32eafb683f2378ed20e87d42976d
---
Lib/test/test_ssl.py | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
index a7bf2f7..43c2dbc 100644
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -3189,7 +3189,12 @@ if _have_threads:
with context.wrap_socket(socket.socket()) as s:
self.assertIs(s.version(), None)
s.connect((HOST, server.port))
- self.assertEqual(s.version(), 'TLSv1')
+ if IS_OPENSSL_1_1:
+ self.assertEqual(s.version(), 'TLSv1.3')
+ elif ssl.OPENSSL_VERSION_INFO >= (1, 0, 2):
+ self.assertEqual(s.version(), 'TLSv1.2')
+ else: # 0.9.8 to 1.0.1
+ self.assertIn(s.version(), ('TLSv1', 'TLSv1.2'))
self.assertIs(s.version(), None)
@unittest.skipUnless(ssl.HAS_TLSv1_3,
@@ -3259,7 +3264,10 @@ if _have_threads:
# check if it is sane
self.assertIsNotNone(cb_data)
- self.assertEqual(len(cb_data), 12) # True for TLSv1
+ if s.version() == 'TLSv1.3':
+ self.assertEqual(len(cb_data), 48)
+ else:
+ self.assertEqual(len(cb_data), 12) # True for TLSv1
# and compare with the peers version
s.write(b"CB tls-unique\n")
@@ -3283,7 +3291,10 @@ if _have_threads:
# is it really unique
self.assertNotEqual(cb_data, new_cb_data)
self.assertIsNotNone(cb_data)
- self.assertEqual(len(cb_data), 12) # True for TLSv1
+ if s.version() == 'TLSv1.3':
+ self.assertEqual(len(cb_data), 48)
+ else:
+ self.assertEqual(len(cb_data), 12) # True for TLSv1
s.write(b"CB tls-unique\n")
peer_data_repr = s.read().strip()
self.assertEqual(peer_data_repr,
--
2.21.0
From cd250c8a782f36c7a6f5ffabc922cb75744fa9c0 Mon Sep 17 00:00:00 2001
From: Charalampos Stratakis <cstratak@redhat.com>
Date: Tue, 26 Nov 2019 23:18:10 +0100
Subject: [PATCH 5/5] Skip the ssl tests that rely on TLSv1 and TLSv1.1
availability
---
Lib/test/test_ssl.py | 32 +++++++++++++++++++++++---------
1 file changed, 23 insertions(+), 9 deletions(-)
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
index 43c2dbc..b35db25 100644
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -39,6 +39,13 @@ IS_LIBRESSL = ssl.OPENSSL_VERSION.startswith('LibreSSL')
IS_OPENSSL_1_1 = not IS_LIBRESSL and ssl.OPENSSL_VERSION_INFO >= (1, 1, 0)
PY_SSL_DEFAULT_CIPHERS = sysconfig.get_config_var('PY_SSL_DEFAULT_CIPHERS')
+# On RHEL8 openssl disables TLSv1 and TLSv1.1 on runtime.
+# Since we don't have a good way to detect runtime changes
+# on the allowed protocols, we hardcode the default config
+# with those flags.
+TLSv1_enabled = False
+TLSv1_1_enabled = False
+
def data_file(*name):
return os.path.join(os.path.dirname(__file__), *name)
@@ -2380,7 +2387,8 @@ if _have_threads:
if support.verbose:
sys.stdout.write("\n")
for protocol in PROTOCOLS:
- if protocol in {ssl.PROTOCOL_TLS_CLIENT, ssl.PROTOCOL_TLS_SERVER}:
+ if protocol in {ssl.PROTOCOL_TLS_CLIENT, ssl.PROTOCOL_TLS_SERVER,
+ ssl.PROTOCOL_TLSv1, ssl.PROTOCOL_TLSv1_1}:
continue
with self.subTest(protocol=ssl._PROTOCOL_NAMES[protocol]):
context = ssl.SSLContext(protocol)
@@ -2650,17 +2658,20 @@ if _have_threads:
if hasattr(ssl, 'PROTOCOL_SSLv3'):
try_protocol_combo(ssl.PROTOCOL_SSLv23, ssl.PROTOCOL_SSLv3, False)
try_protocol_combo(ssl.PROTOCOL_SSLv23, ssl.PROTOCOL_SSLv23, True)
- try_protocol_combo(ssl.PROTOCOL_SSLv23, ssl.PROTOCOL_TLSv1, 'TLSv1')
+ if TLSv1_enabled:
+ try_protocol_combo(ssl.PROTOCOL_SSLv23, ssl.PROTOCOL_TLSv1, 'TLSv1')
if hasattr(ssl, 'PROTOCOL_SSLv3'):
try_protocol_combo(ssl.PROTOCOL_SSLv23, ssl.PROTOCOL_SSLv3, False, ssl.CERT_OPTIONAL)
try_protocol_combo(ssl.PROTOCOL_SSLv23, ssl.PROTOCOL_SSLv23, True, ssl.CERT_OPTIONAL)
- try_protocol_combo(ssl.PROTOCOL_SSLv23, ssl.PROTOCOL_TLSv1, 'TLSv1', ssl.CERT_OPTIONAL)
+ if TLSv1_enabled:
+ try_protocol_combo(ssl.PROTOCOL_SSLv23, ssl.PROTOCOL_TLSv1, 'TLSv1', ssl.CERT_OPTIONAL)
if hasattr(ssl, 'PROTOCOL_SSLv3'):
try_protocol_combo(ssl.PROTOCOL_SSLv23, ssl.PROTOCOL_SSLv3, False, ssl.CERT_REQUIRED)
try_protocol_combo(ssl.PROTOCOL_SSLv23, ssl.PROTOCOL_SSLv23, True, ssl.CERT_REQUIRED)
- try_protocol_combo(ssl.PROTOCOL_SSLv23, ssl.PROTOCOL_TLSv1, 'TLSv1', ssl.CERT_REQUIRED)
+ if TLSv1_enabled:
+ try_protocol_combo(ssl.PROTOCOL_SSLv23, ssl.PROTOCOL_TLSv1, 'TLSv1', ssl.CERT_REQUIRED)
# Server with specific SSL options
if hasattr(ssl, 'PROTOCOL_SSLv3'):
@@ -2698,9 +2709,10 @@ if _have_threads:
"""Connecting to a TLSv1 server with various client options"""
if support.verbose:
sys.stdout.write("\n")
- try_protocol_combo(ssl.PROTOCOL_TLSv1, ssl.PROTOCOL_TLSv1, 'TLSv1')
- try_protocol_combo(ssl.PROTOCOL_TLSv1, ssl.PROTOCOL_TLSv1, 'TLSv1', ssl.CERT_OPTIONAL)
- try_protocol_combo(ssl.PROTOCOL_TLSv1, ssl.PROTOCOL_TLSv1, 'TLSv1', ssl.CERT_REQUIRED)
+ if TLSv1_enabled:
+ try_protocol_combo(ssl.PROTOCOL_TLSv1, ssl.PROTOCOL_TLSv1, 'TLSv1')
+ try_protocol_combo(ssl.PROTOCOL_TLSv1, ssl.PROTOCOL_TLSv1, 'TLSv1', ssl.CERT_OPTIONAL)
+ try_protocol_combo(ssl.PROTOCOL_TLSv1, ssl.PROTOCOL_TLSv1, 'TLSv1', ssl.CERT_REQUIRED)
if hasattr(ssl, 'PROTOCOL_SSLv2'):
try_protocol_combo(ssl.PROTOCOL_TLSv1, ssl.PROTOCOL_SSLv2, False)
if hasattr(ssl, 'PROTOCOL_SSLv3'):
@@ -2716,7 +2728,8 @@ if _have_threads:
Testing against older TLS versions."""
if support.verbose:
sys.stdout.write("\n")
- try_protocol_combo(ssl.PROTOCOL_TLSv1_1, ssl.PROTOCOL_TLSv1_1, 'TLSv1.1')
+ if TLSv1_1_enabled:
+ try_protocol_combo(ssl.PROTOCOL_TLSv1_1, ssl.PROTOCOL_TLSv1_1, 'TLSv1.1')
if hasattr(ssl, 'PROTOCOL_SSLv2'):
try_protocol_combo(ssl.PROTOCOL_TLSv1_1, ssl.PROTOCOL_SSLv2, False)
if hasattr(ssl, 'PROTOCOL_SSLv3'):
@@ -2724,7 +2737,8 @@ if _have_threads:
try_protocol_combo(ssl.PROTOCOL_TLSv1_1, ssl.PROTOCOL_SSLv23, False,
client_options=ssl.OP_NO_TLSv1_1)
- try_protocol_combo(ssl.PROTOCOL_SSLv23, ssl.PROTOCOL_TLSv1_1, 'TLSv1.1')
+ if TLSv1_1_enabled:
+ try_protocol_combo(ssl.PROTOCOL_SSLv23, ssl.PROTOCOL_TLSv1_1, 'TLSv1.1')
try_protocol_combo(ssl.PROTOCOL_TLSv1_1, ssl.PROTOCOL_TLSv1, False)
try_protocol_combo(ssl.PROTOCOL_TLSv1, ssl.PROTOCOL_TLSv1_1, False)
--
2.21.0

@ -0,0 +1,41 @@
commit 86ed41792d394f804d2c9e695ac8b257220fbdee
Author: Victor Stinner <vstinner@redhat.com>
Date: Tue Mar 12 17:17:13 2019 +0100
Fix test_tarfile on ppc64
Fix sparse file tests of test_tarfile on ppc64le with the tmpfs
filesystem.
* https://bugzilla.redhat.com/show_bug.cgi?id=1639490
* https://bugs.python.org/issue35772
* https://github.com/python/cpython/commit/d1dd6be613381b996b9071443ef081de8e5f3aff
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 4cd7d53..bd8b05f 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -973,16 +973,21 @@ class GNUReadTest(LongnameTest, ReadTest, unittest.TestCase):
def _fs_supports_holes():
# Return True if the platform knows the st_blocks stat attribute and
# uses st_blocks units of 512 bytes, and if the filesystem is able to
- # store holes in files.
+ # store holes of 4 KiB in files.
+ #
+ # The function returns False if page size is larger than 4 KiB.
+ # For example, ppc64 uses pages of 64 KiB.
if sys.platform.startswith("linux"):
# Linux evidentially has 512 byte st_blocks units.
name = os.path.join(TEMPDIR, "sparse-test")
with open(name, "wb") as fobj:
+ # Seek to "punch a hole" of 4 KiB
fobj.seek(4096)
+ fobj.write(b'x' * 4096)
fobj.truncate()
s = os.stat(name)
support.unlink(name)
- return s.st_blocks == 0
+ return (s.st_blocks * 512 < s.st_size)
else:
return False

@ -0,0 +1,137 @@
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index d991254..647af61 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -121,6 +121,11 @@ or on combining URL components into a URL string.
Unmatched square brackets in the :attr:`netloc` attribute will raise a
:exc:`ValueError`.
+ Characters in the :attr:`netloc` attribute that decompose under NFKC
+ normalization (as used by the IDNA encoding) into any of ``/``, ``?``,
+ ``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
+ decomposed before parsing, no error will be raised.
+
.. versionchanged:: 3.2
Added IPv6 URL parsing capabilities.
@@ -133,6 +138,10 @@ or on combining URL components into a URL string.
Out-of-range port numbers now raise :exc:`ValueError`, instead of
returning :const:`None`.
+ .. versionchanged:: 3.6.9
+ Characters that affect netloc parsing under NFKC normalization will
+ now raise :exc:`ValueError`.
+
.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None)
@@ -256,10 +265,19 @@ or on combining URL components into a URL string.
Unmatched square brackets in the :attr:`netloc` attribute will raise a
:exc:`ValueError`.
+ Characters in the :attr:`netloc` attribute that decompose under NFKC
+ normalization (as used by the IDNA encoding) into any of ``/``, ``?``,
+ ``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
+ decomposed before parsing, no error will be raised.
+
.. versionchanged:: 3.6
Out-of-range port numbers now raise :exc:`ValueError`, instead of
returning :const:`None`.
+ .. versionchanged:: 3.6.9
+ Characters that affect netloc parsing under NFKC normalization will
+ now raise :exc:`ValueError`.
+
.. function:: urlunsplit(parts)
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index be50b47..68f633c 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -1,3 +1,5 @@
+import sys
+import unicodedata
import unittest
import urllib.parse
@@ -984,6 +986,34 @@ class UrlParseTestCase(unittest.TestCase):
expected.append(name)
self.assertCountEqual(urllib.parse.__all__, expected)
+ def test_urlsplit_normalization(self):
+ # Certain characters should never occur in the netloc,
+ # including under normalization.
+ # Ensure that ALL of them are detected and cause an error
+ illegal_chars = '/:#?@'
+ hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
+ denorm_chars = [
+ c for c in map(chr, range(128, sys.maxunicode))
+ if (hex_chars & set(unicodedata.decomposition(c).split()))
+ and c not in illegal_chars
+ ]
+ # Sanity check that we found at least one such character
+ self.assertIn('\u2100', denorm_chars)
+ self.assertIn('\uFF03', denorm_chars)
+
+ # bpo-36742: Verify port separators are ignored when they
+ # existed prior to decomposition
+ urllib.parse.urlsplit('http://\u30d5\u309a:80')
+ with self.assertRaises(ValueError):
+ urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380')
+
+ for scheme in ["http", "https", "ftp"]:
+ for netloc in ["netloc{}false.netloc", "n{}user@netloc"]:
+ for c in denorm_chars:
+ url = "{}://{}/path".format(scheme, netloc.format(c))
+ with self.subTest(url=url, char='{:04X}'.format(ord(c))):
+ with self.assertRaises(ValueError):
+ urllib.parse.urlsplit(url)
class Utility_Tests(unittest.TestCase):
"""Testcase to test the various utility functions in the urllib."""
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 85e68c8..fa8827a 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -391,6 +391,24 @@ def _splitnetloc(url, start=0):
delim = min(delim, wdelim) # use earliest delim position
return url[start:delim], url[delim:] # return (domain, rest)
+def _checknetloc(netloc):
+ if not netloc or not any(ord(c) > 127 for c in netloc):
+ return
+ # looking for characters like \u2100 that expand to 'a/c'
+ # IDNA uses NFKC equivalence, so normalize for this check
+ import unicodedata
+ n = netloc.replace('@', '') # ignore characters already included
+ n = n.replace(':', '') # but not the surrounding text
+ n = n.replace('#', '')
+ n = n.replace('?', '')
+ netloc2 = unicodedata.normalize('NFKC', n)
+ if n == netloc2:
+ return
+ for c in '/?#@:':
+ if c in netloc2:
+ raise ValueError("netloc '" + netloc + "' contains invalid " +
+ "characters under NFKC normalization")
+
def urlsplit(url, scheme='', allow_fragments=True):
"""Parse a URL into 5 components:
<scheme>://<netloc>/<path>?<query>#<fragment>
@@ -420,6 +438,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
url, fragment = url.split('#', 1)
if '?' in url:
url, query = url.split('?', 1)
+ _checknetloc(netloc)
v = SplitResult(scheme, netloc, url, query, fragment)
_parse_cache[key] = v
return _coerce_result(v)
@@ -443,6 +462,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
url, fragment = url.split('#', 1)
if '?' in url:
url, query = url.split('?', 1)
+ _checknetloc(netloc)
v = SplitResult(scheme, netloc, url, query, fragment)
_parse_cache[key] = v
return _coerce_result(v)

@ -0,0 +1,150 @@
From 7e200e0763f5b71c199aaf98bd5588f291585619 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= <miro@hroncok.cz>
Date: Tue, 7 May 2019 17:28:47 +0200
Subject: [PATCH] bpo-30458: Disallow control chars in http URLs. (GH-12755)
(GH-13154)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Disallow control chars in http URLs in urllib.urlopen. This addresses a potential security problem for applications that do not sanity check their URLs where http request headers could be injected.
Disable https related urllib tests on a build without ssl (GH-13032)
These tests require an SSL enabled build. Skip these tests when python is built without SSL to fix test failures.
Use http.client.InvalidURL instead of ValueError as the new error case's exception. (GH-13044)
Backport Co-Authored-By: Miro Hrončok <miro@hroncok.cz>
---
Lib/http/client.py | 15 ++++++
Lib/test/test_urllib.py | 53 +++++++++++++++++++
Lib/test/test_xmlrpc.py | 7 ++-
.../2019-04-10-08-53-30.bpo-30458.51E-DA.rst | 1 +
4 files changed, 75 insertions(+), 1 deletion(-)
create mode 100644 Misc/NEWS.d/next/Security/2019-04-10-08-53-30.bpo-30458.51E-DA.rst
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 1de151c38e..2afd452fe3 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -140,6 +140,16 @@ _MAXHEADERS = 100
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search
+# These characters are not allowed within HTTP URL paths.
+# See https://tools.ietf.org/html/rfc3986#section-3.3 and the
+# https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
+# Prevents CVE-2019-9740. Includes control characters such as \r\n.
+# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
+_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
+# Arguably only these _should_ allowed:
+# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
+# We are more lenient for assumed real world compatibility purposes.
+
# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
@@ -1101,6 +1111,11 @@ class HTTPConnection:
self._method = method
if not url:
url = '/'
+ # Prevent CVE-2019-9740.
+ match = _contains_disallowed_url_pchar_re.search(url)
+ if match:
+ raise InvalidURL(f"URL can't contain control characters. {url!r} "
+ f"(found at least {match.group()!r})")
request = '%s %s %s' % (method, url, self._http_vsn_str)
# Non-ASCII characters should have been eliminated earlier
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 2ac73b58d8..7214492eca 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -329,6 +329,59 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
finally:
self.unfakehttp()
+ @unittest.skipUnless(ssl, "ssl module required")
+ def test_url_with_control_char_rejected(self):
+ for char_no in list(range(0, 0x21)) + [0x7f]:
+ char = chr(char_no)
+ schemeless_url = f"//localhost:7777/test{char}/"
+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
+ try:
+ # We explicitly test urllib.request.urlopen() instead of the top
+ # level 'def urlopen()' function defined in this... (quite ugly)
+ # test suite. They use different url opening codepaths. Plain
+ # urlopen uses FancyURLOpener which goes via a codepath that
+ # calls urllib.parse.quote() on the URL which makes all of the
+ # above attempts at injection within the url _path_ safe.
+ escaped_char_repr = repr(char).replace('\\', r'\\')
+ InvalidURL = http.client.InvalidURL
+ with self.assertRaisesRegex(
+ InvalidURL, f"contain control.*{escaped_char_repr}"):
+ urllib.request.urlopen(f"http:{schemeless_url}")
+ with self.assertRaisesRegex(
+ InvalidURL, f"contain control.*{escaped_char_repr}"):
+ urllib.request.urlopen(f"https:{schemeless_url}")
+ # This code path quotes the URL so there is no injection.
+ resp = urlopen(f"http:{schemeless_url}")
+ self.assertNotIn(char, resp.geturl())
+ finally:
+ self.unfakehttp()
+
+ @unittest.skipUnless(ssl, "ssl module required")
+ def test_url_with_newline_header_injection_rejected(self):
+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
+ host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
+ schemeless_url = "//" + host + ":8080/test/?test=a"
+ try:
+ # We explicitly test urllib.request.urlopen() instead of the top
+ # level 'def urlopen()' function defined in this... (quite ugly)
+ # test suite. They use different url opening codepaths. Plain
+ # urlopen uses FancyURLOpener which goes via a codepath that
+ # calls urllib.parse.quote() on the URL which makes all of the
+ # above attempts at injection within the url _path_ safe.
+ InvalidURL = http.client.InvalidURL
+ with self.assertRaisesRegex(
+ InvalidURL, r"contain control.*\\r.*(found at least . .)"):
+ urllib.request.urlopen(f"http:{schemeless_url}")
+ with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
+ urllib.request.urlopen(f"https:{schemeless_url}")
+ # This code path quotes the URL so there is no injection.
+ resp = urlopen(f"http:{schemeless_url}")
+ self.assertNotIn(' ', resp.geturl())
+ self.assertNotIn('\r', resp.geturl())
+ self.assertNotIn('\n', resp.geturl())
+ finally:
+ self.unfakehttp()
+
def test_read_0_9(self):
# "0.9" response accepted (but not "simple responses" without
# a status line)
diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py
index 32263f7f0b..0e002ec4ef 100644
--- a/Lib/test/test_xmlrpc.py
+++ b/Lib/test/test_xmlrpc.py
@@ -945,7 +945,12 @@ class SimpleServerTestCase(BaseServerTestCase):
def test_partial_post(self):
# Check that a partial POST doesn't make the server loop: issue #14001.
conn = http.client.HTTPConnection(ADDR, PORT)
- conn.request('POST', '/RPC2 HTTP/1.0\r\nContent-Length: 100\r\n\r\nbye')
+ conn.send('POST /RPC2 HTTP/1.0\r\n'
+ 'Content-Length: 100\r\n\r\n'
+ 'bye HTTP/1.1\r\n'
+ f'Host: {ADDR}:{PORT}\r\n'
+ 'Accept-Encoding: identity\r\n'
+ 'Content-Length: 0\r\n\r\n'.encode('ascii'))
conn.close()
def test_context_manager(self):
diff --git a/Misc/NEWS.d/next/Security/2019-04-10-08-53-30.bpo-30458.51E-DA.rst b/Misc/NEWS.d/next/Security/2019-04-10-08-53-30.bpo-30458.51E-DA.rst
new file mode 100644
index 0000000000..ed8027fb4d
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2019-04-10-08-53-30.bpo-30458.51E-DA.rst
@@ -0,0 +1 @@
+Address CVE-2019-9740 by disallowing URL paths with embedded whitespace or control characters through into the underlying http client request. Such potentially malicious header injection URLs now cause an http.client.InvalidURL exception to be raised.
--
2.21.0

@ -0,0 +1,49 @@
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 649a5b8..0061a52 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -16,6 +16,7 @@ except ImportError:
ssl = None
import sys
import tempfile
+import warnings
from nturl2path import url2pathname, pathname2url
from base64 import b64encode
@@ -1463,6 +1464,23 @@ class URLopener_Tests(unittest.TestCase):
"spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
"//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
+ def test_local_file_open(self):
+ # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
+ class DummyURLopener(urllib.request.URLopener):
+ def open_local_file(self, url):
+ return url
+
+ with warnings.catch_warnings(record=True):
+ warnings.simplefilter("ignore", DeprecationWarning)
+
+ for url in ('local_file://example', 'local-file://example'):
+ self.assertRaises(OSError, urllib.request.urlopen, url)
+ self.assertRaises(OSError, urllib.request.URLopener().open, url)
+ self.assertRaises(OSError, urllib.request.URLopener().retrieve, url)
+ self.assertRaises(OSError, DummyURLopener().open, url)
+ self.assertRaises(OSError, DummyURLopener().retrieve, url)
+
+
# Just commented them out.
# Can't really tell why keep failing in windows and sparc.
# Everywhere else they work ok, but on those machines, sometimes
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index d28f2f8..c9945d9 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -1747,7 +1747,7 @@ class URLopener:
name = 'open_' + urltype
self.type = urltype
name = name.replace('-', '_')
- if not hasattr(self, name):
+ if not hasattr(self, name) or name == 'open_local_file':
if proxy:
return self.open_unknown_proxy(proxy, fullurl, data)
else:

@ -0,0 +1,117 @@
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
index 883201f..cf4d84d 100644
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -3891,6 +3891,37 @@ class TestPostHandshakeAuth(unittest.TestCase):
s.write(b'PHA')
self.assertIn(b'WRONG_SSL_VERSION', s.recv(1024))
+ def test_bpo37428_pha_cert_none(self):
+ # verify that post_handshake_auth does not implicitly enable cert
+ # validation.
+ hostname = 'localhost'
+ client_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+ client_context.post_handshake_auth = True
+ client_context.load_cert_chain(SIGNED_CERTFILE)
+ # no cert validation and CA on client side
+ client_context.check_hostname = False
+ client_context.verify_mode = ssl.CERT_NONE
+
+ server_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ server_context.load_cert_chain(SIGNED_CERTFILE)
+ server_context.load_verify_locations(SIGNING_CA)
+ server_context.post_handshake_auth = True
+ server_context.verify_mode = ssl.CERT_REQUIRED
+
+ server = ThreadedEchoServer(context=server_context, chatty=False)
+ with server:
+ with client_context.wrap_socket(socket.socket(),
+ server_hostname=hostname) as s:
+ s.connect((HOST, server.port))
+ s.write(b'HASCERT')
+ self.assertEqual(s.recv(1024), b'FALSE\n')
+ s.write(b'PHA')
+ self.assertEqual(s.recv(1024), b'OK\n')
+ s.write(b'HASCERT')
+ self.assertEqual(s.recv(1024), b'TRUE\n')
+ # server cert has not been validated
+ self.assertEqual(s.getpeercert(), {})
+
def test_main(verbose=False):
if support.verbose:
diff --git a/Modules/_ssl.c b/Modules/_ssl.c
index ec366f0..9bf1cde 100644
--- a/Modules/_ssl.c
+++ b/Modules/_ssl.c
@@ -732,6 +732,26 @@ newPySSLSocket(PySSLContext *sslctx, PySocketSockObject *sock,
#endif
SSL_set_mode(self->ssl, mode);
+#ifdef TLS1_3_VERSION
+ if (sslctx->post_handshake_auth == 1) {
+ if (socket_type == PY_SSL_SERVER) {
+ /* bpo-37428: OpenSSL does not ignore SSL_VERIFY_POST_HANDSHAKE.
+ * Set SSL_VERIFY_POST_HANDSHAKE flag only for server sockets and
+ * only in combination with SSL_VERIFY_PEER flag. */
+ int mode = SSL_get_verify_mode(self->ssl);
+ if (mode & SSL_VERIFY_PEER) {
+ int (*verify_cb)(int, X509_STORE_CTX *) = NULL;
+ verify_cb = SSL_get_verify_callback(self->ssl);
+ mode |= SSL_VERIFY_POST_HANDSHAKE;
+ SSL_set_verify(self->ssl, mode, verify_cb);
+ }
+ } else {
+ /* client socket */
+ SSL_set_post_handshake_auth(self->ssl, 1);
+ }
+ }
+#endif
+
#if HAVE_SNI
if (server_hostname != NULL) {
/* Don't send SNI for IP addresses. We cannot simply use inet_aton() and
@@ -2765,10 +2785,10 @@ _set_verify_mode(PySSLContext *self, enum py_ssl_cert_requirements n)
"invalid value for verify_mode");
return -1;
}
-#ifdef TLS1_3_VERSION
- if (self->post_handshake_auth)
- mode |= SSL_VERIFY_POST_HANDSHAKE;
-#endif
+
+ /* bpo-37428: newPySSLSocket() sets SSL_VERIFY_POST_HANDSHAKE flag for
+ * server sockets and SSL_set_post_handshake_auth() for client. */
+
/* keep current verify cb */
verify_cb = SSL_CTX_get_verify_callback(self->ctx);
SSL_CTX_set_verify(self->ctx, mode, verify_cb);
@@ -3346,8 +3366,6 @@ get_post_handshake_auth(PySSLContext *self, void *c) {
#if TLS1_3_VERSION
static int
set_post_handshake_auth(PySSLContext *self, PyObject *arg, void *c) {
- int (*verify_cb)(int, X509_STORE_CTX *) = NULL;
- int mode = SSL_CTX_get_verify_mode(self->ctx);
int pha = PyObject_IsTrue(arg);
if (pha == -1) {
@@ -3355,17 +3373,8 @@ set_post_handshake_auth(PySSLContext *self, PyObject *arg, void *c) {
}
self->post_handshake_auth = pha;
- /* client-side socket setting, ignored by server-side */
- SSL_CTX_set_post_handshake_auth(self->ctx, pha);
-
- /* server-side socket setting, ignored by client-side */
- verify_cb = SSL_CTX_get_verify_callback(self->ctx);
- if (pha) {
- mode |= SSL_VERIFY_POST_HANDSHAKE;
- } else {
- mode ^= SSL_VERIFY_POST_HANDSHAKE;
- }
- SSL_CTX_set_verify(self->ctx, mode, verify_cb);
+ /* bpo-37428: newPySSLSocket() sets SSL_VERIFY_POST_HANDSHAKE flag for
+ * server sockets and SSL_set_post_handshake_auth() for client. */
return 0;
}

@ -0,0 +1,70 @@
diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst
index 2f59ece..d756916 100644
--- a/Doc/library/http.client.rst
+++ b/Doc/library/http.client.rst
@@ -88,6 +88,11 @@ The module provides the following classes:
:func:`ssl._create_unverified_context` can be passed to the *context*
parameter.
+ .. versionchanged:: 3.7.4
+ This class now enables TLS 1.3
+ :attr:`ssl.SSLContext.post_handshake_auth` for the default *context* or
+ when *cert_file* is passed with a custom *context*.
+
.. deprecated:: 3.6
*key_file* and *cert_file* are deprecated in favor of *context*.
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 1a6bd8a..f0d2642 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -1390,6 +1390,9 @@ else:
self.cert_file = cert_file
if context is None:
context = ssl._create_default_https_context()
+ # enable PHA for TLS 1.3 connections if available
+ if context.post_handshake_auth is not None:
+ context.post_handshake_auth = True
will_verify = context.verify_mode != ssl.CERT_NONE
if check_hostname is None:
check_hostname = context.check_hostname
@@ -1398,6 +1401,10 @@ else:
"either CERT_OPTIONAL or CERT_REQUIRED")
if key_file or cert_file:
context.load_cert_chain(cert_file, key_file)
+ # cert and key file means the user wants to authenticate.
+ # enable TLS 1.3 PHA implicitly even for custom contexts.
+ if context.post_handshake_auth is not None:
+ context.post_handshake_auth = True
self._context = context
self._check_hostname = check_hostname
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index 714d521..5795b7a 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -1709,6 +1709,24 @@ class HTTPSTest(TestCase):
self.assertEqual(h, c.host)
self.assertEqual(p, c.port)
+ def test_tls13_pha(self):
+ import ssl
+ if not ssl.HAS_TLSv1_3:
+ self.skipTest('TLS 1.3 support required')
+ # just check status of PHA flag
+ h = client.HTTPSConnection('localhost', 443)
+ self.assertTrue(h._context.post_handshake_auth)
+
+ context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+ self.assertFalse(context.post_handshake_auth)
+ h = client.HTTPSConnection('localhost', 443, context=context)
+ self.assertIs(h._context, context)
+ self.assertFalse(h._context.post_handshake_auth)
+
+ h = client.HTTPSConnection('localhost', 443, context=context,
+ cert_file=CERT_localhost)
+ self.assertTrue(h._context.post_handshake_auth)
+
class RequestBodyTest(TestCase):
"""Test cases where a request includes a message body."""

File diff suppressed because it is too large Load Diff

@ -0,0 +1,93 @@
diff --git a/Lib/http/cookiejar.py b/Lib/http/cookiejar.py
index adf956d..97599d4 100644
--- a/Lib/http/cookiejar.py
+++ b/Lib/http/cookiejar.py
@@ -1148,6 +1148,11 @@ class DefaultCookiePolicy(CookiePolicy):
req_host, erhn = eff_request_host(request)
domain = cookie.domain
+ if domain and not domain.startswith("."):
+ dotdomain = "." + domain
+ else:
+ dotdomain = domain
+
# strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
if (cookie.version == 0 and
(self.strict_ns_domain & self.DomainStrictNonDomain) and
@@ -1160,7 +1165,7 @@ class DefaultCookiePolicy(CookiePolicy):
_debug(" effective request-host name %s does not domain-match "
"RFC 2965 cookie domain %s", erhn, domain)
return False
- if cookie.version == 0 and not ("."+erhn).endswith(domain):
+ if cookie.version == 0 and not ("."+erhn).endswith(dotdomain):
_debug(" request-host %s does not match Netscape cookie domain "
"%s", req_host, domain)
return False
@@ -1174,7 +1179,11 @@ class DefaultCookiePolicy(CookiePolicy):
req_host = "."+req_host
if not erhn.startswith("."):
erhn = "."+erhn
- if not (req_host.endswith(domain) or erhn.endswith(domain)):
+ if domain and not domain.startswith("."):
+ dotdomain = "." + domain
+ else:
+ dotdomain = domain
+ if not (req_host.endswith(dotdomain) or erhn.endswith(dotdomain)):
#_debug(" request domain %s does not match cookie domain %s",
# req_host, domain)
return False
diff --git a/Lib/test/test_http_cookiejar.py b/Lib/test/test_http_cookiejar.py
index abc625d..6e1b308 100644
--- a/Lib/test/test_http_cookiejar.py
+++ b/Lib/test/test_http_cookiejar.py
@@ -415,6 +415,7 @@ class CookieTests(unittest.TestCase):
("http://foo.bar.com/", ".foo.bar.com", True),
("http://foo.bar.com/", "foo.bar.com", True),
("http://foo.bar.com/", ".bar.com", True),
+ ("http://foo.bar.com/", "bar.com", True),
("http://foo.bar.com/", "com", True),
("http://foo.com/", "rhubarb.foo.com", False),
("http://foo.com/", ".foo.com", True),
@@ -425,6 +426,8 @@ class CookieTests(unittest.TestCase):
("http://foo/", "foo", True),
("http://foo/", "foo.local", True),
("http://foo/", ".local", True),
+ ("http://barfoo.com", ".foo.com", False),
+ ("http://barfoo.com", "foo.com", False),
]:
request = urllib.request.Request(url)
r = pol.domain_return_ok(domain, request)
@@ -959,6 +962,33 @@ class CookieTests(unittest.TestCase):
c.add_cookie_header(req)
self.assertFalse(req.has_header("Cookie"))
+ c.clear()
+
+ pol.set_blocked_domains([])
+ req = urllib.request.Request("http://acme.com/")
+ res = FakeResponse(headers, "http://acme.com/")
+ cookies = c.make_cookies(res, req)
+ c.extract_cookies(res, req)
+ self.assertEqual(len(c), 1)
+
+ req = urllib.request.Request("http://acme.com/")
+ c.add_cookie_header(req)
+ self.assertTrue(req.has_header("Cookie"))
+
+ req = urllib.request.Request("http://badacme.com/")
+ c.add_cookie_header(req)
+ self.assertFalse(pol.return_ok(cookies[0], req))
+ self.assertFalse(req.has_header("Cookie"))
+
+ p = pol.set_blocked_domains(["acme.com"])
+ req = urllib.request.Request("http://acme.com/")
+ c.add_cookie_header(req)
+ self.assertFalse(req.has_header("Cookie"))
+
+ req = urllib.request.Request("http://badacme.com/")
+ c.add_cookie_header(req)
+ self.assertFalse(req.has_header("Cookie"))
+
def test_secure(self):
for ns in True, False:
for whitespace in " ", "":

@ -0,0 +1,95 @@
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 737951e4b1b1..bc9c9b6241d4 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -1561,6 +1561,8 @@ def get_domain(value):
token, value = get_dot_atom(value)
except errors.HeaderParseError:
token, value = get_atom(value)
+ if value and value[0] == '@':
+ raise errors.HeaderParseError('Invalid Domain')
if leader is not None:
token[:0] = [leader]
domain.append(token)
diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py
index cdfa3729adc7..41ff6f8c000d 100644
--- a/Lib/email/_parseaddr.py
+++ b/Lib/email/_parseaddr.py
@@ -379,7 +379,12 @@ def getaddrspec(self):
aslist.append('@')
self.pos += 1
self.gotonext()
- return EMPTYSTRING.join(aslist) + self.getdomain()
+ domain = self.getdomain()
+ if not domain:
+ # Invalid domain, return an empty address instead of returning a
+ # local part to denote failed parsing.
+ return EMPTYSTRING
+ return EMPTYSTRING.join(aslist) + domain
def getdomain(self):
"""Get the complete domain name from an address."""
@@ -394,6 +399,10 @@ def getdomain(self):
elif self.field[self.pos] == '.':
self.pos += 1
sdlist.append('.')
+ elif self.field[self.pos] == '@':
+ # bpo-34155: Don't parse domains with two `@` like
+ # `a@malicious.org@important.com`.
+ return EMPTYSTRING
elif self.field[self.pos] in self.atomends:
break
else:
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index a2c900fa7fd2..02ef3e1006c6 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -1418,6 +1418,16 @@ def test_get_addr_spec_dot_atom(self):
self.assertEqual(addr_spec.domain, 'example.com')
self.assertEqual(addr_spec.addr_spec, 'star.a.star@example.com')
+ def test_get_addr_spec_multiple_domains(self):
+ with self.assertRaises(errors.HeaderParseError):
+ parser.get_addr_spec('star@a.star@example.com')
+
+ with self.assertRaises(errors.HeaderParseError):
+ parser.get_addr_spec('star@a@example.com')
+
+ with self.assertRaises(errors.HeaderParseError):
+ parser.get_addr_spec('star@172.17.0.1@example.com')
+
# get_obs_route
def test_get_obs_route_simple(self):
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index f97ccc6711cc..68d052279987 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -3035,6 +3035,20 @@ def test_parseaddr_empty(self):
self.assertEqual(utils.parseaddr('<>'), ('', ''))
self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
+ def test_parseaddr_multiple_domains(self):
+ self.assertEqual(
+ utils.parseaddr('a@b@c'),
+ ('', '')
+ )
+ self.assertEqual(
+ utils.parseaddr('a@b.c@c'),
+ ('', '')
+ )
+ self.assertEqual(
+ utils.parseaddr('a@172.17.0.1@c'),
+ ('', '')
+ )
+
def test_noquote_dump(self):
self.assertEqual(
utils.formataddr(('A Silly Person', 'person@dom.ain')),
diff --git a/Misc/NEWS.d/next/Security/2019-05-04-13-33-37.bpo-34155.MJll68.rst b/Misc/NEWS.d/next/Security/2019-05-04-13-33-37.bpo-34155.MJll68.rst
new file mode 100644
index 000000000000..50292e29ed1d
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2019-05-04-13-33-37.bpo-34155.MJll68.rst
@@ -0,0 +1 @@
+Fix parsing of invalid email addresses with more than one ``@`` (e.g. a@b@c.com.) to not return the part before 2nd ``@`` as valid email address. Patch by maxking & jpic.

@ -0,0 +1,296 @@
diff --git a/Lib/test/libregrtest/cmdline.py b/Lib/test/libregrtest/cmdline.py
index 538ff05..e7f2013 100644
--- a/Lib/test/libregrtest/cmdline.py
+++ b/Lib/test/libregrtest/cmdline.py
@@ -263,7 +263,9 @@ def _create_parser():
help='only write the name of test cases that will be run'
' , don\'t execute them')
group.add_argument('-P', '--pgo', dest='pgo', action='store_true',
- help='enable Profile Guided Optimization training')
+ help='enable Profile Guided Optimization (PGO) training')
+ group.add_argument('--pgo-extended', action='store_true',
+ help='enable extended PGO training (slower training)')
group.add_argument('--fail-env-changed', action='store_true',
help='if a test file alters the environment, mark '
'the test as failed')
@@ -339,6 +341,8 @@ def _parse_args(args, **kwargs):
parser.error("-G/--failfast needs either -v or -W")
if ns.pgo and (ns.verbose or ns.verbose2 or ns.verbose3):
parser.error("--pgo/-v don't go together!")
+ if ns.pgo_extended:
+ ns.pgo = True # pgo_extended implies pgo
if ns.nowindows:
print("Warning: the --nowindows (-n) option is deprecated. "
diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py
index b6d05f6..524dbfa 100644
--- a/Lib/test/libregrtest/main.py
+++ b/Lib/test/libregrtest/main.py
@@ -17,6 +17,7 @@ from test.libregrtest.runtest import (
INTERRUPTED, CHILD_ERROR, TEST_DID_NOT_RUN,
PROGRESS_MIN_TIME, format_test_result)
from test.libregrtest.setup import setup_tests
+from test.libregrtest.pgo import setup_pgo_tests
from test.libregrtest.utils import removepy, count, format_duration, printlist
from test import support
try:
@@ -214,6 +215,10 @@ class Regrtest:
removepy(self.tests)
+ if self.ns.pgo:
+ # add default PGO tests if no tests are specified
+ setup_pgo_tests(self.ns)
+
stdtests = STDTESTS[:]
nottests = NOTTESTS.copy()
if self.ns.exclude:
@@ -601,6 +606,7 @@ class Regrtest:
input("Press any key to continue...")
support.PGO = self.ns.pgo
+ support.PGO_EXTENDED = self.ns.pgo_extended
setup_tests(self.ns)
diff --git a/Lib/test/libregrtest/pgo.py b/Lib/test/libregrtest/pgo.py
new file mode 100644
index 0000000..379ff05
--- /dev/null
+++ b/Lib/test/libregrtest/pgo.py
@@ -0,0 +1,55 @@
+# Set of tests run by default if --pgo is specified. The tests below were
+# chosen based on the following criteria: either they exercise a commonly used
+# C extension module or type, or they run some relatively typical Python code.
+# Long running tests should be avoided because the PGO instrumented executable
+# runs slowly.
+PGO_TESTS = [
+ 'test_array',
+ 'test_base64',
+ 'test_binascii',
+ 'test_binop',
+ 'test_bisect',
+ 'test_bytes',
+ 'test_bz2',
+ 'test_cmath',
+ 'test_codecs',
+ 'test_collections',
+ 'test_complex',
+ 'test_dataclasses',
+ 'test_datetime',
+ 'test_decimal',
+ 'test_difflib',
+ 'test_embed',
+ 'test_float',
+ 'test_fstring',
+ 'test_functools',
+ 'test_generators',
+ 'test_hashlib',
+ 'test_heapq',
+ 'test_int',
+ 'test_itertools',
+ 'test_json',
+ 'test_long',
+ 'test_lzma',
+ 'test_math',
+ 'test_memoryview',
+ 'test_operator',
+ 'test_ordered_dict',
+ 'test_pickle',
+ 'test_pprint',
+ 'test_re',
+ 'test_set',
+ 'test_sqlite',
+ 'test_statistics',
+ 'test_struct',
+ 'test_tabnanny',
+ 'test_time',
+ 'test_unicode',
+ 'test_xml_etree',
+ 'test_xml_etree_c',
+]
+
+def setup_pgo_tests(ns):
+ if not ns.args and not ns.pgo_extended:
+ # run default set of tests for PGO training
+ ns.args = PGO_TESTS[:]
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 764057a..468ee46 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -2039,6 +2039,7 @@ class AbstractPickleTests(unittest.TestCase):
FRAME_SIZE_TARGET = 64 * 1024
+ @support.skip_if_pgo_task
def check_frame_opcodes(self, pickled):
"""
Check the arguments of FRAME opcodes in a protocol 4+ pickle.
@@ -2059,6 +2060,7 @@ class AbstractPickleTests(unittest.TestCase):
frame_size = len(pickled) - last_pos - frame_opcode_size
self.assertEqual(frame_size, last_arg)
+ @support.skip_if_pgo_task
def test_framing_many_objects(self):
obj = list(range(10**5))
for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
index 66c0fed..e80a819 100644
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -953,6 +953,10 @@ SAVEDCWD = os.getcwd()
# useful for PGO
PGO = False
+# Set by libregrtest/main.py if we are running the extended (time consuming)
+# PGO task. If this is True, PGO is also True.
+PGO_EXTENDED = False
+
@contextlib.contextmanager
def temp_dir(path=None, quiet=False):
"""Return a context manager that creates a temporary directory.
@@ -2442,6 +2446,11 @@ def skip_unless_xattr(test):
msg = "no non-broken extended attribute support"
return test if ok else unittest.skip(msg)(test)
+def skip_if_pgo_task(test):
+ """Skip decorator for tests not run in (non-extended) PGO task"""
+ ok = not PGO or PGO_EXTENDED
+ msg = "Not run for (non-extended) PGO task"
+ return test if ok else unittest.skip(msg)(test)
def fs_is_case_insensitive(directory):
"""Detects if the file system for the specified directory is case-insensitive."""
diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py
index f340f23..ebb151c 100644
--- a/Lib/test/test_bz2.py
+++ b/Lib/test/test_bz2.py
@@ -654,6 +654,7 @@ class BZ2CompressorTest(BaseTest):
data += bz2c.flush()
self.assertEqual(ext_decompress(data), self.TEXT)
+ @support.skip_if_pgo_task
@bigmemtest(size=_4G + 100, memuse=2)
def testCompress4G(self, size):
# "Test BZ2Compressor.compress()/flush() with >4GiB input"
@@ -712,6 +713,7 @@ class BZ2DecompressorTest(BaseTest):
self.assertRaises(EOFError, bz2d.decompress, b"anything")
self.assertRaises(EOFError, bz2d.decompress, b"")
+ @support.skip_if_pgo_task
@bigmemtest(size=_4G + 100, memuse=3.3)
def testDecompress4G(self, size):
# "Test BZ2Decompressor.decompress() with >4GiB input"
diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py
index 9317951..8c1d016 100644
--- a/Lib/test/test_itertools.py
+++ b/Lib/test/test_itertools.py
@@ -2023,6 +2023,7 @@ class RegressionTests(unittest.TestCase):
self.assertRaises(AssertionError, list, cycle(gen1()))
self.assertEqual(hist, [0,1])
+ @support.skip_if_pgo_task
def test_long_chain_of_empty_iterables(self):
# Make sure itertools.chain doesn't run into recursion limits when
# dealing with long chains of empty iterables. Even with a high
diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py
index 3dc2c1e..117de0a 100644
--- a/Lib/test/test_lzma.py
+++ b/Lib/test/test_lzma.py
@@ -333,6 +333,7 @@ class CompressorDecompressorTestCase(unittest.TestCase):
# Test with inputs larger than 4GiB.
+ @support.skip_if_pgo_task
@bigmemtest(size=_4G + 100, memuse=2)
def test_compressor_bigmem(self, size):
lzc = LZMACompressor()
@@ -344,6 +345,7 @@ class CompressorDecompressorTestCase(unittest.TestCase):
finally:
ddata = None
+ @support.skip_if_pgo_task
@bigmemtest(size=_4G + 100, memuse=3)
def test_decompressor_bigmem(self, size):
lzd = LZMADecompressor()
diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py
index 5347bb1..9d83217 100644
--- a/Lib/test/test_regrtest.py
+++ b/Lib/test/test_regrtest.py
@@ -6,6 +6,7 @@ Note: test_regrtest cannot be run twice in parallel.
import contextlib
import faulthandler
+import glob
import io
import os.path
import platform
@@ -532,6 +533,31 @@ class BaseTestCase(unittest.TestCase):
return proc.stdout
+class CheckActualTests(BaseTestCase):
+ """
+ Check that regrtest appears to find the expected set of tests.
+ """
+
+ def test_finds_expected_number_of_tests(self):
+ args = ['-Wd', '-E', '-bb', '-m', 'test.regrtest', '--list-tests']
+ output = self.run_python(args)
+ rough_number_of_tests_found = len(output.splitlines())
+ actual_testsuite_glob = os.path.join(os.path.dirname(__file__),
+ 'test*.py')
+ rough_counted_test_py_files = len(glob.glob(actual_testsuite_glob))
+ # We're not trying to duplicate test finding logic in here,
+ # just give a rough estimate of how many there should be and
+ # be near that. This is a regression test to prevent mishaps
+ # such as https://bugs.python.org/issue37667 in the future.
+ # If you need to change the values in here during some
+ # mythical future test suite reorganization, don't go
+ # overboard with logic and keep that goal in mind.
+ self.assertGreater(rough_number_of_tests_found,
+ rough_counted_test_py_files*9//10,
+ msg='Unexpectedly low number of tests found in:\n'
+ f'{", ".join(output.splitlines())}')
+
+
class ProgramsTestCase(BaseTestCase):
"""
Test various ways to run the Python test suite. Use options close
diff --git a/Makefile.pre.in b/Makefile.pre.in
index b452289..cc428ac 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -247,9 +247,10 @@ TCLTK_INCLUDES= @TCLTK_INCLUDES@
TCLTK_LIBS= @TCLTK_LIBS@
# The task to run while instrumented when building the profile-opt target.
-# We exclude unittests with -x that take a rediculious amount of time to
-# run in the instrumented training build or do not provide much value.
-PROFILE_TASK=-m test.regrtest --pgo
+# To speed up profile generation, we don't run the full unit test suite
+# by default. The default is "-m test --pgo". To run more tests, use
+# PROFILE_TASK="-m test --pgo-extended"
+PROFILE_TASK= @PROFILE_TASK@
# report files for gcov / lcov coverage report
COVERAGE_INFO= $(abs_builddir)/coverage.info
diff --git a/configure.ac b/configure.ac
index c071ec3..816fc5a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1308,6 +1308,14 @@ else
DEF_MAKE_RULE="all"
fi
+AC_ARG_VAR(PROFILE_TASK, Python args for PGO generation task)
+AC_MSG_CHECKING(PROFILE_TASK)
+if test -z "$PROFILE_TASK"
+then
+ PROFILE_TASK='-m test --pgo'
+fi
+AC_MSG_RESULT($PROFILE_TASK)
+
# Make llvm-relatec checks work on systems where llvm tools are not installed with their
# normal names in the default $PATH (ie: Ubuntu). They exist under the
# non-suffixed name in their versioned llvm directory.

@ -0,0 +1,22 @@
diff --git a/Lib/test/test_gdb.py b/Lib/test/test_gdb.py
index 9c15fca..c972409 100644
--- a/Lib/test/test_gdb.py
+++ b/Lib/test/test_gdb.py
@@ -279,8 +279,15 @@ class DebuggerTests(unittest.TestCase):
# gdb can insert additional '\n' and space characters in various places
# in its output, depending on the width of the terminal it's connected
# to (using its "wrap_here" function)
- m = re.match(r'.*#0\s+builtin_id\s+\(self\=.*,\s+v=\s*(.*?)\)\s+at\s+\S*Python/bltinmodule.c.*',
- gdb_output, re.DOTALL)
+ m = re.search(
+ # Match '#0 builtin_id(self=..., v=...)'
+ r'#0\s+builtin_id\s+\(self\=.*,\s+v=\s*(.*?)?\)'
+ # Match ' at Python/bltinmodule.c'.
+ # bpo-38239: builtin_id() is defined in Python/bltinmodule.c,
+ # but accept any "Directory\file.c" to support Link Time
+ # Optimization (LTO).
+ r'\s+at\s+\S*[A-Za-z]+/[A-Za-z0-9_-]+\.c',
+ gdb_output, re.DOTALL)
if not m:
self.fail('Unexpected gdb output: %r\n%s' % (gdb_output, gdb_output))
return m.group(1), gdb_output

@ -0,0 +1,54 @@
diff --git a/Lib/test/test_docxmlrpc.py b/Lib/test/test_docxmlrpc.py
index 0090333..d2adb21 100644
--- a/Lib/test/test_docxmlrpc.py
+++ b/Lib/test/test_docxmlrpc.py
@@ -1,5 +1,6 @@
from xmlrpc.server import DocXMLRPCServer
import http.client
+import re
import sys
from test import support
threading = support.import_module('threading')
@@ -193,6 +194,21 @@ class DocXMLRPCHTTPGETServer(unittest.TestCase):
b'method_annotation</strong></a>(x: bytes)</dt></dl>'),
response.read())
+ def test_server_title_escape(self):
+ # bpo-38243: Ensure that the server title and documentation
+ # are escaped for HTML.
+ self.serv.set_server_title('test_title<script>')
+ self.serv.set_server_documentation('test_documentation<script>')
+ self.assertEqual('test_title<script>', self.serv.server_title)
+ self.assertEqual('test_documentation<script>',
+ self.serv.server_documentation)
+
+ generated = self.serv.generate_html_documentation()
+ title = re.search(r'<title>(.+?)</title>', generated).group()
+ documentation = re.search(r'<p><tt>(.+?)</tt></p>', generated).group()
+ self.assertEqual('<title>Python: test_title&lt;script&gt;</title>', title)
+ self.assertEqual('<p><tt>test_documentation&lt;script&gt;</tt></p>', documentation)
+
if __name__ == '__main__':
unittest.main()
diff --git a/Lib/xmlrpc/server.py b/Lib/xmlrpc/server.py
index 3e0dca0..efe5937 100644
--- a/Lib/xmlrpc/server.py
+++ b/Lib/xmlrpc/server.py
@@ -106,6 +106,7 @@ server.handle_request()
from xmlrpc.client import Fault, dumps, loads, gzip_encode, gzip_decode
from http.server import BaseHTTPRequestHandler
+import html
import http.server
import socketserver
import sys
@@ -904,7 +905,7 @@ class XMLRPCDocGenerator:
methods
)
- return documenter.page(self.server_title, documentation)
+ return documenter.page(html.escape(self.server_title), documentation)
class DocXMLRPCRequestHandler(SimpleXMLRPCRequestHandler):
"""XML-RPC and documentation request handler class.

@ -0,0 +1,44 @@
diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py
index d0cd84f..9d2c28c 100644
--- a/Lib/test/test_site.py
+++ b/Lib/test/test_site.py
@@ -10,6 +10,7 @@ from test import support
from test.support import (captured_stderr, TESTFN, EnvironmentVarGuard,
change_cwd)
import builtins
+import glob
import os
import sys
import re
@@ -500,6 +501,23 @@ class ImportSideEffectTests(unittest.TestCase):
class StartupImportTests(unittest.TestCase):
def test_startup_imports(self):
+ # Get sys.path in isolated mode (python3 -I)
+ popen = subprocess.Popen([sys.executable, '-I', '-c',
+ 'import sys; print(repr(sys.path))'],
+ stdout=subprocess.PIPE,
+ encoding='utf-8')
+ stdout = popen.communicate()[0]
+ self.assertEqual(popen.returncode, 0, repr(stdout))
+ isolated_paths = eval(stdout)
+
+ # bpo-27807: Even with -I, the site module executes all .pth files
+ # found in sys.path (see site.addpackage()). Skip the test if at least
+ # one .pth file is found.
+ for path in isolated_paths:
+ pth_files = glob.glob(os.path.join(path, "*.pth"))
+ if pth_files:
+ self.skipTest(f"found {len(pth_files)} .pth files in: {path}")
+
# This tests checks which modules are loaded by Python when it
# initially starts upon startup.
popen = subprocess.Popen([sys.executable, '-I', '-v', '-c',
@@ -508,6 +526,7 @@ class StartupImportTests(unittest.TestCase):
stderr=subprocess.PIPE,
encoding='utf-8')
stdout, stderr = popen.communicate()
+ self.assertEqual(popen.returncode, 0, (stdout, stderr))
modules = eval(stdout)
self.assertIn('site', modules)

@ -0,0 +1,193 @@
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index 876fcd4..fe9a32b 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -1445,40 +1445,64 @@ class HandlerTests(unittest.TestCase):
bypass = {'exclude_simple': True, 'exceptions': []}
self.assertTrue(_proxy_bypass_macosx_sysconf('test', bypass))
- def test_basic_auth(self, quote_char='"'):
- opener = OpenerDirector()
- password_manager = MockPasswordManager()
- auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
- realm = "ACME Widget Store"
- http_handler = MockHTTPHandler(
- 401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
- (quote_char, realm, quote_char))
- opener.add_handler(auth_handler)
- opener.add_handler(http_handler)
- self._test_basic_auth(opener, auth_handler, "Authorization",
- realm, http_handler, password_manager,
- "http://acme.example.com/protected",
- "http://acme.example.com/protected",
- )
-
- def test_basic_auth_with_single_quoted_realm(self):
- self.test_basic_auth(quote_char="'")
-
- def test_basic_auth_with_unquoted_realm(self):
- opener = OpenerDirector()
- password_manager = MockPasswordManager()
- auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
- realm = "ACME Widget Store"
- http_handler = MockHTTPHandler(
- 401, 'WWW-Authenticate: Basic realm=%s\r\n\r\n' % realm)
- opener.add_handler(auth_handler)
- opener.add_handler(http_handler)
- with self.assertWarns(UserWarning):
+ def check_basic_auth(self, headers, realm):
+ with self.subTest(realm=realm, headers=headers):
+ opener = OpenerDirector()
+ password_manager = MockPasswordManager()
+ auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
+ body = '\r\n'.join(headers) + '\r\n\r\n'
+ http_handler = MockHTTPHandler(401, body)
+ opener.add_handler(auth_handler)
+ opener.add_handler(http_handler)
self._test_basic_auth(opener, auth_handler, "Authorization",
- realm, http_handler, password_manager,
- "http://acme.example.com/protected",
- "http://acme.example.com/protected",
- )
+ realm, http_handler, password_manager,
+ "http://acme.example.com/protected",
+ "http://acme.example.com/protected")
+
+ def test_basic_auth(self):
+ realm = "realm2@example.com"
+ realm2 = "realm2@example.com"
+ basic = f'Basic realm="{realm}"'
+ basic2 = f'Basic realm="{realm2}"'
+ other_no_realm = 'Otherscheme xxx'
+ digest = (f'Digest realm="{realm2}", '
+ f'qop="auth, auth-int", '
+ f'nonce="dcd98b7102dd2f0e8b11d0f600bfb0c093", '
+ f'opaque="5ccc069c403ebaf9f0171e9517f40e41"')
+ for realm_str in (
+ # test "quote" and 'quote'
+ f'Basic realm="{realm}"',
+ f"Basic realm='{realm}'",
+
+ # charset is ignored
+ f'Basic realm="{realm}", charset="UTF-8"',
+
+ # Multiple challenges per header
+ f'{basic}, {basic2}',
+ f'{basic}, {other_no_realm}',
+ f'{other_no_realm}, {basic}',
+ f'{basic}, {digest}',
+ f'{digest}, {basic}',
+ ):
+ headers = [f'WWW-Authenticate: {realm_str}']
+ self.check_basic_auth(headers, realm)
+
+ # no quote: expect a warning
+ with support.check_warnings(("Basic Auth Realm was unquoted",
+ UserWarning)):
+ headers = [f'WWW-Authenticate: Basic realm={realm}']
+ self.check_basic_auth(headers, realm)
+
+ # Multiple headers: one challenge per header.
+ # Use the first Basic realm.
+ for challenges in (
+ [basic, basic2],
+ [basic, digest],
+ [digest, basic],
+ ):
+ headers = [f'WWW-Authenticate: {challenge}'
+ for challenge in challenges]
+ self.check_basic_auth(headers, realm)
def test_proxy_basic_auth(self):
opener = OpenerDirector()
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index c9945d9..6624e04 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -945,8 +945,15 @@ class AbstractBasicAuthHandler:
# allow for double- and single-quoted realm values
# (single quotes are a violation of the RFC, but appear in the wild)
- rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
- 'realm=(["\']?)([^"\']*)\\2', re.I)
+ rx = re.compile('(?:^|,)' # start of the string or ','
+ '[ \t]*' # optional whitespaces
+ '([^ \t]+)' # scheme like "Basic"
+ '[ \t]+' # mandatory whitespaces
+ # realm=xxx
+ # realm='xxx'
+ # realm="xxx"
+ 'realm=(["\']?)([^"\']*)\\2',
+ re.I)
# XXX could pre-emptively send auth info already accepted (RFC 2617,
# end of section 2, and section 1.2 immediately after "credentials"
@@ -958,27 +965,51 @@ class AbstractBasicAuthHandler:
self.passwd = password_mgr
self.add_password = self.passwd.add_password
+ def _parse_realm(self, header):
+ # parse WWW-Authenticate header: accept multiple challenges per header
+ found_challenge = False
+ for mo in AbstractBasicAuthHandler.rx.finditer(header):
+ scheme, quote, realm = mo.groups()
+ if quote not in ['"', "'"]:
+ warnings.warn("Basic Auth Realm was unquoted",
+ UserWarning, 3)
+
+ yield (scheme, realm)
+
+ found_challenge = True
+
+ if not found_challenge:
+ if header:
+ scheme = header.split()[0]
+ else:
+ scheme = ''
+ yield (scheme, None)
+
def http_error_auth_reqed(self, authreq, host, req, headers):
# host may be an authority (without userinfo) or a URL with an
# authority
- # XXX could be multiple headers
- authreq = headers.get(authreq, None)
+ headers = headers.get_all(authreq)
+ if not headers:
+ # no header found
+ return
- if authreq:
- scheme = authreq.split()[0]
- if scheme.lower() != 'basic':
- raise ValueError("AbstractBasicAuthHandler does not"
- " support the following scheme: '%s'" %
- scheme)
- else:
- mo = AbstractBasicAuthHandler.rx.search(authreq)
- if mo:
- scheme, quote, realm = mo.groups()
- if quote not in ['"',"'"]:
- warnings.warn("Basic Auth Realm was unquoted",
- UserWarning, 2)
- if scheme.lower() == 'basic':
- return self.retry_http_basic_auth(host, req, realm)
+ unsupported = None
+ for header in headers:
+ for scheme, realm in self._parse_realm(header):
+ if scheme.lower() != 'basic':
+ unsupported = scheme
+ continue
+
+ if realm is not None:
+ # Use the first matching Basic challenge.
+ # Ignore following challenges even if they use the Basic
+ # scheme.
+ return self.retry_http_basic_auth(host, req, realm)
+
+ if unsupported is not None:
+ raise ValueError("AbstractBasicAuthHandler does not "
+ "support the following scheme: %r"
+ % (scheme,))
def retry_http_basic_auth(self, host, req, realm):
user, pw = self.passwd.find_user_password(realm, host)

@ -0,0 +1,67 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Wed, 15 Jul 2020 05:36:36 -0700
Subject: [PATCH] 00351: Avoid infinite loop in the tarfile module
Avoid infinite loop when reading specially crafted TAR files using the tarfile module
(CVE-2019-20907).
Fixed upstream: https://bugs.python.org/issue39017
---
Lib/tarfile.py | 2 ++
Lib/test/recursion.tar | Bin 0 -> 516 bytes
Lib/test/test_tarfile.py | 7 +++++++
.../2020-07-12-22-16-58.bpo-39017.x3Cg-9.rst | 1 +
4 files changed, 10 insertions(+)
create mode 100644 Lib/test/recursion.tar
create mode 100644 Misc/NEWS.d/next/Library/2020-07-12-22-16-58.bpo-39017.x3Cg-9.rst
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 62d22150f5..2ea47978ff 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -1231,6 +1231,8 @@ class TarInfo(object):
length, keyword = match.groups()
length = int(length)
+ if length == 0:
+ raise InvalidHeaderError("invalid header")
value = buf[match.end(2) + 1:match.start(1) + length - 1]
# Normally, we could just use "utf-8" as the encoding and "strict"
diff --git a/Lib/test/recursion.tar b/Lib/test/recursion.tar
new file mode 100644
index 0000000000000000000000000000000000000000..b8237251964983f54ed1966297e887636cd0c5f4
GIT binary patch
literal 516
zcmYdFPRz+kEn=W0Fn}74P8%Xw3X=l~85kIuo0>8xq$A1Gm}!7)KUsFc41m#O8A5+e
I1_}|j06>QaCIA2c
literal 0
HcmV?d00001
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 4cd7d5370f..573be812ea 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -395,6 +395,13 @@ class CommonReadTest(ReadTest):
with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"):
tar.extractfile(t).read()
+ def test_length_zero_header(self):
+ # bpo-39017 (CVE-2019-20907): reading a zero-length header should fail
+ # with an exception
+ with self.assertRaisesRegex(tarfile.ReadError, "file could not be opened successfully"):
+ with tarfile.open(support.findfile('recursion.tar')) as tar:
+ pass
+
class MiscReadTestBase(CommonReadTest):
def requires_name_attribute(self):
pass
diff --git a/Misc/NEWS.d/next/Library/2020-07-12-22-16-58.bpo-39017.x3Cg-9.rst b/Misc/NEWS.d/next/Library/2020-07-12-22-16-58.bpo-39017.x3Cg-9.rst
new file mode 100644
index 0000000000..ad26676f8b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-07-12-22-16-58.bpo-39017.x3Cg-9.rst
@@ -0,0 +1 @@
+Avoid infinite loop when reading specially crafted TAR files using the tarfile module (CVE-2019-20907).

@ -0,0 +1,70 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Tapas Kundu <39723251+tapakund@users.noreply.github.com>
Date: Wed, 1 Jul 2020 01:00:22 +0530
Subject: [PATCH] 00352: Resolve hash collisions for IPv4Interface and
IPv6Interface
CVE-2020-14422
The hash() methods of classes IPv4Interface and IPv6Interface had issue
of generating constant hash values of 32 and 128 respectively causing hash collisions.
The fix uses the hash() function to generate hash values for the objects
instead of XOR operation.
Fixed upstream: https://bugs.python.org/issue41004
---
Lib/ipaddress.py | 4 ++--
Lib/test/test_ipaddress.py | 11 +++++++++++
.../Security/2020-06-29-16-02-29.bpo-41004.ovF0KZ.rst | 1 +
3 files changed, 14 insertions(+), 2 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2020-06-29-16-02-29.bpo-41004.ovF0KZ.rst
diff --git a/Lib/ipaddress.py b/Lib/ipaddress.py
index 583f02ad54..98492136ca 100644
--- a/Lib/ipaddress.py
+++ b/Lib/ipaddress.py
@@ -1418,7 +1418,7 @@ class IPv4Interface(IPv4Address):
return False
def __hash__(self):
- return self._ip ^ self._prefixlen ^ int(self.network.network_address)
+ return hash((self._ip, self._prefixlen, int(self.network.network_address)))
__reduce__ = _IPAddressBase.__reduce__
@@ -2092,7 +2092,7 @@ class IPv6Interface(IPv6Address):
return False
def __hash__(self):
- return self._ip ^ self._prefixlen ^ int(self.network.network_address)
+ return hash((self._ip, self._prefixlen, int(self.network.network_address)))
__reduce__ = _IPAddressBase.__reduce__
diff --git a/Lib/test/test_ipaddress.py b/Lib/test/test_ipaddress.py
index 1cef4217bc..7de444af4a 100644
--- a/Lib/test/test_ipaddress.py
+++ b/Lib/test/test_ipaddress.py
@@ -1990,6 +1990,17 @@ class IpaddrUnitTest(unittest.TestCase):
sixtofouraddr.sixtofour)
self.assertFalse(bad_addr.sixtofour)
+ # issue41004 Hash collisions in IPv4Interface and IPv6Interface
+ def testV4HashIsNotConstant(self):
+ ipv4_address1 = ipaddress.IPv4Interface("1.2.3.4")
+ ipv4_address2 = ipaddress.IPv4Interface("2.3.4.5")
+ self.assertNotEqual(ipv4_address1.__hash__(), ipv4_address2.__hash__())
+
+ # issue41004 Hash collisions in IPv4Interface and IPv6Interface
+ def testV6HashIsNotConstant(self):
+ ipv6_address1 = ipaddress.IPv6Interface("2001:658:22a:cafe:200:0:0:1")
+ ipv6_address2 = ipaddress.IPv6Interface("2001:658:22a:cafe:200:0:0:2")
+ self.assertNotEqual(ipv6_address1.__hash__(), ipv6_address2.__hash__())
if __name__ == '__main__':
unittest.main()
diff --git a/Misc/NEWS.d/next/Security/2020-06-29-16-02-29.bpo-41004.ovF0KZ.rst b/Misc/NEWS.d/next/Security/2020-06-29-16-02-29.bpo-41004.ovF0KZ.rst
new file mode 100644
index 0000000000..f5a9db52ff
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2020-06-29-16-02-29.bpo-41004.ovF0KZ.rst
@@ -0,0 +1 @@
+CVE-2020-14422: The __hash__() methods of ipaddress.IPv4Interface and ipaddress.IPv6Interface incorrectly generated constant hash values of 32 and 128 respectively. This resulted in always causing hash collisions. The fix uses hash() to generate hash values for the tuple of (address, mask length, network address).

@ -0,0 +1,97 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Lumir Balhar <lbalhar@redhat.com>
Date: Tue, 4 Aug 2020 12:04:03 +0200
Subject: [PATCH] 00353: Original names for architectures with different names
downstream
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
https://fedoraproject.org/wiki/Changes/Python_Upstream_Architecture_Names
Pythons in RHEL/Fedora used different names for some architectures
than upstream and other distros (for example ppc64 vs. powerpc64).
This was patched in patch 274, now it is sedded if %with legacy_archnames.
That meant that an extension built with the default upstream settings
(on other distro or as an manylinux wheel) could not been found by Python
on RHEL/Fedora because it had a different suffix.
This patch adds the legacy names to importlib so Python is able
to import extensions with a legacy architecture name in its
file name.
It work both ways, so it support both %with and %without legacy_archnames.
WARNING: This patch has no effect on Python built with bootstrap
enabled because Python/importlib_external.h is not regenerated
and therefore Python during bootstrap contains importlib from
upstream without this feature. It's possible to include
Python/importlib_external.h to this patch but it'd make rebasing
a nightmare because it's basically a binary file.
Co-authored-by: Miro Hrončok <miro@hroncok.cz>
---
Lib/importlib/_bootstrap_external.py | 40 ++++++++++++++++++++++++++--
1 file changed, 38 insertions(+), 2 deletions(-)
diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py
index 9feec50842..5bb2454a5c 100644
--- a/Lib/importlib/_bootstrap_external.py
+++ b/Lib/importlib/_bootstrap_external.py
@@ -1361,7 +1361,7 @@ def _get_supported_file_loaders():
Each item is a tuple (loader, suffixes).
"""
- extensions = ExtensionFileLoader, _imp.extension_suffixes()
+ extensions = ExtensionFileLoader, _alternative_architectures(_imp.extension_suffixes())
source = SourceFileLoader, SOURCE_SUFFIXES
bytecode = SourcelessFileLoader, BYTECODE_SUFFIXES
return [extensions, source, bytecode]
@@ -1428,7 +1428,7 @@ def _setup(_bootstrap_module):
# Constants
setattr(self_module, '_relax_case', _make_relax_case())
- EXTENSION_SUFFIXES.extend(_imp.extension_suffixes())
+ EXTENSION_SUFFIXES.extend(_alternative_architectures(_imp.extension_suffixes()))
if builtin_os == 'nt':
SOURCE_SUFFIXES.append('.pyw')
if '_d.pyd' in EXTENSION_SUFFIXES:
@@ -1441,3 +1441,39 @@ def _install(_bootstrap_module):
supported_loaders = _get_supported_file_loaders()
sys.path_hooks.extend([FileFinder.path_hook(*supported_loaders)])
sys.meta_path.append(PathFinder)
+
+
+_ARCH_MAP = {
+ "-arm-linux-gnueabi.": "-arm-linux-gnueabihf.",
+ "-armeb-linux-gnueabi.": "-armeb-linux-gnueabihf.",
+ "-mips64-linux-gnu.": "-mips64-linux-gnuabi64.",
+ "-mips64el-linux-gnu.": "-mips64el-linux-gnuabi64.",
+ "-ppc-linux-gnu.": "-powerpc-linux-gnu.",
+ "-ppc-linux-gnuspe.": "-powerpc-linux-gnuspe.",
+ "-ppc64-linux-gnu.": "-powerpc64-linux-gnu.",
+ "-ppc64le-linux-gnu.": "-powerpc64le-linux-gnu.",
+ # The above, but the other way around:
+ "-arm-linux-gnueabihf.": "-arm-linux-gnueabi.",
+ "-armeb-linux-gnueabihf.": "-armeb-linux-gnueabi.",
+ "-mips64-linux-gnuabi64.": "-mips64-linux-gnu.",
+ "-mips64el-linux-gnuabi64.": "-mips64el-linux-gnu.",
+ "-powerpc-linux-gnu.": "-ppc-linux-gnu.",
+ "-powerpc-linux-gnuspe.": "-ppc-linux-gnuspe.",
+ "-powerpc64-linux-gnu.": "-ppc64-linux-gnu.",
+ "-powerpc64le-linux-gnu.": "-ppc64le-linux-gnu.",
+}
+
+
+def _alternative_architectures(suffixes):
+ """Add a suffix with an alternative architecture name
+ to the list of suffixes so an extension built with
+ the default (upstream) setting is loadable with our Pythons
+ """
+
+ for suffix in suffixes:
+ for original, alternative in _ARCH_MAP.items():
+ if original in suffix:
+ suffixes.append(suffix.replace(original, alternative))
+ return suffixes
+
+ return suffixes

@ -0,0 +1,73 @@
diff --git a/Lib/http/client.py b/Lib/http/client.py
index f0d2642..0a044e9 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -151,6 +151,10 @@ _contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
# We are more lenient for assumed real world compatibility purposes.
+# These characters are not allowed within HTTP method names
+# to prevent http header injection.
+_contains_disallowed_method_pchar_re = re.compile('[\x00-\x1f]')
+
# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
@@ -1117,6 +1121,8 @@ class HTTPConnection:
else:
raise CannotSendRequest(self.__state)
+ self._validate_method(method)
+
# Save the method we use, we need it later in the response phase
self._method = method
if not url:
@@ -1207,6 +1213,15 @@ class HTTPConnection:
# For HTTP/1.0, the server will assume "not chunked"
pass
+ def _validate_method(self, method):
+ """Validate a method name for putrequest."""
+ # prevent http header injection
+ match = _contains_disallowed_method_pchar_re.search(method)
+ if match:
+ raise ValueError(
+ f"method can't contain control characters. {method!r} "
+ f"(found at least {match.group()!r})")
+
def putheader(self, header, *values):
"""Send a request header line to the server.
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index 5795b7a..af0350f 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -359,6 +359,28 @@ class HeaderTests(TestCase):
self.assertEqual(lines[2], "header: Second: val")
+class HttpMethodTests(TestCase):
+ def test_invalid_method_names(self):
+ methods = (
+ 'GET\r',
+ 'POST\n',
+ 'PUT\n\r',
+ 'POST\nValue',
+ 'POST\nHOST:abc',
+ 'GET\nrHost:abc\n',
+ 'POST\rRemainder:\r',
+ 'GET\rHOST:\n',
+ '\nPUT'
+ )
+
+ for method in methods:
+ with self.assertRaisesRegex(
+ ValueError, "method can't contain control characters"):
+ conn = client.HTTPConnection('example.com')
+ conn.sock = FakeSocket(None)
+ conn.request(method=method, url="/")
+
+
class TransferEncodingTest(TestCase):
expected_body = b"It's just a flesh wound"

@ -0,0 +1,42 @@
diff --git a/Lib/test/multibytecodec_support.py b/Lib/test/multibytecodec_support.py
index f9884c6..98feec2 100644
--- a/Lib/test/multibytecodec_support.py
+++ b/Lib/test/multibytecodec_support.py
@@ -300,29 +300,23 @@ class TestBase_Mapping(unittest.TestCase):
self._test_mapping_file_plain()
def _test_mapping_file_plain(self):
- unichrs = lambda s: ''.join(map(chr, map(eval, s.split('+'))))
+ def unichrs(s):
+ return ''.join(chr(int(x, 16)) for x in s.split('+'))
+
urt_wa = {}
with self.open_mapping_file() as f:
for line in f:
if not line:
break
- data = line.split('#')[0].strip().split()
+ data = line.split('#')[0].split()
if len(data) != 2:
continue
- csetval = eval(data[0])
- if csetval <= 0x7F:
- csetch = bytes([csetval & 0xff])
- elif csetval >= 0x1000000:
- csetch = bytes([(csetval >> 24), ((csetval >> 16) & 0xff),
- ((csetval >> 8) & 0xff), (csetval & 0xff)])
- elif csetval >= 0x10000:
- csetch = bytes([(csetval >> 16), ((csetval >> 8) & 0xff),
- (csetval & 0xff)])
- elif csetval >= 0x100:
- csetch = bytes([(csetval >> 8), (csetval & 0xff)])
- else:
+ if data[0][:2] != '0x':
+ self.fail(f"Invalid line: {line!r}")
+ csetch = bytes.fromhex(data[0][2:])
+ if len(csetch) == 1 and 0x80 <= csetch[0]:
continue
unich = unichrs(data[1])

@ -0,0 +1,269 @@
From 0cfd9a7f26488567b9a3e5ec192099a8b80ad9df Mon Sep 17 00:00:00 2001
From: Lumir Balhar <lbalhar@redhat.com>
Date: Tue, 19 Jan 2021 07:55:37 +0100
Subject: [PATCH] [PATCH] bpo-37064: Add -k and -a options to pathfix.py tool
(GH-16387)
* bpo-37064: Add option -k to Tools/scripts/pathfix.py (GH-15548)
Add flag -k to pathscript.py script: preserve shebang flags.
(cherry picked from commit 50254ac4c179cb412e90682098c97db786143929)
* bpo-37064: Add option -a to pathfix.py tool (GH-15717)
Add option -a to Tools/Scripts/pathfix.py script: add flags.
(cherry picked from commit 1dc1acbd73f05f14c974b7ce1041787d7abef31e)
---
Lib/test/test_tools/test_pathfix.py | 104 ++++++++++++++++++++++++++++
Tools/scripts/pathfix.py | 64 +++++++++++++++--
2 files changed, 163 insertions(+), 5 deletions(-)
create mode 100644 Lib/test/test_tools/test_pathfix.py
diff --git a/Lib/test/test_tools/test_pathfix.py b/Lib/test/test_tools/test_pathfix.py
new file mode 100644
index 0000000..1f0585e
--- /dev/null
+++ b/Lib/test/test_tools/test_pathfix.py
@@ -0,0 +1,104 @@
+import os
+import subprocess
+import sys
+import unittest
+from test import support
+from test.test_tools import import_tool, scriptsdir
+
+
+class TestPathfixFunctional(unittest.TestCase):
+ script = os.path.join(scriptsdir, 'pathfix.py')
+
+ def setUp(self):
+ self.temp_file = support.TESTFN
+ self.addCleanup(support.unlink, support.TESTFN)
+
+ def pathfix(self, shebang, pathfix_flags, exitcode=0, stdout='', stderr=''):
+ with open(self.temp_file, 'w', encoding='utf8') as f:
+ f.write(f'{shebang}\n' + 'print("Hello world")\n')
+
+ proc = subprocess.run(
+ [sys.executable, self.script,
+ *pathfix_flags, '-n', self.temp_file],
+ universal_newlines=True, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+
+ if stdout == '' and proc.returncode == 0:
+ stdout = f'{self.temp_file}: updating\n'
+ self.assertEqual(proc.returncode, exitcode, proc)
+ self.assertEqual(proc.stdout, stdout, proc)
+ self.assertEqual(proc.stderr, stderr, proc)
+
+ with open(self.temp_file, 'r', encoding='utf8') as f:
+ output = f.read()
+
+ lines = output.split('\n')
+ self.assertEqual(lines[1:], ['print("Hello world")', ''])
+ new_shebang = lines[0]
+
+ if proc.returncode != 0:
+ self.assertEqual(shebang, new_shebang)
+
+ return new_shebang
+
+ def test_pathfix(self):
+ self.assertEqual(
+ self.pathfix(
+ '#! /usr/bin/env python',
+ ['-i', '/usr/bin/python3']),
+ '#! /usr/bin/python3')
+ self.assertEqual(
+ self.pathfix(
+ '#! /usr/bin/env python -R',
+ ['-i', '/usr/bin/python3']),
+ '#! /usr/bin/python3')
+
+ def test_pathfix_keeping_flags(self):
+ self.assertEqual(
+ self.pathfix(
+ '#! /usr/bin/env python -R',
+ ['-i', '/usr/bin/python3', '-k']),
+ '#! /usr/bin/python3 -R')
+ self.assertEqual(
+ self.pathfix(
+ '#! /usr/bin/env python',
+ ['-i', '/usr/bin/python3', '-k']),
+ '#! /usr/bin/python3')
+
+ def test_pathfix_adding_flag(self):
+ self.assertEqual(
+ self.pathfix(
+ '#! /usr/bin/env python',
+ ['-i', '/usr/bin/python3', '-a', 's']),
+ '#! /usr/bin/python3 -s')
+ self.assertEqual(
+ self.pathfix(
+ '#! /usr/bin/env python -S',
+ ['-i', '/usr/bin/python3', '-a', 's']),
+ '#! /usr/bin/python3 -s')
+ self.assertEqual(
+ self.pathfix(
+ '#! /usr/bin/env python -V',
+ ['-i', '/usr/bin/python3', '-a', 'v', '-k']),
+ '#! /usr/bin/python3 -vV')
+ self.assertEqual(
+ self.pathfix(
+ '#! /usr/bin/env python',
+ ['-i', '/usr/bin/python3', '-a', 'Rs']),
+ '#! /usr/bin/python3 -Rs')
+ self.assertEqual(
+ self.pathfix(
+ '#! /usr/bin/env python -W default',
+ ['-i', '/usr/bin/python3', '-a', 's', '-k']),
+ '#! /usr/bin/python3 -sW default')
+
+ def test_pathfix_adding_errors(self):
+ self.pathfix(
+ '#! /usr/bin/env python -E',
+ ['-i', '/usr/bin/python3', '-a', 'W default', '-k'],
+ exitcode=2,
+ stderr="-a option doesn't support whitespaces")
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/Tools/scripts/pathfix.py b/Tools/scripts/pathfix.py
index c5bf984..2dfa6e8 100755
--- a/Tools/scripts/pathfix.py
+++ b/Tools/scripts/pathfix.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
-# Change the #! line occurring in Python scripts. The new interpreter
+# Change the #! line (shebang) occurring in Python scripts. The new interpreter
# pathname must be given with a -i option.
#
# Command line arguments are files or directories to be processed.
@@ -10,7 +10,13 @@
# arguments).
# The original file is kept as a back-up (with a "~" attached to its name),
# -n flag can be used to disable this.
-#
+
+# Sometimes you may find shebangs with flags such as `#! /usr/bin/env python -si`.
+# Normally, pathfix overwrites the entire line, including the flags.
+# To change interpreter and keep flags from the original shebang line, use -k.
+# If you want to keep flags and add to them one single literal flag, use option -a.
+
+
# Undoubtedly you can do this using find and sed or perl, but this is
# a nice example of Python code that recurses down a directory tree
# and uses regular expressions. Also note several subtleties like
@@ -33,16 +39,21 @@ rep = sys.stdout.write
new_interpreter = None
preserve_timestamps = False
create_backup = True
+keep_flags = False
+add_flags = b''
def main():
global new_interpreter
global preserve_timestamps
global create_backup
- usage = ('usage: %s -i /interpreter -p -n file-or-directory ...\n' %
+ global keep_flags
+ global add_flags
+
+ usage = ('usage: %s -i /interpreter -p -n -k -a file-or-directory ...\n' %
sys.argv[0])
try:
- opts, args = getopt.getopt(sys.argv[1:], 'i:pn')
+ opts, args = getopt.getopt(sys.argv[1:], 'i:a:kpn')
except getopt.error as msg:
err(str(msg) + '\n')
err(usage)
@@ -54,6 +65,13 @@ def main():
preserve_timestamps = True
if o == '-n':
create_backup = False
+ if o == '-k':
+ keep_flags = True
+ if o == '-a':
+ add_flags = a.encode()
+ if b' ' in add_flags:
+ err("-a option doesn't support whitespaces")
+ sys.exit(2)
if not new_interpreter or not new_interpreter.startswith(b'/') or \
not args:
err('-i option or file-or-directory missing\n')
@@ -70,10 +88,14 @@ def main():
if fix(arg): bad = 1
sys.exit(bad)
+
ispythonprog = re.compile(r'^[a-zA-Z0-9_]+\.py$')
+
+
def ispython(name):
return bool(ispythonprog.match(name))
+
def recursedown(dirname):
dbg('recursedown(%r)\n' % (dirname,))
bad = 0
@@ -96,6 +118,7 @@ def recursedown(dirname):
if recursedown(fullname): bad = 1
return bad
+
def fix(filename):
## dbg('fix(%r)\n' % (filename,))
try:
@@ -166,12 +189,43 @@ def fix(filename):
# Return success
return 0
+
+def parse_shebang(shebangline):
+ shebangline = shebangline.rstrip(b'\n')
+ start = shebangline.find(b' -')
+ if start == -1:
+ return b''
+ return shebangline[start:]
+
+
+def populate_flags(shebangline):
+ old_flags = b''
+ if keep_flags:
+ old_flags = parse_shebang(shebangline)
+ if old_flags:
+ old_flags = old_flags[2:]
+ if not (old_flags or add_flags):
+ return b''
+ # On Linux, the entire string following the interpreter name
+ # is passed as a single argument to the interpreter.
+ # e.g. "#! /usr/bin/python3 -W Error -s" runs "/usr/bin/python3 "-W Error -s"
+ # so shebang should have single '-' where flags are given and
+ # flag might need argument for that reasons adding new flags is
+ # between '-' and original flags
+ # e.g. #! /usr/bin/python3 -sW Error
+ return b' -' + add_flags + old_flags
+
+
def fixline(line):
if not line.startswith(b'#!'):
return line
+
if b"python" not in line:
return line
- return b'#! ' + new_interpreter + b'\n'
+
+ flags = populate_flags(line)
+ return b'#! ' + new_interpreter + flags + b'\n'
+
if __name__ == '__main__':
main()
--
2.29.2

@ -0,0 +1,184 @@
From e92381a0a6a3e1f000956e1f1e70e543b9c2bcd5 Mon Sep 17 00:00:00 2001
From: Benjamin Peterson <benjamin@python.org>
Date: Mon, 18 Jan 2021 14:47:05 -0600
Subject: [PATCH] [3.6] closes bpo-42938: Replace snprintf with Python unicode
formatting in ctypes param reprs. (24239). (cherry picked from commit
916610ef90a0d0761f08747f7b0905541f0977c7)
Co-authored-by: Benjamin Peterson <benjamin@python.org>
---
Lib/ctypes/test/test_parameters.py | 43 +++++++++++++++
.../2021-01-18-09-27-31.bpo-42938.4Zn4Mp.rst | 2 +
Modules/_ctypes/callproc.c | 55 +++++++------------
3 files changed, 66 insertions(+), 34 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2021-01-18-09-27-31.bpo-42938.4Zn4Mp.rst
diff --git a/Lib/ctypes/test/test_parameters.py b/Lib/ctypes/test/test_parameters.py
index e4c25fd880cef..531894fdec838 100644
--- a/Lib/ctypes/test/test_parameters.py
+++ b/Lib/ctypes/test/test_parameters.py
@@ -201,6 +201,49 @@ def __dict__(self):
with self.assertRaises(ZeroDivisionError):
WorseStruct().__setstate__({}, b'foo')
+ def test_parameter_repr(self):
+ from ctypes import (
+ c_bool,
+ c_char,
+ c_wchar,
+ c_byte,
+ c_ubyte,
+ c_short,
+ c_ushort,
+ c_int,
+ c_uint,
+ c_long,
+ c_ulong,
+ c_longlong,
+ c_ulonglong,
+ c_float,
+ c_double,
+ c_longdouble,
+ c_char_p,
+ c_wchar_p,
+ c_void_p,
+ )
+ self.assertRegex(repr(c_bool.from_param(True)), r"^<cparam '\?' at 0x[A-Fa-f0-9]+>$")
+ self.assertEqual(repr(c_char.from_param(97)), "<cparam 'c' ('a')>")
+ self.assertRegex(repr(c_wchar.from_param('a')), r"^<cparam 'u' at 0x[A-Fa-f0-9]+>$")
+ self.assertEqual(repr(c_byte.from_param(98)), "<cparam 'b' (98)>")
+ self.assertEqual(repr(c_ubyte.from_param(98)), "<cparam 'B' (98)>")
+ self.assertEqual(repr(c_short.from_param(511)), "<cparam 'h' (511)>")
+ self.assertEqual(repr(c_ushort.from_param(511)), "<cparam 'H' (511)>")
+ self.assertRegex(repr(c_int.from_param(20000)), r"^<cparam '[li]' \(20000\)>$")
+ self.assertRegex(repr(c_uint.from_param(20000)), r"^<cparam '[LI]' \(20000\)>$")
+ self.assertRegex(repr(c_long.from_param(20000)), r"^<cparam '[li]' \(20000\)>$")
+ self.assertRegex(repr(c_ulong.from_param(20000)), r"^<cparam '[LI]' \(20000\)>$")
+ self.assertRegex(repr(c_longlong.from_param(20000)), r"^<cparam '[liq]' \(20000\)>$")
+ self.assertRegex(repr(c_ulonglong.from_param(20000)), r"^<cparam '[LIQ]' \(20000\)>$")
+ self.assertEqual(repr(c_float.from_param(1.5)), "<cparam 'f' (1.5)>")
+ self.assertEqual(repr(c_double.from_param(1.5)), "<cparam 'd' (1.5)>")
+ self.assertEqual(repr(c_double.from_param(1e300)), "<cparam 'd' (1e+300)>")
+ self.assertRegex(repr(c_longdouble.from_param(1.5)), r"^<cparam ('d' \(1.5\)|'g' at 0x[A-Fa-f0-9]+)>$")
+ self.assertRegex(repr(c_char_p.from_param(b'hihi')), "^<cparam 'z' \(0x[A-Fa-f0-9]+\)>$")
+ self.assertRegex(repr(c_wchar_p.from_param('hihi')), "^<cparam 'Z' \(0x[A-Fa-f0-9]+\)>$")
+ self.assertRegex(repr(c_void_p.from_param(0x12)), r"^<cparam 'P' \(0x0*12\)>$")
+
################################################################
if __name__ == '__main__':
diff --git a/Misc/NEWS.d/next/Security/2021-01-18-09-27-31.bpo-42938.4Zn4Mp.rst b/Misc/NEWS.d/next/Security/2021-01-18-09-27-31.bpo-42938.4Zn4Mp.rst
new file mode 100644
index 0000000000000..7df65a156feab
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2021-01-18-09-27-31.bpo-42938.4Zn4Mp.rst
@@ -0,0 +1,2 @@
+Avoid static buffers when computing the repr of :class:`ctypes.c_double` and
+:class:`ctypes.c_longdouble` values.
diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c
index d1c190f359108..2bb289bce043f 100644
--- a/Modules/_ctypes/callproc.c
+++ b/Modules/_ctypes/callproc.c
@@ -461,58 +461,47 @@ is_literal_char(unsigned char c)
static PyObject *
PyCArg_repr(PyCArgObject *self)
{
- char buffer[256];
switch(self->tag) {
case 'b':
case 'B':
- sprintf(buffer, "<cparam '%c' (%d)>",
+ return PyUnicode_FromFormat("<cparam '%c' (%d)>",
self->tag, self->value.b);
- break;
case 'h':
case 'H':
- sprintf(buffer, "<cparam '%c' (%d)>",
+ return PyUnicode_FromFormat("<cparam '%c' (%d)>",
self->tag, self->value.h);
- break;
case 'i':
case 'I':
- sprintf(buffer, "<cparam '%c' (%d)>",
+ return PyUnicode_FromFormat("<cparam '%c' (%d)>",
self->tag, self->value.i);
- break;
case 'l':
case 'L':
- sprintf(buffer, "<cparam '%c' (%ld)>",
+ return PyUnicode_FromFormat("<cparam '%c' (%ld)>",
self->tag, self->value.l);
- break;
case 'q':
case 'Q':
- sprintf(buffer,
-#ifdef MS_WIN32
- "<cparam '%c' (%I64d)>",
-#else
- "<cparam '%c' (%lld)>",
-#endif
+ return PyUnicode_FromFormat("<cparam '%c' (%lld)>",
self->tag, self->value.q);
- break;
case 'd':
- sprintf(buffer, "<cparam '%c' (%f)>",
- self->tag, self->value.d);
- break;
- case 'f':
- sprintf(buffer, "<cparam '%c' (%f)>",
- self->tag, self->value.f);
- break;
-
+ case 'f': {
+ PyObject *f = PyFloat_FromDouble((self->tag == 'f') ? self->value.f : self->value.d);
+ if (f == NULL) {
+ return NULL;
+ }
+ PyObject *result = PyUnicode_FromFormat("<cparam '%c' (%R)>", self->tag, f);
+ Py_DECREF(f);
+ return result;
+ }
case 'c':
if (is_literal_char((unsigned char)self->value.c)) {
- sprintf(buffer, "<cparam '%c' ('%c')>",
+ return PyUnicode_FromFormat("<cparam '%c' ('%c')>",
self->tag, self->value.c);
}
else {
- sprintf(buffer, "<cparam '%c' ('\\x%02x')>",
+ return PyUnicode_FromFormat("<cparam '%c' ('\\x%02x')>",
self->tag, (unsigned char)self->value.c);
}
- break;
/* Hm, are these 'z' and 'Z' codes useful at all?
Shouldn't they be replaced by the functionality of c_string
@@ -521,22 +510,20 @@ PyCArg_repr(PyCArgObject *self)
case 'z':
case 'Z':
case 'P':
- sprintf(buffer, "<cparam '%c' (%p)>",
+ return PyUnicode_FromFormat("<cparam '%c' (%p)>",
self->tag, self->value.p);
break;
default:
if (is_literal_char((unsigned char)self->tag)) {
- sprintf(buffer, "<cparam '%c' at %p>",
- (unsigned char)self->tag, self);
+ return PyUnicode_FromFormat("<cparam '%c' at %p>",
+ (unsigned char)self->tag, (void *)self);
}
else {
- sprintf(buffer, "<cparam 0x%02x at %p>",
- (unsigned char)self->tag, self);
+ return PyUnicode_FromFormat("<cparam 0x%02x at %p>",
+ (unsigned char)self->tag, (void *)self);
}
- break;
}
- return PyUnicode_FromString(buffer);
}
static PyMemberDef PyCArgType_members[] = {

@ -0,0 +1,684 @@
commit 9e77ec82c40ab59846f9447b7c483e7b8e368b16
Author: Petr Viktorin <pviktori@redhat.com>
Date: Thu Mar 4 13:59:56 2021 +0100
CVE-2021-23336: Add `separator` argument to parse_qs; warn with default
Partially backports https://bugs.python.org/issue42967 : [security] Address a web cache-poisoning issue reported in urllib.parse.parse_qsl().
However, this solution is different than the upstream solution in Python 3.6.13.
An optional argument seperator is added to specify the separator.
It is recommended to set it to '&' or ';' to match the application or proxy in use.
The default can be set with an env variable of a config file.
If neither the argument, env var or config file specifies a separator, "&" is used
but a warning is raised if parse_qs is used on input that contains ';'.
Co-authors of the upstream change (who do not necessarily agree with this):
Co-authored-by: Adam Goldschmidt <adamgold7@gmail.com>
Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Co-authored-by: Éric Araujo <merwok@netwok.org>
diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst
index 41219eeaaba..ddecc0af23a 100644
--- a/Doc/library/cgi.rst
+++ b/Doc/library/cgi.rst
@@ -277,13 +277,12 @@ These are useful if you want more control, or if you want to employ some of the
algorithms implemented in this module in other circumstances.
-.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False)
+.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False, separator=None)
Parse a query in the environment or from a file (the file defaults to
- ``sys.stdin``). The *keep_blank_values* and *strict_parsing* parameters are
+ ``sys.stdin``). The *keep_blank_values*, *strict_parsing* and *separator* parameters are
passed to :func:`urllib.parse.parse_qs` unchanged.
-
.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False)
This function is deprecated in this module. Use :func:`urllib.parse.parse_qs`
@@ -308,7 +307,6 @@ algorithms implemented in this module in other circumstances.
Note that this does not parse nested multipart parts --- use
:class:`FieldStorage` for that.
-
.. function:: parse_header(string)
Parse a MIME header (such as :mailheader:`Content-Type`) into a main value and a
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index 647af613a31..bcab7c142bc 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -143,7 +143,7 @@ or on combining URL components into a URL string.
now raise :exc:`ValueError`.
-.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None)
+.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator=None)
Parse a query string given as a string argument (data of type
:mimetype:`application/x-www-form-urlencoded`). Data are returned as a
@@ -168,6 +168,15 @@ or on combining URL components into a URL string.
read. If set, then throws a :exc:`ValueError` if there are more than
*max_num_fields* fields read.
+ The optional argument *separator* is the symbol to use for separating the
+ query arguments. It is recommended to set it to ``'&'`` or ``';'``.
+ It defaults to ``'&'``; a warning is raised if this default is used.
+ This default may be changed with the following environment variable settings:
+
+ - ``PYTHON_URLLIB_QS_SEPARATOR='&'``: use only ``&`` as separator, without warning (as in Python 3.6.13+ or 3.10)
+ - ``PYTHON_URLLIB_QS_SEPARATOR=';'``: use only ``;`` as separator
+ - ``PYTHON_URLLIB_QS_SEPARATOR=legacy``: use both ``&`` and ``;`` (as in previous versions of Python)
+
Use the :func:`urllib.parse.urlencode` function (with the ``doseq``
parameter set to ``True``) to convert such dictionaries into query
strings.
@@ -204,6 +213,9 @@ or on combining URL components into a URL string.
read. If set, then throws a :exc:`ValueError` if there are more than
*max_num_fields* fields read.
+ The optional argument *separator* is the symbol to use for separating the
+ query arguments. It works as in :py:func:`parse_qs`.
+
Use the :func:`urllib.parse.urlencode` function to convert such lists of pairs into
query strings.
@@ -213,7 +225,6 @@ or on combining URL components into a URL string.
.. versionchanged:: 3.6.8
Added *max_num_fields* parameter.
-
.. function:: urlunparse(parts)
Construct a URL from a tuple as returned by ``urlparse()``. The *parts*
diff --git a/Lib/cgi.py b/Lib/cgi.py
index 56f243e09f0..5ab2a5d6af6 100755
--- a/Lib/cgi.py
+++ b/Lib/cgi.py
@@ -117,7 +117,8 @@ log = initlog # The current logging function
# 0 ==> unlimited input
maxlen = 0
-def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
+def parse(fp=None, environ=os.environ, keep_blank_values=0,
+ strict_parsing=0, separator=None):
"""Parse a query in the environment or from a file (default stdin)
Arguments, all optional:
@@ -136,6 +137,8 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
strict_parsing: flag indicating what to do with parsing errors.
If false (the default), errors are silently ignored.
If true, errors raise a ValueError exception.
+
+ separator: str. The symbol to use for separating the query arguments.
"""
if fp is None:
fp = sys.stdin
@@ -156,7 +159,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
if environ['REQUEST_METHOD'] == 'POST':
ctype, pdict = parse_header(environ['CONTENT_TYPE'])
if ctype == 'multipart/form-data':
- return parse_multipart(fp, pdict)
+ return parse_multipart(fp, pdict, separator=separator)
elif ctype == 'application/x-www-form-urlencoded':
clength = int(environ['CONTENT_LENGTH'])
if maxlen and clength > maxlen:
@@ -182,21 +185,21 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
encoding=encoding)
-
# parse query string function called from urlparse,
# this is done in order to maintain backward compatibility.
-
-def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
+def parse_qs(qs, keep_blank_values=0, strict_parsing=0, separator=None):
"""Parse a query given as a string argument."""
warn("cgi.parse_qs is deprecated, use urllib.parse.parse_qs instead",
DeprecationWarning, 2)
- return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing)
+ return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
+ separator=separator)
-def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
+def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, separator=None):
"""Parse a query given as a string argument."""
warn("cgi.parse_qsl is deprecated, use urllib.parse.parse_qsl instead",
DeprecationWarning, 2)
- return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
+ return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing,
+ separator=separator)
def parse_multipart(fp, pdict):
"""Parse multipart input.
@@ -297,7 +300,6 @@ def parse_multipart(fp, pdict):
return partdict
-
def _parseparam(s):
while s[:1] == ';':
s = s[1:]
@@ -405,7 +407,7 @@ class FieldStorage:
def __init__(self, fp=None, headers=None, outerboundary=b'',
environ=os.environ, keep_blank_values=0, strict_parsing=0,
limit=None, encoding='utf-8', errors='replace',
- max_num_fields=None):
+ max_num_fields=None, separator=None):
"""Constructor. Read multipart/* until last part.
Arguments, all optional:
@@ -453,6 +455,7 @@ class FieldStorage:
self.keep_blank_values = keep_blank_values
self.strict_parsing = strict_parsing
self.max_num_fields = max_num_fields
+ self.separator = separator
if 'REQUEST_METHOD' in environ:
method = environ['REQUEST_METHOD'].upper()
self.qs_on_post = None
@@ -678,7 +681,7 @@ class FieldStorage:
query = urllib.parse.parse_qsl(
qs, self.keep_blank_values, self.strict_parsing,
encoding=self.encoding, errors=self.errors,
- max_num_fields=self.max_num_fields)
+ max_num_fields=self.max_num_fields, separator=self.separator)
self.list = [MiniFieldStorage(key, value) for key, value in query]
self.skip_lines()
@@ -694,7 +697,7 @@ class FieldStorage:
query = urllib.parse.parse_qsl(
self.qs_on_post, self.keep_blank_values, self.strict_parsing,
encoding=self.encoding, errors=self.errors,
- max_num_fields=self.max_num_fields)
+ max_num_fields=self.max_num_fields, separator=self.separator)
self.list.extend(MiniFieldStorage(key, value) for key, value in query)
klass = self.FieldStorageClass or self.__class__
@@ -736,7 +739,8 @@ class FieldStorage:
part = klass(self.fp, headers, ib, environ, keep_blank_values,
strict_parsing,self.limit-self.bytes_read,
- self.encoding, self.errors, max_num_fields)
+ self.encoding, self.errors, max_num_fields,
+ separator=self.separator)
if max_num_fields is not None:
max_num_fields -= 1
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
index b3e2d4cce8e..5ae3e085e1e 100644
--- a/Lib/test/test_cgi.py
+++ b/Lib/test/test_cgi.py
@@ -55,12 +55,9 @@ parse_strict_test_cases = [
("", ValueError("bad query field: ''")),
("&", ValueError("bad query field: ''")),
("&&", ValueError("bad query field: ''")),
- (";", ValueError("bad query field: ''")),
- (";&;", ValueError("bad query field: ''")),
# Should the next few really be valid?
("=", {}),
("=&=", {}),
- ("=;=", {}),
# This rest seem to make sense
("=a", {'': ['a']}),
("&=a", ValueError("bad query field: ''")),
@@ -75,8 +72,6 @@ parse_strict_test_cases = [
("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
- ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
- ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
{'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
'cuyer': ['r'],
@@ -164,6 +159,35 @@ class CgiTests(unittest.TestCase):
env = {'QUERY_STRING': orig}
fs = cgi.FieldStorage(environ=env)
+ if isinstance(expect, dict):
+ # test dict interface
+ self.assertEqual(len(expect), len(fs))
+ self.assertCountEqual(expect.keys(), fs.keys())
+ self.assertEqual(fs.getvalue("nonexistent field", "default"), "default")
+ # test individual fields
+ for key in expect.keys():
+ expect_val = expect[key]
+ self.assertIn(key, fs)
+ if len(expect_val) > 1:
+ self.assertEqual(fs.getvalue(key), expect_val)
+ else:
+ self.assertEqual(fs.getvalue(key), expect_val[0])
+
+ def test_separator(self):
+ parse_semicolon = [
+ ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
+ ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
+ (";", ValueError("bad query field: ''")),
+ (";;", ValueError("bad query field: ''")),
+ ("=;a", ValueError("bad query field: 'a'")),
+ (";b=a", ValueError("bad query field: ''")),
+ ("b;=a", ValueError("bad query field: 'b'")),
+ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
+ ("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
+ ]
+ for orig, expect in parse_semicolon:
+ env = {'QUERY_STRING': orig}
+ fs = cgi.FieldStorage(separator=';', environ=env)
if isinstance(expect, dict):
# test dict interface
self.assertEqual(len(expect), len(fs))
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 68f633ca3a7..1ec86ba0fc2 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -2,6 +2,11 @@ import sys
import unicodedata
import unittest
import urllib.parse
+from test.support import EnvironmentVarGuard
+from warnings import catch_warnings
+import tempfile
+import contextlib
+import os.path
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
@@ -32,6 +37,9 @@ parse_qsl_test_cases = [
(b"&a=b", [(b'a', b'b')]),
(b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
(b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
+]
+
+parse_qsl_test_cases_semicolon = [
(";", []),
(";;", []),
(";a=b", [('a', 'b')]),
@@ -44,6 +52,21 @@ parse_qsl_test_cases = [
(b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
]
+parse_qsl_test_cases_legacy = [
+ (b"a=1;a=2&a=3", [(b'a', b'1'), (b'a', b'2'), (b'a', b'3')]),
+ (b"a=1;b=2&c=3", [(b'a', b'1'), (b'b', b'2'), (b'c', b'3')]),
+ (b"a=1&b=2&c=3;", [(b'a', b'1'), (b'b', b'2'), (b'c', b'3')]),
+]
+
+parse_qsl_test_cases_warn = [
+ (";a=b", [(';a', 'b')]),
+ ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
+ (b";a=b", [(b';a', b'b')]),
+ (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
+ ("a=1;a=2&a=3", [('a', '1;a=2'), ('a', '3')]),
+ (b"a=1;a=2&a=3", [(b'a', b'1;a=2'), (b'a', b'3')]),
+]
+
# Each parse_qs testcase is a two-tuple that contains
# a string with the query and a dictionary with the expected result.
@@ -68,6 +91,9 @@ parse_qs_test_cases = [
(b"&a=b", {b'a': [b'b']}),
(b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
(b"a=1&a=2", {b'a': [b'1', b'2']}),
+]
+
+parse_qs_test_cases_semicolon = [
(";", {}),
(";;", {}),
(";a=b", {'a': ['b']}),
@@ -80,6 +106,24 @@ parse_qs_test_cases = [
(b"a=1;a=2", {b'a': [b'1', b'2']}),
]
+parse_qs_test_cases_legacy = [
+ ("a=1;a=2&a=3", {'a': ['1', '2', '3']}),
+ ("a=1;b=2&c=3", {'a': ['1'], 'b': ['2'], 'c': ['3']}),
+ ("a=1&b=2&c=3;", {'a': ['1'], 'b': ['2'], 'c': ['3']}),
+ (b"a=1;a=2&a=3", {b'a': [b'1', b'2', b'3']}),
+ (b"a=1;b=2&c=3", {b'a': [b'1'], b'b': [b'2'], b'c': [b'3']}),
+ (b"a=1&b=2&c=3;", {b'a': [b'1'], b'b': [b'2'], b'c': [b'3']}),
+]
+
+parse_qs_test_cases_warn = [
+ (";a=b", {';a': ['b']}),
+ ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
+ (b";a=b", {b';a': [b'b']}),
+ (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
+ ("a=1;a=2&a=3", {'a': ['1;a=2', '3']}),
+ (b"a=1;a=2&a=3", {b'a': [b'1;a=2', b'3']}),
+]
+
class UrlParseTestCase(unittest.TestCase):
def checkRoundtrips(self, url, parsed, split):
@@ -152,6 +196,40 @@ class UrlParseTestCase(unittest.TestCase):
self.assertEqual(result, expect_without_blanks,
"Error parsing %r" % orig)
+ def test_qs_default_warn(self):
+ for orig, expect in parse_qs_test_cases_warn:
+ with self.subTest(orig=orig, expect=expect):
+ with catch_warnings(record=True) as w:
+ result = urllib.parse.parse_qs(orig, keep_blank_values=True)
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+ self.assertEqual(len(w), 1)
+ self.assertEqual(w[0].category, urllib.parse._QueryStringSeparatorWarning)
+
+ def test_qsl_default_warn(self):
+ for orig, expect in parse_qsl_test_cases_warn:
+ with self.subTest(orig=orig, expect=expect):
+ with catch_warnings(record=True) as w:
+ result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+ self.assertEqual(len(w), 1)
+ self.assertEqual(w[0].category, urllib.parse._QueryStringSeparatorWarning)
+
+ def test_default_qs_no_warnings(self):
+ for orig, expect in parse_qs_test_cases:
+ with self.subTest(orig=orig, expect=expect):
+ with catch_warnings(record=True) as w:
+ result = urllib.parse.parse_qs(orig, keep_blank_values=True)
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+ self.assertEqual(len(w), 0)
+
+ def test_default_qsl_no_warnings(self):
+ for orig, expect in parse_qsl_test_cases:
+ with self.subTest(orig=orig, expect=expect):
+ with catch_warnings(record=True) as w:
+ result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+ self.assertEqual(len(w), 0)
+
def test_roundtrips(self):
str_cases = [
('file:///tmp/junk.txt',
@@ -885,8 +963,151 @@ class UrlParseTestCase(unittest.TestCase):
with self.assertRaises(ValueError):
urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
with self.assertRaises(ValueError):
- urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10)
+ urllib.parse.parse_qs(';'.join(['a=a']*11), separator=';', max_num_fields=10)
+ with self.assertRaises(ValueError):
+ urllib.parse.parse_qs('SEP'.join(['a=a']*11), separator='SEP', max_num_fields=10)
urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
+ urllib.parse.parse_qs(';'.join(['a=a']*10), separator=';', max_num_fields=10)
+ urllib.parse.parse_qs('SEP'.join(['a=a']*10), separator='SEP', max_num_fields=10)
+
+ def test_parse_qs_separator_bytes(self):
+ expected = {b'a': [b'1'], b'b': [b'2']}
+
+ result = urllib.parse.parse_qs(b'a=1;b=2', separator=b';')
+ self.assertEqual(result, expected)
+ result = urllib.parse.parse_qs(b'a=1;b=2', separator=';')
+ self.assertEqual(result, expected)
+ result = urllib.parse.parse_qs('a=1;b=2', separator=';')
+ self.assertEqual(result, {'a': ['1'], 'b': ['2']})
+
+ @contextlib.contextmanager
+ def _qsl_sep_config(self, sep):
+ """Context for the given parse_qsl default separator configured in config file"""
+ old_filename = urllib.parse._QS_SEPARATOR_CONFIG_FILENAME
+ urllib.parse._default_qs_separator = None
+ try:
+ with tempfile.TemporaryDirectory() as tmpdirname:
+ filename = os.path.join(tmpdirname, 'conf.cfg')
+ with open(filename, 'w') as file:
+ file.write(f'[parse_qs]\n')
+ file.write(f'PYTHON_URLLIB_QS_SEPARATOR = {sep}')
+ urllib.parse._QS_SEPARATOR_CONFIG_FILENAME = filename
+ yield
+ finally:
+ urllib.parse._QS_SEPARATOR_CONFIG_FILENAME = old_filename
+ urllib.parse._default_qs_separator = None
+
+ def test_parse_qs_separator_semicolon(self):
+ for orig, expect in parse_qs_test_cases_semicolon:
+ with self.subTest(orig=orig, expect=expect, method='arg'):
+ result = urllib.parse.parse_qs(orig, separator=';')
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+ with self.subTest(orig=orig, expect=expect, method='env'):
+ with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
+ environ['PYTHON_URLLIB_QS_SEPARATOR'] = ';'
+ result = urllib.parse.parse_qs(orig)
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+ self.assertEqual(len(w), 0)
+ with self.subTest(orig=orig, expect=expect, method='conf'):
+ with self._qsl_sep_config(';'), catch_warnings(record=True) as w:
+ result = urllib.parse.parse_qs(orig)
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+ self.assertEqual(len(w), 0)
+
+ def test_parse_qsl_separator_semicolon(self):
+ for orig, expect in parse_qsl_test_cases_semicolon:
+ with self.subTest(orig=orig, expect=expect, method='arg'):
+ result = urllib.parse.parse_qsl(orig, separator=';')
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+ with self.subTest(orig=orig, expect=expect, method='env'):
+ with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
+ environ['PYTHON_URLLIB_QS_SEPARATOR'] = ';'
+ result = urllib.parse.parse_qsl(orig)
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+ self.assertEqual(len(w), 0)
+ with self.subTest(orig=orig, expect=expect, method='conf'):
+ with self._qsl_sep_config(';'), catch_warnings(record=True) as w:
+ result = urllib.parse.parse_qsl(orig)
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+ self.assertEqual(len(w), 0)
+
+ def test_parse_qs_separator_legacy(self):
+ for orig, expect in parse_qs_test_cases_legacy:
+ with self.subTest(orig=orig, expect=expect, method='env'):
+ with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
+ environ['PYTHON_URLLIB_QS_SEPARATOR'] = 'legacy'
+ result = urllib.parse.parse_qs(orig)
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+ self.assertEqual(len(w), 0)
+ with self.subTest(orig=orig, expect=expect, method='conf'):
+ with self._qsl_sep_config('legacy'), catch_warnings(record=True) as w:
+ result = urllib.parse.parse_qs(orig)
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+ self.assertEqual(len(w), 0)
+
+ def test_parse_qsl_separator_legacy(self):
+ for orig, expect in parse_qsl_test_cases_legacy:
+ with self.subTest(orig=orig, expect=expect, method='env'):
+ with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
+ environ['PYTHON_URLLIB_QS_SEPARATOR'] = 'legacy'
+ result = urllib.parse.parse_qsl(orig)
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+ self.assertEqual(len(w), 0)
+ with self.subTest(orig=orig, expect=expect, method='conf'):
+ with self._qsl_sep_config('legacy'), catch_warnings(record=True) as w:
+ result = urllib.parse.parse_qsl(orig)
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+ self.assertEqual(len(w), 0)
+
+ def test_parse_qs_separator_bad_value_env_or_config(self):
+ for bad_sep in '', 'abc', 'safe', '&;', 'SEP':
+ with self.subTest(bad_sep, method='env'):
+ with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
+ environ['PYTHON_URLLIB_QS_SEPARATOR'] = bad_sep
+ with self.assertRaises(ValueError):
+ urllib.parse.parse_qsl('a=1;b=2')
+ with self.subTest(bad_sep, method='conf'):
+ with self._qsl_sep_config('bad_sep'), catch_warnings(record=True) as w:
+ with self.assertRaises(ValueError):
+ urllib.parse.parse_qsl('a=1;b=2')
+
+ def test_parse_qs_separator_bad_value_arg(self):
+ for bad_sep in True, {}, '':
+ with self.subTest(bad_sep):
+ with self.assertRaises(ValueError):
+ urllib.parse.parse_qsl('a=1;b=2', separator=bad_sep)
+
+ def test_parse_qs_separator_num_fields(self):
+ for qs, sep in (
+ ('a&b&c', '&'),
+ ('a;b;c', ';'),
+ ('a&b;c', 'legacy'),
+ ):
+ with self.subTest(qs=qs, sep=sep):
+ with EnvironmentVarGuard() as environ, catch_warnings(record=True) as w:
+ if sep != 'legacy':
+ with self.assertRaises(ValueError):
+ urllib.parse.parse_qsl(qs, separator=sep, max_num_fields=2)
+ if sep:
+ environ['PYTHON_URLLIB_QS_SEPARATOR'] = sep
+ with self.assertRaises(ValueError):
+ urllib.parse.parse_qsl(qs, max_num_fields=2)
+
+ def test_parse_qs_separator_priority(self):
+ # env variable trumps config file
+ with self._qsl_sep_config('~'), EnvironmentVarGuard() as environ:
+ environ['PYTHON_URLLIB_QS_SEPARATOR'] = '!'
+ result = urllib.parse.parse_qs('a=1!b=2~c=3')
+ self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
+ # argument trumps config file
+ with self._qsl_sep_config('~'):
+ result = urllib.parse.parse_qs('a=1$b=2~c=3', separator='$')
+ self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
+ # argument trumps env variable
+ with EnvironmentVarGuard() as environ:
+ environ['PYTHON_URLLIB_QS_SEPARATOR'] = '~'
+ result = urllib.parse.parse_qs('a=1$b=2~c=3', separator='$')
+ self.assertEqual(result, {'a': ['1'], 'b': ['2~c=3']})
def test_urlencode_sequences(self):
# Other tests incidentally urlencode things; test non-covered cases:
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index fa8827a9fa7..57b8fcf8bbd 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -28,6 +28,7 @@ test_urlparse.py provides a good indicator of parsing behavior.
"""
import re
+import os
import sys
import collections
@@ -644,7 +645,8 @@ def unquote(string, encoding='utf-8', errors='replace'):
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace', max_num_fields=None):
+ encoding='utf-8', errors='replace', max_num_fields=None,
+ separator=None):
"""Parse a query given as a string argument.
Arguments:
@@ -673,7 +675,8 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
parsed_result = {}
pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
encoding=encoding, errors=errors,
- max_num_fields=max_num_fields)
+ max_num_fields=max_num_fields,
+ separator=separator)
for name, value in pairs:
if name in parsed_result:
parsed_result[name].append(value)
@@ -681,9 +684,16 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
parsed_result[name] = [value]
return parsed_result
+class _QueryStringSeparatorWarning(RuntimeWarning):
+ """Warning for using default `separator` in parse_qs or parse_qsl"""
+
+# The default "separator" for parse_qsl can be specified in a config file.
+# It's cached after first read.
+_QS_SEPARATOR_CONFIG_FILENAME = '/etc/python/urllib.cfg'
+_default_qs_separator = None
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace', max_num_fields=None):
+ encoding='utf-8', errors='replace', max_num_fields=None, separator=None):
"""Parse a query given as a string argument.
Arguments:
@@ -710,15 +720,77 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
"""
qs, _coerce_result = _coerce_args(qs)
+ if isinstance(separator, bytes):
+ separator = separator.decode('ascii')
+
+ if (not separator or (not isinstance(separator, (str, bytes)))) and separator is not None:
+ raise ValueError("Separator must be of type string or bytes.")
+
+ # Used when both "&" and ";" act as separators. (Need a non-string value.)
+ _legacy = object()
+
+ if separator is None:
+ global _default_qs_separator
+ separator = _default_qs_separator
+ envvar_name = 'PYTHON_URLLIB_QS_SEPARATOR'
+ if separator is None:
+ # Set default separator from environment variable
+ separator = os.environ.get(envvar_name)
+ config_source = 'environment variable'
+ if separator is None:
+ # Set default separator from the configuration file
+ try:
+ file = open(_QS_SEPARATOR_CONFIG_FILENAME)
+ except FileNotFoundError:
+ pass
+ else:
+ with file:
+ import configparser
+ config = configparser.ConfigParser(
+ interpolation=None,
+ comment_prefixes=('#', ),
+ )
+ config.read_file(file)
+ separator = config.get('parse_qs', envvar_name, fallback=None)
+ _default_qs_separator = separator
+ config_source = _QS_SEPARATOR_CONFIG_FILENAME
+ if separator is None:
+ # The default is '&', but warn if not specified explicitly
+ if ';' in qs:
+ from warnings import warn
+ warn("The default separator of urllib.parse.parse_qsl and "
+ + "parse_qs was changed to '&' to avoid a web cache "
+ + "poisoning issue (CVE-2021-23336). "
+ + "By default, semicolons no longer act as query field "
+ + "separators. "
+ + "See https://access.redhat.com/articles/5860431 for "
+ + "more details.",
+ _QueryStringSeparatorWarning, stacklevel=2)
+ separator = '&'
+ elif separator == 'legacy':
+ separator = _legacy
+ elif len(separator) != 1:
+ raise ValueError(
+ f'{envvar_name} (from {config_source}) must contain '
+ + '1 character, or "legacy". See '
+ + 'https://access.redhat.com/articles/5860431 for more details.'
+ )
+
# If max_num_fields is defined then check that the number of fields
# is less than max_num_fields. This prevents a memory exhaustion DOS
# attack via post bodies with many fields.
if max_num_fields is not None:
- num_fields = 1 + qs.count('&') + qs.count(';')
+ if separator is _legacy:
+ num_fields = 1 + qs.count('&') + qs.count(';')
+ else:
+ num_fields = 1 + qs.count(separator)
if max_num_fields < num_fields:
raise ValueError('Max number of fields exceeded')
- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+ if separator is _legacy:
+ pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+ else:
+ pairs = [s1 for s1 in qs.split(separator)]
r = []
for name_value in pairs:
if not name_value and not strict_parsing:
diff --git a/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
new file mode 100644
index 00000000000..bc82c963067
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
@@ -0,0 +1 @@
+Make it possible to fix web cache poisoning vulnerability by allowing the user to choose a custom separator query args.

@ -0,0 +1,101 @@
From 5b1e50256b6532667b6d31debc350f6c7d3f30aa Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Mon, 29 Mar 2021 08:40:53 -0700
Subject: [PATCH] bpo-42988: Remove the pydoc getfile feature (GH-25015)
(GH-25067)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
CVE-2021-3426: Remove the "getfile" feature of the pydoc module which
could be abused to read arbitrary files on the disk (directory
traversal vulnerability). Moreover, even source code of Python
modules can contain sensitive data like passwords. Vulnerability
reported by David Schwörer.
(cherry picked from commit 9b999479c0022edfc9835a8a1f06e046f3881048)
Co-authored-by: Victor Stinner <vstinner@python.org>
---
Lib/pydoc.py | 18 ------------------
Lib/test/test_pydoc.py | 6 ------
.../2021-03-24-14-16-56.bpo-42988.P2aNco.rst | 4 ++++
3 files changed, 4 insertions(+), 24 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2021-03-24-14-16-56.bpo-42988.P2aNco.rst
diff --git a/Lib/pydoc.py b/Lib/pydoc.py
index b521a5504728c4..5247ef9ea27aa1 100644
--- a/Lib/pydoc.py
+++ b/Lib/pydoc.py
@@ -2312,9 +2312,6 @@ def page(self, title, contents):
%s</head><body bgcolor="#f0f0f8">%s<div style="clear:both;padding-top:.5em;">%s</div>
</body></html>''' % (title, css_link, html_navbar(), contents)
- def filelink(self, url, path):
- return '<a href="getfile?key=%s">%s</a>' % (url, path)
-
html = _HTMLDoc()
@@ -2400,19 +2397,6 @@ def bltinlink(name):
'key = %s' % key, '#ffffff', '#ee77aa', '<br>'.join(results))
return 'Search Results', contents
- def html_getfile(path):
- """Get and display a source file listing safely."""
- path = urllib.parse.unquote(path)
- with tokenize.open(path) as fp:
- lines = html.escape(fp.read())
- body = '<pre>%s</pre>' % lines
- heading = html.heading(
- '<big><big><strong>File Listing</strong></big></big>',
- '#ffffff', '#7799ee')
- contents = heading + html.bigsection(
- 'File: %s' % path, '#ffffff', '#ee77aa', body)
- return 'getfile %s' % path, contents
-
def html_topics():
"""Index of topic texts available."""
@@ -2504,8 +2488,6 @@ def get_html_page(url):
op, _, url = url.partition('=')
if op == "search?key":
title, content = html_search(url)
- elif op == "getfile?key":
- title, content = html_getfile(url)
elif op == "topic?key":
# try topics first, then objects.
try:
diff --git a/Lib/test/test_pydoc.py b/Lib/test/test_pydoc.py
index 00803d3305cb53..49bc3eb164b19c 100644
--- a/Lib/test/test_pydoc.py
+++ b/Lib/test/test_pydoc.py
@@ -1052,18 +1052,12 @@ def test_url_requests(self):
("topic?key=def", "Pydoc: KEYWORD def"),
("topic?key=STRINGS", "Pydoc: TOPIC STRINGS"),
("foobar", "Pydoc: Error - foobar"),
- ("getfile?key=foobar", "Pydoc: Error - getfile?key=foobar"),
]
with self.restrict_walk_packages():
for url, title in requests:
self.call_url_handler(url, title)
- path = string.__file__
- title = "Pydoc: getfile " + path
- url = "getfile?key=" + path
- self.call_url_handler(url, title)
-
class TestHelper(unittest.TestCase):
def test_keywords(self):
diff --git a/Misc/NEWS.d/next/Security/2021-03-24-14-16-56.bpo-42988.P2aNco.rst b/Misc/NEWS.d/next/Security/2021-03-24-14-16-56.bpo-42988.P2aNco.rst
new file mode 100644
index 00000000000000..4b42dd05305a83
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2021-03-24-14-16-56.bpo-42988.P2aNco.rst
@@ -0,0 +1,4 @@
+CVE-2021-3426: Remove the ``getfile`` feature of the :mod:`pydoc` module which
+could be abused to read arbitrary files on the disk (directory traversal
+vulnerability). Moreover, even source code of Python modules can contain
+sensitive data like passwords. Vulnerability reported by David Schwörer.

@ -0,0 +1,36 @@
bpo-44422: Fix threading.enumerate() reentrant call (GH-26727)
The threading.enumerate() function now uses a reentrant lock to
prevent a hang on reentrant call.
https://github.com/python/cpython/commit/243fd01047ddce1a7eb0f99a49732d123e942c63
Resolves: rhbz#1959459
diff --git a/Lib/threading.py b/Lib/threading.py
index 0ab1e46..7ab9ad8 100644
--- a/Lib/threading.py
+++ b/Lib/threading.py
@@ -727,8 +727,11 @@ _counter() # Consume 0 so first non-main thread has id 1.
def _newname(template="Thread-%d"):
return template % _counter()
-# Active thread administration
-_active_limbo_lock = _allocate_lock()
+# Active thread administration.
+#
+# bpo-44422: Use a reentrant lock to allow reentrant calls to functions like
+# threading.enumerate().
+_active_limbo_lock = RLock()
_active = {} # maps thread id to Thread object
_limbo = {}
_dangling = WeakSet()
@@ -1325,7 +1328,7 @@ def _after_fork():
# Reset _active_limbo_lock, in case we forked while the lock was held
# by another (non-forked) thread. http://bugs.python.org/issue874900
global _active_limbo_lock, _main_thread
- _active_limbo_lock = _allocate_lock()
+ _active_limbo_lock = RLock()
# fork() only copied the current thread; clear references to others.
new_active = {}

@ -0,0 +1,43 @@
bpo-44434: Don't call PyThread_exit_thread() explicitly (GH-26758)
_thread.start_new_thread() no longer calls PyThread_exit_thread()
explicitly at the thread exit, the call was redundant.
On Linux with the glibc, pthread_cancel() loads dynamically the
libgcc_s.so.1 library. dlopen() can fail if there is no more
available file descriptor to open the file. In this case, the process
aborts with the error message:
"libgcc_s.so.1 must be installed for pthread_cancel to work"
pthread_cancel() unwinds back to the thread's wrapping function that
calls the thread entry point.
The unwind function is dynamically loaded from the libgcc_s library
since it is tightly coupled to the C compiler (GCC). The unwinder
depends on DWARF, the compiler generates DWARF, so the unwinder
belongs to the compiler.
Thanks Florian Weimer and Carlos O'Donell for their help on
investigating this issue.
https://github.com/python/cpython/commit/45a78f906d2d5fe5381d78466b11763fc56d57ba
Resolves: rhbz#1972293
diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c
index a13b2e0..8cc035b 100644
--- a/Modules/_threadmodule.c
+++ b/Modules/_threadmodule.c
@@ -1027,7 +1027,10 @@ t_bootstrap(void *boot_raw)
nb_threads--;
PyThreadState_Clear(tstate);
PyThreadState_DeleteCurrent();
- PyThread_exit_thread();
+
+ // bpo-44434: Don't call explicitly PyThread_exit_thread(). On Linux with
+ // the glibc, pthread_exit() can abort the whole process if dlopen() fails
+ // to open the libgcc_s.so library (ex: EMFILE error).
}
static PyObject *

@ -0,0 +1,40 @@
From 29c669440dddba61d18e1b7fdd57180cae9e4ae3 Mon Sep 17 00:00:00 2001
From: Yeting Li <liyt@ios.ac.cn>
Date: Wed, 7 Apr 2021 19:27:41 +0800
Subject: [PATCH] bpo-43075: Fix ReDoS in urllib AbstractBasicAuthHandler
(GH-24391)
Fix Regular Expression Denial of Service (ReDoS) vulnerability in
urllib.request.AbstractBasicAuthHandler. The ReDoS-vulnerable regex
has quadratic worst-case complexity and it allows cause a denial of
service when identifying crafted invalid RFCs. This ReDoS issue is on
the client side and needs remote attackers to control the HTTP server.
(cherry picked from commit 7215d1ae25525c92b026166f9d5cac85fb1defe1)
Co-authored-by: Yeting Li <liyt@ios.ac.cn>
---
Lib/urllib/request.py | 2 +-
.../next/Security/2021-01-31-05-28-14.bpo-43075.DoAXqO.rst | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
create mode 100644 Misc/NEWS.d/next/Security/2021-01-31-05-28-14.bpo-43075.DoAXqO.rst
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 6624e04317ba2..56565405a7097 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -947,7 +947,7 @@ class AbstractBasicAuthHandler:
# (single quotes are a violation of the RFC, but appear in the wild)
rx = re.compile('(?:^|,)' # start of the string or ','
'[ \t]*' # optional whitespaces
- '([^ \t]+)' # scheme like "Basic"
+ '([^ \t,]+)' # scheme like "Basic"
'[ \t]+' # mandatory whitespaces
# realm=xxx
# realm='xxx'
diff --git a/Misc/NEWS.d/next/Security/2021-01-31-05-28-14.bpo-43075.DoAXqO.rst b/Misc/NEWS.d/next/Security/2021-01-31-05-28-14.bpo-43075.DoAXqO.rst
new file mode 100644
index 0000000000000..1c9f727e965fb
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2021-01-31-05-28-14.bpo-43075.DoAXqO.rst
@@ -0,0 +1 @@
+Fix Regular Expression Denial of Service (ReDoS) vulnerability in :class:`urllib.request.AbstractBasicAuthHandler`. The ReDoS-vulnerable regex has quadratic worst-case complexity and it allows cause a denial of service when identifying crafted invalid RFCs. This ReDoS issue is on the client side and needs remote attackers to control the HTTP server.

@ -0,0 +1,119 @@
From f7fb35b563a9182c22fbdd03c72ec3724dafe918 Mon Sep 17 00:00:00 2001
From: Gen Xu <xgbarry@gmail.com>
Date: Wed, 5 May 2021 15:42:41 -0700
Subject: [PATCH] bpo-44022: Fix http client infinite line reading (DoS) after
a HTTP 100 Continue (GH-25916)
Fixes http.client potential denial of service where it could get stuck reading lines from a malicious server after a 100 Continue response.
Co-authored-by: Gregory P. Smith <greg@krypto.org>
(cherry picked from commit 47895e31b6f626bc6ce47d175fe9d43c1098909d)
Co-authored-by: Gen Xu <xgbarry@gmail.com>
---
Lib/http/client.py | 38 ++++++++++---------
Lib/test/test_httplib.py | 10 ++++-
.../2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst | 2 +
3 files changed, 32 insertions(+), 18 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 53581eca20587..07e675fac5981 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -205,15 +205,11 @@ def getallmatchingheaders(self, name):
lst.append(line)
return lst
-def parse_headers(fp, _class=HTTPMessage):
- """Parses only RFC2822 headers from a file pointer.
-
- email Parser wants to see strings rather than bytes.
- But a TextIOWrapper around self.rfile would buffer too many bytes
- from the stream, bytes which we later need to read as bytes.
- So we read the correct bytes here, as bytes, for email Parser
- to parse.
+def _read_headers(fp):
+ """Reads potential header lines into a list from a file pointer.
+ Length of line is limited by _MAXLINE, and number of
+ headers is limited by _MAXHEADERS.
"""
headers = []
while True:
@@ -225,6 +221,19 @@ def parse_headers(fp, _class=HTTPMessage):
raise HTTPException("got more than %d headers" % _MAXHEADERS)
if line in (b'\r\n', b'\n', b''):
break
+ return headers
+
+def parse_headers(fp, _class=HTTPMessage):
+ """Parses only RFC2822 headers from a file pointer.
+
+ email Parser wants to see strings rather than bytes.
+ But a TextIOWrapper around self.rfile would buffer too many bytes
+ from the stream, bytes which we later need to read as bytes.
+ So we read the correct bytes here, as bytes, for email Parser
+ to parse.
+
+ """
+ headers = _read_headers(fp)
hstring = b''.join(headers).decode('iso-8859-1')
return email.parser.Parser(_class=_class).parsestr(hstring)
@@ -312,15 +321,10 @@ def begin(self):
if status != CONTINUE:
break
# skip the header from the 100 response
- while True:
- skip = self.fp.readline(_MAXLINE + 1)
- if len(skip) > _MAXLINE:
- raise LineTooLong("header line")
- skip = skip.strip()
- if not skip:
- break
- if self.debuglevel > 0:
- print("header:", skip)
+ skipped_headers = _read_headers(self.fp)
+ if self.debuglevel > 0:
+ print("headers:", skipped_headers)
+ del skipped_headers
self.code = self.status = status
self.reason = reason.strip()
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index 03e049b13fd21..0db287507c7bf 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -971,6 +971,14 @@ def test_overflowing_header_line(self):
resp = client.HTTPResponse(FakeSocket(body))
self.assertRaises(client.LineTooLong, resp.begin)
+ def test_overflowing_header_limit_after_100(self):
+ body = (
+ 'HTTP/1.1 100 OK\r\n'
+ 'r\n' * 32768
+ )
+ resp = client.HTTPResponse(FakeSocket(body))
+ self.assertRaises(client.HTTPException, resp.begin)
+
def test_overflowing_chunked_line(self):
body = (
'HTTP/1.1 200 OK\r\n'
@@ -1377,7 +1385,7 @@ def readline(self, limit):
class OfflineTest(TestCase):
def test_all(self):
# Documented objects defined in the module should be in __all__
- expected = {"responses"} # White-list documented dict() object
+ expected = {"responses"} # Allowlist documented dict() object
# HTTPMessage, parse_headers(), and the HTTP status code constants are
# intentionally omitted for simplicity
blacklist = {"HTTPMessage", "parse_headers"}
diff --git a/Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst b/Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst
new file mode 100644
index 0000000000000..cf6b63e396155
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst
@@ -0,0 +1,2 @@
+mod:`http.client` now avoids infinitely reading potential HTTP headers after a
+``100 Continue`` status response from the server.

@ -0,0 +1,74 @@
diff --git a/Lib/logging/handlers.py b/Lib/logging/handlers.py
index 11ebcf1..ee3d960 100644
--- a/Lib/logging/handlers.py
+++ b/Lib/logging/handlers.py
@@ -181,14 +181,17 @@ class RotatingFileHandler(BaseRotatingHandler):
Basically, see if the supplied record would cause the file to exceed
the size limit we have.
"""
+ # See bpo-45401: Never rollover anything other than regular files
+ if os.path.exists(self.baseFilename) and not os.path.isfile(self.baseFilename):
+ return False
if self.stream is None: # delay was set...
self.stream = self._open()
if self.maxBytes > 0: # are we rolling over?
msg = "%s\n" % self.format(record)
self.stream.seek(0, 2) #due to non-posix-compliant Windows feature
if self.stream.tell() + len(msg) >= self.maxBytes:
- return 1
- return 0
+ return True
+ return False
class TimedRotatingFileHandler(BaseRotatingHandler):
"""
@@ -335,10 +338,13 @@ class TimedRotatingFileHandler(BaseRotatingHandler):
record is not used, as we are just comparing times, but it is needed so
the method signatures are the same
"""
+ # See bpo-45401: Never rollover anything other than regular files
+ if os.path.exists(self.baseFilename) and not os.path.isfile(self.baseFilename):
+ return False
t = int(time.time())
if t >= self.rolloverAt:
- return 1
- return 0
+ return True
+ return False
def getFilesToDelete(self):
"""
diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py
index 45b72e3..055b8e3 100644
--- a/Lib/test/test_logging.py
+++ b/Lib/test/test_logging.py
@@ -4219,6 +4219,13 @@ class RotatingFileHandlerTest(BaseFileTest):
rh = logging.handlers.RotatingFileHandler(self.fn, maxBytes=0)
self.assertFalse(rh.shouldRollover(None))
rh.close()
+ # bpo-45401 - test with special file
+ # We set maxBytes to 1 so that rollover would normally happen, except
+ # for the check for regular files
+ rh = logging.handlers.RotatingFileHandler(
+ os.devnull, encoding="utf-8", maxBytes=1)
+ self.assertFalse(rh.shouldRollover(self.next_rec()))
+ rh.close()
def test_should_rollover(self):
rh = logging.handlers.RotatingFileHandler(self.fn, maxBytes=1)
@@ -4294,6 +4301,15 @@ class RotatingFileHandlerTest(BaseFileTest):
rh.close()
class TimedRotatingFileHandlerTest(BaseFileTest):
+ def test_should_not_rollover(self):
+ # See bpo-45401. Should only ever rollover regular files
+ fh = logging.handlers.TimedRotatingFileHandler(
+ os.devnull, 'S', encoding="utf-8", backupCount=1)
+ time.sleep(1.1) # a little over a second ...
+ r = logging.makeLogRecord({'msg': 'testing - device file'})
+ self.assertFalse(fh.shouldRollover(r))
+ fh.close()
+
# other test methods added below
def test_rollover(self):
fh = logging.handlers.TimedRotatingFileHandler(self.fn, 'S',

@ -0,0 +1,267 @@
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 8da1965..9864fe2 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -884,7 +884,8 @@ regen-opcode-targets:
$(srcdir)/Python/opcode_targets.h.new
$(UPDATE_FILE) $(srcdir)/Python/opcode_targets.h $(srcdir)/Python/opcode_targets.h.new
-Python/ceval.o: $(srcdir)/Python/opcode_targets.h $(srcdir)/Python/ceval_gil.h
+Python/ceval.o: $(srcdir)/Python/opcode_targets.h $(srcdir)/Python/ceval_gil.h \
+ $(srcdir)/Python/condvar.h
Python/frozen.o: $(srcdir)/Python/importlib.h $(srcdir)/Python/importlib_external.h
@@ -1706,7 +1707,7 @@ patchcheck: @DEF_MAKE_RULE@
# Dependencies
-Python/thread.o: @THREADHEADERS@
+Python/thread.o: @THREADHEADERS@ $(srcdir)/Python/condvar.h
# Declare targets that aren't real files
.PHONY: all build_all sharedmods check-clean-src oldsharedmods test quicktest
diff --git a/Python/ceval.c b/Python/ceval.c
index 0b30cc1..3f1300c 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -232,6 +232,7 @@ PyEval_InitThreads(void)
{
if (gil_created())
return;
+ PyThread_init_thread();
create_gil();
take_gil(PyThreadState_GET());
main_thread = PyThread_get_thread_ident();
diff --git a/Python/condvar.h b/Python/condvar.h
index 9a71b17..39a420f 100644
--- a/Python/condvar.h
+++ b/Python/condvar.h
@@ -59,20 +59,6 @@
#include <pthread.h>
-#define PyCOND_ADD_MICROSECONDS(tv, interval) \
-do { /* TODO: add overflow and truncation checks */ \
- tv.tv_usec += (long) interval; \
- tv.tv_sec += tv.tv_usec / 1000000; \
- tv.tv_usec %= 1000000; \
-} while (0)
-
-/* We assume all modern POSIX systems have gettimeofday() */
-#ifdef GETTIMEOFDAY_NO_TZ
-#define PyCOND_GETTIMEOFDAY(ptv) gettimeofday(ptv)
-#else
-#define PyCOND_GETTIMEOFDAY(ptv) gettimeofday(ptv, (struct timezone *)NULL)
-#endif
-
/* The following functions return 0 on success, nonzero on error */
#define PyMUTEX_T pthread_mutex_t
#define PyMUTEX_INIT(mut) pthread_mutex_init((mut), NULL)
@@ -81,32 +67,30 @@ do { /* TODO: add overflow and truncation checks */ \
#define PyMUTEX_UNLOCK(mut) pthread_mutex_unlock(mut)
#define PyCOND_T pthread_cond_t
-#define PyCOND_INIT(cond) pthread_cond_init((cond), NULL)
+#define PyCOND_INIT(cond) _PyThread_cond_init(cond)
#define PyCOND_FINI(cond) pthread_cond_destroy(cond)
#define PyCOND_SIGNAL(cond) pthread_cond_signal(cond)
#define PyCOND_BROADCAST(cond) pthread_cond_broadcast(cond)
#define PyCOND_WAIT(cond, mut) pthread_cond_wait((cond), (mut))
+/* These private functions are implemented in Python/thread_pthread.h */
+int _PyThread_cond_init(PyCOND_T *cond);
+void _PyThread_cond_after(long long us, struct timespec *abs);
+
/* return 0 for success, 1 on timeout, -1 on error */
Py_LOCAL_INLINE(int)
PyCOND_TIMEDWAIT(PyCOND_T *cond, PyMUTEX_T *mut, long long us)
{
- int r;
- struct timespec ts;
- struct timeval deadline;
-
- PyCOND_GETTIMEOFDAY(&deadline);
- PyCOND_ADD_MICROSECONDS(deadline, us);
- ts.tv_sec = deadline.tv_sec;
- ts.tv_nsec = deadline.tv_usec * 1000;
-
- r = pthread_cond_timedwait((cond), (mut), &ts);
- if (r == ETIMEDOUT)
+ struct timespec abs;
+ _PyThread_cond_after(us, &abs);
+ int ret = pthread_cond_timedwait(cond, mut, &abs);
+ if (ret == ETIMEDOUT) {
return 1;
- else if (r)
+ }
+ if (ret) {
return -1;
- else
- return 0;
+ }
+ return 0;
}
#elif defined(NT_THREADS)
diff --git a/Python/thread.c b/Python/thread.c
index 63eeb1e..c5d0e59 100644
--- a/Python/thread.c
+++ b/Python/thread.c
@@ -6,6 +6,7 @@
Stuff shared by all thread_*.h files is collected here. */
#include "Python.h"
+#include "condvar.h"
#ifndef _POSIX_THREADS
/* This means pthreads are not implemented in libc headers, hence the macro
diff --git a/Python/thread_pthread.h b/Python/thread_pthread.h
index baea71f..7dc295e 100644
--- a/Python/thread_pthread.h
+++ b/Python/thread_pthread.h
@@ -66,16 +66,6 @@
#endif
#endif
-#if !defined(pthread_attr_default)
-# define pthread_attr_default ((pthread_attr_t *)NULL)
-#endif
-#if !defined(pthread_mutexattr_default)
-# define pthread_mutexattr_default ((pthread_mutexattr_t *)NULL)
-#endif
-#if !defined(pthread_condattr_default)
-# define pthread_condattr_default ((pthread_condattr_t *)NULL)
-#endif
-
/* Whether or not to use semaphores directly rather than emulating them with
* mutexes and condition variables:
@@ -120,6 +110,56 @@ do { \
} while(0)
+/*
+ * pthread_cond support
+ */
+
+#if defined(HAVE_PTHREAD_CONDATTR_SETCLOCK) && defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
+// monotonic is supported statically. It doesn't mean it works on runtime.
+#define CONDATTR_MONOTONIC
+#endif
+
+// NULL when pthread_condattr_setclock(CLOCK_MONOTONIC) is not supported.
+static pthread_condattr_t *condattr_monotonic = NULL;
+
+static void
+init_condattr()
+{
+#ifdef CONDATTR_MONOTONIC
+ static pthread_condattr_t ca;
+ pthread_condattr_init(&ca);
+ if (pthread_condattr_setclock(&ca, CLOCK_MONOTONIC) == 0) {
+ condattr_monotonic = &ca; // Use monotonic clock
+ }
+#endif
+}
+
+int
+_PyThread_cond_init(PyCOND_T *cond)
+{
+ return pthread_cond_init(cond, condattr_monotonic);
+}
+
+void
+_PyThread_cond_after(long long us, struct timespec *abs)
+{
+#ifdef CONDATTR_MONOTONIC
+ if (condattr_monotonic) {
+ clock_gettime(CLOCK_MONOTONIC, abs);
+ abs->tv_sec += us / 1000000;
+ abs->tv_nsec += (us % 1000000) * 1000;
+ abs->tv_sec += abs->tv_nsec / 1000000000;
+ abs->tv_nsec %= 1000000000;
+ return;
+ }
+#endif
+
+ struct timespec ts;
+ MICROSECONDS_TO_TIMESPEC(us, ts);
+ *abs = ts;
+}
+
+
/* A pthread mutex isn't sufficient to model the Python lock type
* because, according to Draft 5 of the docs (P1003.4a/D5), both of the
* following are undefined:
@@ -175,6 +215,7 @@ PyThread__init_thread(void)
extern void pthread_init(void);
pthread_init();
#endif
+ init_condattr();
}
#endif /* !_HAVE_BSDI */
@@ -449,8 +490,7 @@ PyThread_allocate_lock(void)
memset((void *)lock, '\0', sizeof(pthread_lock));
lock->locked = 0;
- status = pthread_mutex_init(&lock->mut,
- pthread_mutexattr_default);
+ status = pthread_mutex_init(&lock->mut, NULL);
CHECK_STATUS_PTHREAD("pthread_mutex_init");
/* Mark the pthread mutex underlying a Python mutex as
pure happens-before. We can't simply mark the
@@ -459,8 +499,7 @@ PyThread_allocate_lock(void)
will cause errors. */
_Py_ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(&lock->mut);
- status = pthread_cond_init(&lock->lock_released,
- pthread_condattr_default);
+ status = _PyThread_cond_init(&lock->lock_released);
CHECK_STATUS_PTHREAD("pthread_cond_init");
if (error) {
@@ -519,9 +558,10 @@ PyThread_acquire_lock_timed(PyThread_type_lock lock, PY_TIMEOUT_T microseconds,
success = PY_LOCK_ACQUIRED;
}
else if (microseconds != 0) {
- struct timespec ts;
- if (microseconds > 0)
- MICROSECONDS_TO_TIMESPEC(microseconds, ts);
+ struct timespec abs;
+ if (microseconds > 0) {
+ _PyThread_cond_after(microseconds, &abs);
+ }
/* continue trying until we get the lock */
/* mut must be locked by me -- part of the condition
@@ -530,10 +570,13 @@ PyThread_acquire_lock_timed(PyThread_type_lock lock, PY_TIMEOUT_T microseconds,
if (microseconds > 0) {
status = pthread_cond_timedwait(
&thelock->lock_released,
- &thelock->mut, &ts);
+ &thelock->mut, &abs);
+ if (status == 1) {
+ break;
+ }
if (status == ETIMEDOUT)
break;
- CHECK_STATUS_PTHREAD("pthread_cond_timed_wait");
+ CHECK_STATUS_PTHREAD("pthread_cond_timedwait");
}
else {
status = pthread_cond_wait(
diff --git a/configure.ac b/configure.ac
index a0e3613..8a17559 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3582,7 +3582,7 @@ AC_CHECK_FUNCS(alarm accept4 setitimer getitimer bind_textdomain_codeset chown \
memrchr mbrtowc mkdirat mkfifo \
mkfifoat mknod mknodat mktime mremap nice openat pathconf pause pipe2 plock poll \
posix_fallocate posix_fadvise pread \
- pthread_init pthread_kill putenv pwrite readlink readlinkat readv realpath renameat \
+ pthread_condattr_setclock pthread_init pthread_kill putenv pwrite readlink readlinkat readv realpath renameat \
select sem_open sem_timedwait sem_getvalue sem_unlink sendfile setegid seteuid \
setgid sethostname \
setlocale setregid setreuid setresuid setresgid setsid setpgid setpgrp setpriority setuid setvbuf \

@ -0,0 +1,80 @@
diff --git a/Lib/ftplib.py b/Lib/ftplib.py
index 2ff251a..385e432 100644
--- a/Lib/ftplib.py
+++ b/Lib/ftplib.py
@@ -104,6 +104,8 @@ class FTP:
welcome = None
passiveserver = 1
encoding = "latin-1"
+ # Disables https://bugs.python.org/issue43285 security if set to True.
+ trust_server_pasv_ipv4_address = False
# Initialization method (called by class instantiation).
# Initialize host to localhost, port to standard ftp port
@@ -333,8 +335,13 @@ class FTP:
return sock
def makepasv(self):
+ """Internal: Does the PASV or EPSV handshake -> (address, port)"""
if self.af == socket.AF_INET:
- host, port = parse227(self.sendcmd('PASV'))
+ untrusted_host, port = parse227(self.sendcmd('PASV'))
+ if self.trust_server_pasv_ipv4_address:
+ host = untrusted_host
+ else:
+ host = self.sock.getpeername()[0]
else:
host, port = parse229(self.sendcmd('EPSV'), self.sock.getpeername())
return host, port
diff --git a/Lib/test/test_ftplib.py b/Lib/test/test_ftplib.py
index 4ff2f71..3ca7cc1 100644
--- a/Lib/test/test_ftplib.py
+++ b/Lib/test/test_ftplib.py
@@ -94,6 +94,10 @@ class DummyFTPHandler(asynchat.async_chat):
self.rest = None
self.next_retr_data = RETR_DATA
self.push('220 welcome')
+ # We use this as the string IPv4 address to direct the client
+ # to in response to a PASV command. To test security behavior.
+ # https://bugs.python.org/issue43285/.
+ self.fake_pasv_server_ip = '252.253.254.255'
def collect_incoming_data(self, data):
self.in_buffer.append(data)
@@ -136,7 +140,8 @@ class DummyFTPHandler(asynchat.async_chat):
sock.bind((self.socket.getsockname()[0], 0))
sock.listen()
sock.settimeout(TIMEOUT)
- ip, port = sock.getsockname()[:2]
+ port = sock.getsockname()[1]
+ ip = self.fake_pasv_server_ip
ip = ip.replace('.', ','); p1 = port / 256; p2 = port % 256
self.push('227 entering passive mode (%s,%d,%d)' %(ip, p1, p2))
conn, addr = sock.accept()
@@ -694,6 +699,26 @@ class TestFTPClass(TestCase):
# IPv4 is in use, just make sure send_epsv has not been used
self.assertEqual(self.server.handler_instance.last_received_cmd, 'pasv')
+ def test_makepasv_issue43285_security_disabled(self):
+ """Test the opt-in to the old vulnerable behavior."""
+ self.client.trust_server_pasv_ipv4_address = True
+ bad_host, port = self.client.makepasv()
+ self.assertEqual(
+ bad_host, self.server.handler_instance.fake_pasv_server_ip)
+ # Opening and closing a connection keeps the dummy server happy
+ # instead of timing out on accept.
+ socket.create_connection((self.client.sock.getpeername()[0], port),
+ timeout=TIMEOUT).close()
+
+ def test_makepasv_issue43285_security_enabled_default(self):
+ self.assertFalse(self.client.trust_server_pasv_ipv4_address)
+ trusted_host, port = self.client.makepasv()
+ self.assertNotEqual(
+ trusted_host, self.server.handler_instance.fake_pasv_server_ip)
+ # Opening and closing a connection keeps the dummy server happy
+ # instead of timing out on accept.
+ socket.create_connection((trusted_host, port), timeout=TIMEOUT).close()
+
def test_with_statement(self):
self.client.quit()

@ -0,0 +1,170 @@
From 6c472d3a1d334d4eeb4a25eba7bf3b01611bf667 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Thu, 6 May 2021 09:56:01 -0700
Subject: [PATCH] [3.6] bpo-43882 - urllib.parse should sanitize urls
containing ASCII newline and tabs (GH-25924)
Co-authored-by: Gregory P. Smith <greg@krypto.org>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
(cherry picked from commit 76cd81d60310d65d01f9d7b48a8985d8ab89c8b4)
Co-authored-by: Senthil Kumaran <senthil@uthcode.com>
(cherry picked from commit 515a7bc4e13645d0945b46a8e1d9102b918cd407)
Co-authored-by: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
---
Doc/library/urllib.parse.rst | 13 +++++
Lib/test/test_urlparse.py | 48 +++++++++++++++++++
Lib/urllib/parse.py | 10 ++++
.../2021-04-25-07-46-37.bpo-43882.Jpwx85.rst | 6 +++
4 files changed, 77 insertions(+)
create mode 100644 Misc/NEWS.d/next/Security/2021-04-25-07-46-37.bpo-43882.Jpwx85.rst
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index 3c2e37ef2093a..b717d7cc05b2e 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -288,6 +288,9 @@ or on combining URL components into a URL string.
``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
decomposed before parsing, no error will be raised.
+ Following the `WHATWG spec`_ that updates RFC 3986, ASCII newline
+ ``\n``, ``\r`` and tab ``\t`` characters are stripped from the URL.
+
.. versionchanged:: 3.6
Out-of-range port numbers now raise :exc:`ValueError`, instead of
returning :const:`None`.
@@ -296,6 +299,10 @@ or on combining URL components into a URL string.
Characters that affect netloc parsing under NFKC normalization will
now raise :exc:`ValueError`.
+ .. versionchanged:: 3.6.14
+ ASCII newline and tab characters are stripped from the URL.
+
+.. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser
.. function:: urlunsplit(parts)
@@ -633,6 +640,10 @@ task isn't already covered by the URL parsing functions above.
.. seealso::
+ `WHATWG`_ - URL Living standard
+ Working Group for the URL Standard that defines URLs, domains, IP addresses, the
+ application/x-www-form-urlencoded format, and their API.
+
:rfc:`3986` - Uniform Resource Identifiers
This is the current standard (STD66). Any changes to urllib.parse module
should conform to this. Certain deviations could be observed, which are
@@ -656,3 +667,5 @@ task isn't already covered by the URL parsing functions above.
:rfc:`1738` - Uniform Resource Locators (URL)
This specifies the formal syntax and semantics of absolute URLs.
+
+.. _WHATWG: https://url.spec.whatwg.org/
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index e3088b2f39bd7..3509278a01694 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -612,6 +612,54 @@ def test_urlsplit_attributes(self):
with self.assertRaisesRegex(ValueError, "out of range"):
p.port
+ def test_urlsplit_remove_unsafe_bytes(self):
+ # Remove ASCII tabs and newlines from input, for http common case scenario.
+ url = "h\nttp://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+ p = urllib.parse.urlsplit(url)
+ self.assertEqual(p.scheme, "http")
+ self.assertEqual(p.netloc, "www.python.org")
+ self.assertEqual(p.path, "/javascript:alert('msg')/")
+ self.assertEqual(p.query, "query=something")
+ self.assertEqual(p.fragment, "fragment")
+ self.assertEqual(p.username, None)
+ self.assertEqual(p.password, None)
+ self.assertEqual(p.hostname, "www.python.org")
+ self.assertEqual(p.port, None)
+ self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
+
+ # Remove ASCII tabs and newlines from input as bytes, for http common case scenario.
+ url = b"h\nttp://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+ p = urllib.parse.urlsplit(url)
+ self.assertEqual(p.scheme, b"http")
+ self.assertEqual(p.netloc, b"www.python.org")
+ self.assertEqual(p.path, b"/javascript:alert('msg')/")
+ self.assertEqual(p.query, b"query=something")
+ self.assertEqual(p.fragment, b"fragment")
+ self.assertEqual(p.username, None)
+ self.assertEqual(p.password, None)
+ self.assertEqual(p.hostname, b"www.python.org")
+ self.assertEqual(p.port, None)
+ self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment")
+
+ # any scheme
+ url = "x-new-scheme\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+ p = urllib.parse.urlsplit(url)
+ self.assertEqual(p.geturl(), "x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment")
+
+ # Remove ASCII tabs and newlines from input as bytes, any scheme.
+ url = b"x-new-scheme\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+ p = urllib.parse.urlsplit(url)
+ self.assertEqual(p.geturl(), b"x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment")
+
+ # Unsafe bytes is not returned from urlparse cache.
+ # scheme is stored after parsing, sending an scheme with unsafe bytes *will not* return an unsafe scheme
+ url = "https://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+ scheme = "htt\nps"
+ for _ in range(2):
+ p = urllib.parse.urlsplit(url, scheme=scheme)
+ self.assertEqual(p.scheme, "https")
+ self.assertEqual(p.geturl(), "https://www.python.org/javascript:alert('msg')/?query=something#fragment")
+
def test_attributes_bad_port(self):
"""Check handling of invalid ports."""
for bytes in (False, True):
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 66056bf589bf6..ac6e7a9cee0b9 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -76,6 +76,9 @@
'0123456789'
'+-.')
+# Unsafe bytes to be removed per WHATWG spec
+_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
+
# XXX: Consider replacing with functools.lru_cache
MAX_CACHE_SIZE = 20
_parse_cache = {}
@@ -409,6 +412,11 @@ def _checknetloc(netloc):
raise ValueError("netloc '" + netloc + "' contains invalid " +
"characters under NFKC normalization")
+def _remove_unsafe_bytes_from_url(url):
+ for b in _UNSAFE_URL_BYTES_TO_REMOVE:
+ url = url.replace(b, "")
+ return url
+
def urlsplit(url, scheme='', allow_fragments=True):
"""Parse a URL into 5 components:
<scheme>://<netloc>/<path>?<query>#<fragment>
@@ -416,6 +424,8 @@ def urlsplit(url, scheme='', allow_fragments=True):
Note that we don't break the components up in smaller bits
(e.g. netloc is a single string) and we don't expand % escapes."""
url, scheme, _coerce_result = _coerce_args(url, scheme)
+ url = _remove_unsafe_bytes_from_url(url)
+ scheme = _remove_unsafe_bytes_from_url(scheme)
allow_fragments = bool(allow_fragments)
key = url, scheme, allow_fragments, type(url), type(scheme)
cached = _parse_cache.get(key, None)
diff --git a/Misc/NEWS.d/next/Security/2021-04-25-07-46-37.bpo-43882.Jpwx85.rst b/Misc/NEWS.d/next/Security/2021-04-25-07-46-37.bpo-43882.Jpwx85.rst
new file mode 100644
index 0000000000000..a326d079dff4a
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2021-04-25-07-46-37.bpo-43882.Jpwx85.rst
@@ -0,0 +1,6 @@
+The presence of newline or tab characters in parts of a URL could allow
+some forms of attacks.
+
+Following the controlling specification for URLs defined by WHATWG
+:func:`urllib.parse` now removes ASCII newlines and tabs from URLs,
+preventing such attacks.

@ -0,0 +1,98 @@
From a5b78c6f1c802f6023bd4d7a248dc83be1eef6a3 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Mon, 21 Feb 2022 15:48:32 +0100
Subject: [PATCH] 00378: Support expat 2.4.5
Curly brackets were never allowed in namespace URIs
according to RFC 3986, and so-called namespace-validating
XML parsers have the right to reject them a invalid URIs.
libexpat >=2.4.5 has become strcter in that regard due to
related security issues; with ET.XML instantiating a
namespace-aware parser under the hood, this test has no
future in CPython.
References:
- https://datatracker.ietf.org/doc/html/rfc3968
- https://www.w3.org/TR/xml-names/
Also, test_minidom.py: Support Expat >=2.4.5
Upstream: https://bugs.python.org/issue46811
Co-authored-by: Sebastian Pipping <sebastian@pipping.org>
---
Lib/test/test_minidom.py | 12 +++++++++---
Lib/test/test_xml_etree.py | 6 ------
.../Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst | 1 +
3 files changed, 10 insertions(+), 9 deletions(-)
create mode 100644 Misc/NEWS.d/next/Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst
diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py
index d55e25e..e947382 100644
--- a/Lib/test/test_minidom.py
+++ b/Lib/test/test_minidom.py
@@ -5,10 +5,12 @@ import pickle
from test import support
import unittest
+import pyexpat
import xml.dom.minidom
from xml.dom.minidom import parse, Node, Document, parseString
from xml.dom.minidom import getDOMImplementation
+from xml.parsers.expat import ExpatError
tstfile = support.findfile("test.xml", subdir="xmltestdata")
@@ -1156,8 +1158,10 @@ class MinidomTest(unittest.TestCase):
# Verify that character decoding errors raise exceptions instead
# of crashing
- self.assertRaises(UnicodeDecodeError, parseString,
- b'<fran\xe7ais>Comment \xe7a va ? Tr\xe8s bien ?</fran\xe7ais>')
+ self.assertRaises(ExpatError, parseString,
+ b'<fran\xe7ais></fran\xe7ais>')
+ self.assertRaises(ExpatError, parseString,
+ b'<franais>Comment \xe7a va ? Tr\xe8s bien ?</franais>')
doc.unlink()
@@ -1602,7 +1606,9 @@ class MinidomTest(unittest.TestCase):
self.confirm(doc2.namespaceURI == xml.dom.EMPTY_NAMESPACE)
def testExceptionOnSpacesInXMLNSValue(self):
- with self.assertRaisesRegex(ValueError, 'Unsupported syntax'):
+ context = self.assertRaisesRegex(ExpatError, 'syntax error')
+
+ with context:
parseString('<element xmlns:abc="http:abc.com/de f g/hi/j k"><abc:foo /></element>')
def testDocRemoveChild(self):
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index b01709e..acaa519 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1668,12 +1668,6 @@ class BugsTest(unittest.TestCase):
b"<?xml version='1.0' encoding='ascii'?>\n"
b'<body>t&#227;g</body>')
- def test_issue3151(self):
- e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
- self.assertEqual(e.tag, '{${stuff}}localname')
- t = ET.ElementTree(e)
- self.assertEqual(ET.tostring(e), b'<ns0:localname xmlns:ns0="${stuff}" />')
-
def test_issue6565(self):
elem = ET.XML("<body><tag/></body>")
self.assertEqual(summarize_list(elem), ['tag'])
diff --git a/Misc/NEWS.d/next/Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst b/Misc/NEWS.d/next/Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst
new file mode 100644
index 0000000..6969bd1
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst
@@ -0,0 +1 @@
+Make test suite support Expat >=2.4.5
--
2.35.1

@ -0,0 +1,150 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Fri, 3 Jun 2022 11:43:35 +0200
Subject: [PATCH] 00382: CVE-2015-20107
Make mailcap refuse to match unsafe filenames/types/params (GH-91993)
Upstream: https://github.com/python/cpython/issues/68966
Tracker bug: https://bugzilla.redhat.com/show_bug.cgi?id=2075390
---
Doc/library/mailcap.rst | 12 +++++++++
Lib/mailcap.py | 26 +++++++++++++++++--
Lib/test/test_mailcap.py | 8 ++++--
...2-04-27-18-25-30.gh-issue-68966.gjS8zs.rst | 4 +++
4 files changed, 46 insertions(+), 4 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2022-04-27-18-25-30.gh-issue-68966.gjS8zs.rst
diff --git a/Doc/library/mailcap.rst b/Doc/library/mailcap.rst
index 896afd1d73..849d0bc05f 100644
--- a/Doc/library/mailcap.rst
+++ b/Doc/library/mailcap.rst
@@ -54,6 +54,18 @@ standard. However, mailcap files are supported on most Unix systems.
use) to determine whether or not the mailcap line applies. :func:`findmatch`
will automatically check such conditions and skip the entry if the check fails.
+ .. versionchanged:: 3.11
+
+ To prevent security issues with shell metacharacters (symbols that have
+ special effects in a shell command line), ``findmatch`` will refuse
+ to inject ASCII characters other than alphanumerics and ``@+=:,./-_``
+ into the returned command line.
+
+ If a disallowed character appears in *filename*, ``findmatch`` will always
+ return ``(None, None)`` as if no entry was found.
+ If such a character appears elsewhere (a value in *plist* or in *MIMEtype*),
+ ``findmatch`` will ignore all mailcap entries which use that value.
+ A :mod:`warning <warnings>` will be raised in either case.
.. function:: getcaps()
diff --git a/Lib/mailcap.py b/Lib/mailcap.py
index bd0fc0981c..dcd4b449e8 100644
--- a/Lib/mailcap.py
+++ b/Lib/mailcap.py
@@ -2,6 +2,7 @@
import os
import warnings
+import re
__all__ = ["getcaps","findmatch"]
@@ -13,6 +14,11 @@ def lineno_sort_key(entry):
else:
return 1, 0
+_find_unsafe = re.compile(r'[^\xa1-\U0010FFFF\w@+=:,./-]').search
+
+class UnsafeMailcapInput(Warning):
+ """Warning raised when refusing unsafe input"""
+
# Part 1: top-level interface.
@@ -165,15 +171,22 @@ def findmatch(caps, MIMEtype, key='view', filename="/dev/null", plist=[]):
entry to use.
"""
+ if _find_unsafe(filename):
+ msg = "Refusing to use mailcap with filename %r. Use a safe temporary filename." % (filename,)
+ warnings.warn(msg, UnsafeMailcapInput)
+ return None, None
entries = lookup(caps, MIMEtype, key)
# XXX This code should somehow check for the needsterminal flag.
for e in entries:
if 'test' in e:
test = subst(e['test'], filename, plist)
+ if test is None:
+ continue
if test and os.system(test) != 0:
continue
command = subst(e[key], MIMEtype, filename, plist)
- return command, e
+ if command is not None:
+ return command, e
return None, None
def lookup(caps, MIMEtype, key=None):
@@ -206,6 +219,10 @@ def subst(field, MIMEtype, filename, plist=[]):
elif c == 's':
res = res + filename
elif c == 't':
+ if _find_unsafe(MIMEtype):
+ msg = "Refusing to substitute MIME type %r into a shell command." % (MIMEtype,)
+ warnings.warn(msg, UnsafeMailcapInput)
+ return None
res = res + MIMEtype
elif c == '{':
start = i
@@ -213,7 +230,12 @@ def subst(field, MIMEtype, filename, plist=[]):
i = i+1
name = field[start:i]
i = i+1
- res = res + findparam(name, plist)
+ param = findparam(name, plist)
+ if _find_unsafe(param):
+ msg = "Refusing to substitute parameter %r (%s) into a shell command" % (param, name)
+ warnings.warn(msg, UnsafeMailcapInput)
+ return None
+ res = res + param
# XXX To do:
# %n == number of parts if type is multipart/*
# %F == list of alternating type and filename for parts
diff --git a/Lib/test/test_mailcap.py b/Lib/test/test_mailcap.py
index c08423c670..920283d9a2 100644
--- a/Lib/test/test_mailcap.py
+++ b/Lib/test/test_mailcap.py
@@ -121,7 +121,8 @@ class HelperFunctionTest(unittest.TestCase):
(["", "audio/*", "foo.txt"], ""),
(["echo foo", "audio/*", "foo.txt"], "echo foo"),
(["echo %s", "audio/*", "foo.txt"], "echo foo.txt"),
- (["echo %t", "audio/*", "foo.txt"], "echo audio/*"),
+ (["echo %t", "audio/*", "foo.txt"], None),
+ (["echo %t", "audio/wav", "foo.txt"], "echo audio/wav"),
(["echo \\%t", "audio/*", "foo.txt"], "echo %t"),
(["echo foo", "audio/*", "foo.txt", plist], "echo foo"),
(["echo %{total}", "audio/*", "foo.txt", plist], "echo 3")
@@ -205,7 +206,10 @@ class FindmatchTest(unittest.TestCase):
('"An audio fragment"', audio_basic_entry)),
([c, "audio/*"],
{"filename": fname},
- ("/usr/local/bin/showaudio audio/*", audio_entry)),
+ (None, None)),
+ ([c, "audio/wav"],
+ {"filename": fname},
+ ("/usr/local/bin/showaudio audio/wav", audio_entry)),
([c, "message/external-body"],
{"plist": plist},
("showexternal /dev/null default john python.org /tmp foo bar", message_entry))
diff --git a/Misc/NEWS.d/next/Security/2022-04-27-18-25-30.gh-issue-68966.gjS8zs.rst b/Misc/NEWS.d/next/Security/2022-04-27-18-25-30.gh-issue-68966.gjS8zs.rst
new file mode 100644
index 0000000000..da81a1f699
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2022-04-27-18-25-30.gh-issue-68966.gjS8zs.rst
@@ -0,0 +1,4 @@
+The deprecated mailcap module now refuses to inject unsafe text (filenames,
+MIME types, parameters) into shell commands. Instead of using such text, it
+will warn and act as if a match was not found (or for test commands, as if
+the test failed).

@ -0,0 +1,130 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Wed, 22 Jun 2022 15:05:00 -0700
Subject: [PATCH] 00386: CVE-2021-28861
Fix an open redirection vulnerability in the `http.server` module when
an URI path starts with `//` that could produce a 301 Location header
with a misleading target. Vulnerability discovered, and logic fix
proposed, by Hamza Avvan (@hamzaavvan).
Test and comments authored by Gregory P. Smith [Google].
(cherry picked from commit 4abab6b603dd38bec1168e9a37c40a48ec89508e)
Upstream: https://github.com/python/cpython/pull/93879
Tracking bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2120642
Co-authored-by: Gregory P. Smith <greg@krypto.org>
---
Lib/http/server.py | 7 +++
Lib/test/test_httpservers.py | 53 ++++++++++++++++++-
...2-06-15-20-09-23.gh-issue-87389.QVaC3f.rst | 3 ++
3 files changed, 61 insertions(+), 2 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2022-06-15-20-09-23.gh-issue-87389.QVaC3f.rst
diff --git a/Lib/http/server.py b/Lib/http/server.py
index 60a4dadf03..ce05be13d3 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -323,6 +323,13 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
return False
self.command, self.path, self.request_version = command, path, version
+ # gh-87389: The purpose of replacing '//' with '/' is to protect
+ # against open redirect attacks possibly triggered if the path starts
+ # with '//' because http clients treat //path as an absolute URI
+ # without scheme (similar to http://path) rather than a path.
+ if self.path.startswith('//'):
+ self.path = '/' + self.path.lstrip('/') # Reduce to a single /
+
# Examine the headers and look for a Connection directive.
try:
self.headers = http.client.parse_headers(self.rfile,
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
index 66e937e04b..5a0a7c3f74 100644
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -324,7 +324,7 @@ class SimpleHTTPServerTestCase(BaseTestCase):
pass
def setUp(self):
- BaseTestCase.setUp(self)
+ super().setUp()
self.cwd = os.getcwd()
basetempdir = tempfile.gettempdir()
os.chdir(basetempdir)
@@ -343,7 +343,7 @@ class SimpleHTTPServerTestCase(BaseTestCase):
except:
pass
finally:
- BaseTestCase.tearDown(self)
+ super().tearDown()
def check_status_and_reason(self, response, status, data=None):
def close_conn():
@@ -399,6 +399,55 @@ class SimpleHTTPServerTestCase(BaseTestCase):
self.check_status_and_reason(response, HTTPStatus.OK,
data=support.TESTFN_UNDECODABLE)
+ def test_get_dir_redirect_location_domain_injection_bug(self):
+ """Ensure //evil.co/..%2f../../X does not put //evil.co/ in Location.
+
+ //netloc/ in a Location header is a redirect to a new host.
+ https://github.com/python/cpython/issues/87389
+
+ This checks that a path resolving to a directory on our server cannot
+ resolve into a redirect to another server.
+ """
+ os.mkdir(os.path.join(self.tempdir, 'existing_directory'))
+ url = f'/python.org/..%2f..%2f..%2f..%2f..%2f../%0a%0d/../{self.tempdir_name}/existing_directory'
+ expected_location = f'{url}/' # /python.org.../ single slash single prefix, trailing slash
+ # Canonicalizes to /tmp/tempdir_name/existing_directory which does
+ # exist and is a dir, triggering the 301 redirect logic.
+ response = self.request(url)
+ self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY)
+ location = response.getheader('Location')
+ self.assertEqual(location, expected_location, msg='non-attack failed!')
+
+ # //python.org... multi-slash prefix, no trailing slash
+ attack_url = f'/{url}'
+ response = self.request(attack_url)
+ self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY)
+ location = response.getheader('Location')
+ self.assertFalse(location.startswith('//'), msg=location)
+ self.assertEqual(location, expected_location,
+ msg='Expected Location header to start with a single / and '
+ 'end with a / as this is a directory redirect.')
+
+ # ///python.org... triple-slash prefix, no trailing slash
+ attack3_url = f'//{url}'
+ response = self.request(attack3_url)
+ self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY)
+ self.assertEqual(response.getheader('Location'), expected_location)
+
+ # If the second word in the http request (Request-URI for the http
+ # method) is a full URI, we don't worry about it, as that'll be parsed
+ # and reassembled as a full URI within BaseHTTPRequestHandler.send_head
+ # so no errant scheme-less //netloc//evil.co/ domain mixup can happen.
+ attack_scheme_netloc_2slash_url = f'https://pypi.org/{url}'
+ expected_scheme_netloc_location = f'{attack_scheme_netloc_2slash_url}/'
+ response = self.request(attack_scheme_netloc_2slash_url)
+ self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY)
+ location = response.getheader('Location')
+ # We're just ensuring that the scheme and domain make it through, if
+ # there are or aren't multiple slashes at the start of the path that
+ # follows that isn't important in this Location: header.
+ self.assertTrue(location.startswith('https://pypi.org/'), msg=location)
+
def test_get(self):
#constructs the path relative to the root directory of the HTTPServer
response = self.request(self.base_url + '/test')
diff --git a/Misc/NEWS.d/next/Security/2022-06-15-20-09-23.gh-issue-87389.QVaC3f.rst b/Misc/NEWS.d/next/Security/2022-06-15-20-09-23.gh-issue-87389.QVaC3f.rst
new file mode 100644
index 0000000000..029d437190
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2022-06-15-20-09-23.gh-issue-87389.QVaC3f.rst
@@ -0,0 +1,3 @@
+:mod:`http.server`: Fix an open redirection vulnerability in the HTTP server
+when an URI path starts with ``//``. Vulnerability discovered, and initial
+fix proposed, by Hamza Avvan.

@ -0,0 +1,95 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Mon, 7 Nov 2022 19:22:14 -0800
Subject: [PATCH] 00394: CVE-2022-45061: CPU denial of service via inefficient
IDNA decoder
gh-98433: Fix quadratic time idna decoding.
There was an unnecessary quadratic loop in idna decoding. This restores
the behavior to linear.
(cherry picked from commit a6f6c3a3d6f2b580f2d87885c9b8a9350ad7bf15)
Co-authored-by: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
---
Lib/encodings/idna.py | 32 +++++++++----------
Lib/test/test_codecs.py | 6 ++++
...2-11-04-09-29-36.gh-issue-98433.l76c5G.rst | 6 ++++
3 files changed, 27 insertions(+), 17 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst
diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
index ea4058512f..bf98f51336 100644
--- a/Lib/encodings/idna.py
+++ b/Lib/encodings/idna.py
@@ -39,23 +39,21 @@ def nameprep(label):
# Check bidi
RandAL = [stringprep.in_table_d1(x) for x in label]
- for c in RandAL:
- if c:
- # There is a RandAL char in the string. Must perform further
- # tests:
- # 1) The characters in section 5.8 MUST be prohibited.
- # This is table C.8, which was already checked
- # 2) If a string contains any RandALCat character, the string
- # MUST NOT contain any LCat character.
- if any(stringprep.in_table_d2(x) for x in label):
- raise UnicodeError("Violation of BIDI requirement 2")
-
- # 3) If a string contains any RandALCat character, a
- # RandALCat character MUST be the first character of the
- # string, and a RandALCat character MUST be the last
- # character of the string.
- if not RandAL[0] or not RandAL[-1]:
- raise UnicodeError("Violation of BIDI requirement 3")
+ if any(RandAL):
+ # There is a RandAL char in the string. Must perform further
+ # tests:
+ # 1) The characters in section 5.8 MUST be prohibited.
+ # This is table C.8, which was already checked
+ # 2) If a string contains any RandALCat character, the string
+ # MUST NOT contain any LCat character.
+ if any(stringprep.in_table_d2(x) for x in label):
+ raise UnicodeError("Violation of BIDI requirement 2")
+ # 3) If a string contains any RandALCat character, a
+ # RandALCat character MUST be the first character of the
+ # string, and a RandALCat character MUST be the last
+ # character of the string.
+ if not RandAL[0] or not RandAL[-1]:
+ raise UnicodeError("Violation of BIDI requirement 3")
return label
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 56485de3f6..a798d1f287 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1640,6 +1640,12 @@ class IDNACodecTest(unittest.TestCase):
self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org")
self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.")
+ def test_builtin_decode_length_limit(self):
+ with self.assertRaisesRegex(UnicodeError, "too long"):
+ (b"xn--016c"+b"a"*1100).decode("idna")
+ with self.assertRaisesRegex(UnicodeError, "too long"):
+ (b"xn--016c"+b"a"*70).decode("idna")
+
def test_stream(self):
r = codecs.getreader("idna")(io.BytesIO(b"abc"))
r.read(3)
diff --git a/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst
new file mode 100644
index 0000000000..5185fac2e2
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst
@@ -0,0 +1,6 @@
+The IDNA codec decoder used on DNS hostnames by :mod:`socket` or :mod:`asyncio`
+related name resolution functions no longer involves a quadratic algorithm.
+This prevents a potential CPU denial of service if an out-of-spec excessive
+length hostname involving bidirectional characters were decoded. Some protocols
+such as :mod:`urllib` http ``3xx`` redirects potentially allow for an attacker
+to supply such a name.

File diff suppressed because it is too large Load Diff

@ -0,0 +1,223 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Mon, 22 May 2023 03:42:37 -0700
Subject: [PATCH] 00399: CVE-2023-24329
gh-102153: Start stripping C0 control and space chars in `urlsplit` (GH-102508)
`urllib.parse.urlsplit` has already been respecting the WHATWG spec a bit GH-25595.
This adds more sanitizing to respect the "Remove any leading C0 control or space from input" [rule](https://url.spec.whatwg.org/GH-url-parsing:~:text=Remove%20any%20leading%20and%20trailing%20C0%20control%20or%20space%20from%20input.) in response to [CVE-2023-24329](https://nvd.nist.gov/vuln/detail/CVE-2023-24329).
Backported from Python 3.12
(cherry picked from commit f48a96a28012d28ae37a2f4587a780a5eb779946)
Co-authored-by: Illia Volochii <illia.volochii@gmail.com>
Co-authored-by: Gregory P. Smith [Google] <greg@krypto.org>
---
Doc/library/urllib.parse.rst | 40 +++++++++++-
Lib/test/test_urlparse.py | 61 ++++++++++++++++++-
Lib/urllib/parse.py | 12 ++++
...-03-07-20-59-17.gh-issue-102153.14CLSZ.rst | 3 +
4 files changed, 113 insertions(+), 3 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index b717d7cc05..83a7a82089 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -126,6 +126,12 @@ or on combining URL components into a URL string.
``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
decomposed before parsing, no error will be raised.
+
+ .. warning::
+
+ :func:`urlparse` does not perform validation. See :ref:`URL parsing
+ security <url-parsing-security>` for details.
+
.. versionchanged:: 3.2
Added IPv6 URL parsing capabilities.
@@ -288,8 +294,14 @@ or on combining URL components into a URL string.
``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
decomposed before parsing, no error will be raised.
- Following the `WHATWG spec`_ that updates RFC 3986, ASCII newline
- ``\n``, ``\r`` and tab ``\t`` characters are stripped from the URL.
+ Following some of the `WHATWG spec`_ that updates RFC 3986, leading C0
+ control and space characters are stripped from the URL. ``\n``,
+ ``\r`` and tab ``\t`` characters are removed from the URL at any position.
+
+ .. warning::
+
+ :func:`urlsplit` does not perform validation. See :ref:`URL parsing
+ security <url-parsing-security>` for details.
.. versionchanged:: 3.6
Out-of-range port numbers now raise :exc:`ValueError`, instead of
@@ -302,6 +314,9 @@ or on combining URL components into a URL string.
.. versionchanged:: 3.6.14
ASCII newline and tab characters are stripped from the URL.
+ .. versionchanged:: 3.6.15
+ Leading WHATWG C0 control and space characters are stripped from the URL.
+
.. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser
.. function:: urlunsplit(parts)
@@ -371,6 +386,27 @@ or on combining URL components into a URL string.
.. versionchanged:: 3.2
Result is a structured object rather than a simple 2-tuple.
+.. _url-parsing-security:
+
+URL parsing security
+--------------------
+
+The :func:`urlsplit` and :func:`urlparse` APIs do not perform **validation** of
+inputs. They may not raise errors on inputs that other applications consider
+invalid. They may also succeed on some inputs that might not be considered
+URLs elsewhere. Their purpose is for practical functionality rather than
+purity.
+
+Instead of raising an exception on unusual input, they may instead return some
+component parts as empty strings. Or components may contain more than perhaps
+they should.
+
+We recommend that users of these APIs where the values may be used anywhere
+with security implications code defensively. Do some verification within your
+code before trusting a returned component part. Does that ``scheme`` make
+sense? Is that a sensible ``path``? Is there anything strange about that
+``hostname``? etc.
+
.. _parsing-ascii-encoded-bytes:
Parsing ASCII Encoded Bytes
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 3509278a01..7fd61ffea9 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -660,6 +660,65 @@ class UrlParseTestCase(unittest.TestCase):
self.assertEqual(p.scheme, "https")
self.assertEqual(p.geturl(), "https://www.python.org/javascript:alert('msg')/?query=something#fragment")
+ def test_urlsplit_strip_url(self):
+ noise = bytes(range(0, 0x20 + 1))
+ base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
+
+ url = noise.decode("utf-8") + base_url
+ p = urllib.parse.urlsplit(url)
+ self.assertEqual(p.scheme, "http")
+ self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
+ self.assertEqual(p.path, "/doc/")
+ self.assertEqual(p.query, "query=yes")
+ self.assertEqual(p.fragment, "frag")
+ self.assertEqual(p.username, "User")
+ self.assertEqual(p.password, "Pass")
+ self.assertEqual(p.hostname, "www.python.org")
+ self.assertEqual(p.port, 80)
+ self.assertEqual(p.geturl(), base_url)
+
+ url = noise + base_url.encode("utf-8")
+ p = urllib.parse.urlsplit(url)
+ self.assertEqual(p.scheme, b"http")
+ self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
+ self.assertEqual(p.path, b"/doc/")
+ self.assertEqual(p.query, b"query=yes")
+ self.assertEqual(p.fragment, b"frag")
+ self.assertEqual(p.username, b"User")
+ self.assertEqual(p.password, b"Pass")
+ self.assertEqual(p.hostname, b"www.python.org")
+ self.assertEqual(p.port, 80)
+ self.assertEqual(p.geturl(), base_url.encode("utf-8"))
+
+ # Test that trailing space is preserved as some applications rely on
+ # this within query strings.
+ query_spaces_url = "https://www.python.org:88/doc/?query= "
+ p = urllib.parse.urlsplit(noise.decode("utf-8") + query_spaces_url)
+ self.assertEqual(p.scheme, "https")
+ self.assertEqual(p.netloc, "www.python.org:88")
+ self.assertEqual(p.path, "/doc/")
+ self.assertEqual(p.query, "query= ")
+ self.assertEqual(p.port, 88)
+ self.assertEqual(p.geturl(), query_spaces_url)
+
+ p = urllib.parse.urlsplit("www.pypi.org ")
+ # That "hostname" gets considered a "path" due to the
+ # trailing space and our existing logic... YUCK...
+ # and re-assembles via geturl aka unurlsplit into the original.
+ # django.core.validators.URLValidator (at least through v3.2) relies on
+ # this, for better or worse, to catch it in a ValidationError via its
+ # regular expressions.
+ # Here we test the basic round trip concept of such a trailing space.
+ self.assertEqual(urllib.parse.urlunsplit(p), "www.pypi.org ")
+
+ # with scheme as cache-key
+ url = "//www.python.org/"
+ scheme = noise.decode("utf-8") + "https" + noise.decode("utf-8")
+ for _ in range(2):
+ p = urllib.parse.urlsplit(url, scheme=scheme)
+ self.assertEqual(p.scheme, "https")
+ self.assertEqual(p.geturl(), "https://www.python.org/")
+
def test_attributes_bad_port(self):
"""Check handling of invalid ports."""
for bytes in (False, True):
@@ -667,7 +726,7 @@ class UrlParseTestCase(unittest.TestCase):
for port in ("foo", "1.5", "-1", "0x10"):
with self.subTest(bytes=bytes, parse=parse, port=port):
netloc = "www.example.net:" + port
- url = "http://" + netloc
+ url = "http://" + netloc + "/"
if bytes:
netloc = netloc.encode("ascii")
url = url.encode("ascii")
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index ac6e7a9cee..717e990997 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -25,6 +25,10 @@ currently not entirely compliant with this RFC due to defacto
scenarios for parsing, and for backward compatibility purposes, some
parsing quirks from older RFCs are retained. The testcases in
test_urlparse.py provides a good indicator of parsing behavior.
+
+The WHATWG URL Parser spec should also be considered. We are not compliant with
+it either due to existing user code API behavior expectations (Hyrum's Law).
+It serves as a useful guide when making changes.
"""
import re
@@ -76,6 +80,10 @@ scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
'0123456789'
'+-.')
+# Leading and trailing C0 control and space to be stripped per WHATWG spec.
+# == "".join([chr(i) for i in range(0, 0x20 + 1)])
+_WHATWG_C0_CONTROL_OR_SPACE = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '
+
# Unsafe bytes to be removed per WHATWG spec
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
@@ -426,6 +434,10 @@ def urlsplit(url, scheme='', allow_fragments=True):
url, scheme, _coerce_result = _coerce_args(url, scheme)
url = _remove_unsafe_bytes_from_url(url)
scheme = _remove_unsafe_bytes_from_url(scheme)
+ # Only lstrip url as some applications rely on preserving trailing space.
+ # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both)
+ url = url.lstrip(_WHATWG_C0_CONTROL_OR_SPACE)
+ scheme = scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE)
allow_fragments = bool(allow_fragments)
key = url, scheme, allow_fragments, type(url), type(scheme)
cached = _parse_cache.get(key, None)
diff --git a/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst b/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
new file mode 100644
index 0000000000..e57ac4ed3a
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
@@ -0,0 +1,3 @@
+:func:`urllib.parse.urlsplit` now strips leading C0 control and space
+characters following the specification for URLs defined by WHATWG in
+response to CVE-2023-24329. Patch by Illia Volochii.

@ -0,0 +1,648 @@
From 9f39318072b5775cf527f83daf8cb5d64678ac86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Langa?= <lukasz@langa.pl>
Date: Tue, 22 Aug 2023 19:57:01 +0200
Subject: [PATCH 1/4] gh-108310: Fix CVE-2023-40217: Check for & avoid the ssl
pre-close flaw (#108321)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
gh-108310: Fix CVE-2023-40217: Check for & avoid the ssl pre-close flaw
Instances of `ssl.SSLSocket` were vulnerable to a bypass of the TLS handshake
and included protections (like certificate verification) and treating sent
unencrypted data as if it were post-handshake TLS encrypted data.
The vulnerability is caused when a socket is connected, data is sent by the
malicious peer and stored in a buffer, and then the malicious peer closes the
socket within a small timing window before the other peers TLS handshake can
begin. After this sequence of events the closed socket will not immediately
attempt a TLS handshake due to not being connected but will also allow the
buffered data to be read as if a successful TLS handshake had occurred.
Co-authored-by: Gregory P. Smith [Google LLC] <greg@krypto.org>
---
Lib/ssl.py | 31 ++-
Lib/test/test_ssl.py | 214 ++++++++++++++++++
...-08-22-17-39-12.gh-issue-108310.fVM3sg.rst | 7 +
3 files changed, 251 insertions(+), 1 deletion(-)
create mode 100644 Misc/NEWS.d/next/Security/2023-08-22-17-39-12.gh-issue-108310.fVM3sg.rst
diff --git a/Lib/ssl.py b/Lib/ssl.py
index c5c5529..288f237 100644
--- a/Lib/ssl.py
+++ b/Lib/ssl.py
@@ -741,7 +741,7 @@ class SSLSocket(socket):
type=sock.type,
proto=sock.proto,
fileno=sock.fileno())
- self.settimeout(sock.gettimeout())
+ sock_timeout = sock.gettimeout()
sock.detach()
elif fileno is not None:
socket.__init__(self, fileno=fileno)
@@ -755,9 +755,38 @@ class SSLSocket(socket):
if e.errno != errno.ENOTCONN:
raise
connected = False
+ blocking = self.getblocking()
+ self.setblocking(False)
+ try:
+ # We are not connected so this is not supposed to block, but
+ # testing revealed otherwise on macOS and Windows so we do
+ # the non-blocking dance regardless. Our raise when any data
+ # is found means consuming the data is harmless.
+ notconn_pre_handshake_data = self.recv(1)
+ except OSError as e:
+ # EINVAL occurs for recv(1) on non-connected on unix sockets.
+ if e.errno not in (errno.ENOTCONN, errno.EINVAL):
+ raise
+ notconn_pre_handshake_data = b''
+ self.setblocking(blocking)
+ if notconn_pre_handshake_data:
+ # This prevents pending data sent to the socket before it was
+ # closed from escaping to the caller who could otherwise
+ # presume it came through a successful TLS connection.
+ reason = "Closed before TLS handshake with data in recv buffer."
+ notconn_pre_handshake_data_error = SSLError(e.errno, reason)
+ # Add the SSLError attributes that _ssl.c always adds.
+ notconn_pre_handshake_data_error.reason = reason
+ notconn_pre_handshake_data_error.library = None
+ try:
+ self.close()
+ except OSError:
+ pass
+ raise notconn_pre_handshake_data_error
else:
connected = True
+ self.settimeout(sock_timeout) # Must come after setblocking() calls.
self._closed = False
self._sslobj = None
self._connected = connected
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
index b35db25..e24d11b 100644
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -3,11 +3,14 @@
import sys
import unittest
from test import support
+import re
import socket
import select
+import struct
import time
import datetime
import gc
+import http.client
import os
import errno
import pprint
@@ -3940,6 +3943,217 @@ class TestPostHandshakeAuth(unittest.TestCase):
# server cert has not been validated
self.assertEqual(s.getpeercert(), {})
+def set_socket_so_linger_on_with_zero_timeout(sock):
+ sock.setsockopt(socket.SOL_SOCKET, socket.SO_LINGER, struct.pack('ii', 1, 0))
+
+
+class TestPreHandshakeClose(unittest.TestCase):
+ """Verify behavior of close sockets with received data before to the handshake.
+ """
+
+ class SingleConnectionTestServerThread(threading.Thread):
+
+ def __init__(self, *, name, call_after_accept):
+ self.call_after_accept = call_after_accept
+ self.received_data = b'' # set by .run()
+ self.wrap_error = None # set by .run()
+ self.listener = None # set by .start()
+ self.port = None # set by .start()
+ super().__init__(name=name)
+
+ def __enter__(self):
+ self.start()
+ return self
+
+ def __exit__(self, *args):
+ try:
+ if self.listener:
+ self.listener.close()
+ except OSError:
+ pass
+ self.join()
+ self.wrap_error = None # avoid dangling references
+
+ def start(self):
+ self.ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
+ self.ssl_ctx.verify_mode = ssl.CERT_REQUIRED
+ self.ssl_ctx.load_verify_locations(cafile=ONLYCERT)
+ self.ssl_ctx.load_cert_chain(certfile=ONLYCERT, keyfile=ONLYKEY)
+ self.listener = socket.socket()
+ self.port = support.bind_port(self.listener)
+ self.listener.settimeout(2.0)
+ self.listener.listen(1)
+ super().start()
+
+ def run(self):
+ conn, address = self.listener.accept()
+ self.listener.close()
+ with conn:
+ if self.call_after_accept(conn):
+ return
+ try:
+ tls_socket = self.ssl_ctx.wrap_socket(conn, server_side=True)
+ except OSError as err: # ssl.SSLError inherits from OSError
+ self.wrap_error = err
+ else:
+ try:
+ self.received_data = tls_socket.recv(400)
+ except OSError:
+ pass # closed, protocol error, etc.
+
+ def non_linux_skip_if_other_okay_error(self, err):
+ if sys.platform == "linux":
+ return # Expect the full test setup to always work on Linux.
+ if (isinstance(err, ConnectionResetError) or
+ (isinstance(err, OSError) and err.errno == errno.EINVAL) or
+ re.search('wrong.version.number', getattr(err, "reason", ""), re.I)):
+ # On Windows the TCP RST leads to a ConnectionResetError
+ # (ECONNRESET) which Linux doesn't appear to surface to userspace.
+ # If wrap_socket() winds up on the "if connected:" path and doing
+ # the actual wrapping... we get an SSLError from OpenSSL. Typically
+ # WRONG_VERSION_NUMBER. While appropriate, neither is the scenario
+ # we're specifically trying to test. The way this test is written
+ # is known to work on Linux. We'll skip it anywhere else that it
+ # does not present as doing so.
+ self.skipTest("Could not recreate conditions on {}: \
+ err={}".format(sys.platform,err))
+ # If maintaining this conditional winds up being a problem.
+ # just turn this into an unconditional skip anything but Linux.
+ # The important thing is that our CI has the logic covered.
+
+ def test_preauth_data_to_tls_server(self):
+ server_accept_called = threading.Event()
+ ready_for_server_wrap_socket = threading.Event()
+
+ def call_after_accept(unused):
+ server_accept_called.set()
+ if not ready_for_server_wrap_socket.wait(2.0):
+ raise RuntimeError("wrap_socket event never set, test may fail.")
+ return False # Tell the server thread to continue.
+
+ server = self.SingleConnectionTestServerThread(
+ call_after_accept=call_after_accept,
+ name="preauth_data_to_tls_server")
+ server.__enter__() # starts it
+ self.addCleanup(server.__exit__) # ... & unittest.TestCase stops it.
+
+ with socket.socket() as client:
+ client.connect(server.listener.getsockname())
+ # This forces an immediate connection close via RST on .close().
+ set_socket_so_linger_on_with_zero_timeout(client)
+ client.setblocking(False)
+
+ server_accept_called.wait()
+ client.send(b"DELETE /data HTTP/1.0\r\n\r\n")
+ client.close() # RST
+
+ ready_for_server_wrap_socket.set()
+ server.join()
+ wrap_error = server.wrap_error
+ self.assertEqual(b"", server.received_data)
+ self.assertIsInstance(wrap_error, OSError) # All platforms.
+ self.non_linux_skip_if_other_okay_error(wrap_error)
+ self.assertIsInstance(wrap_error, ssl.SSLError)
+ self.assertIn("before TLS handshake with data", wrap_error.args[1])
+ self.assertIn("before TLS handshake with data", wrap_error.reason)
+ self.assertNotEqual(0, wrap_error.args[0])
+ self.assertIsNone(wrap_error.library, msg="attr must exist")
+
+ def test_preauth_data_to_tls_client(self):
+ client_can_continue_with_wrap_socket = threading.Event()
+
+ def call_after_accept(conn_to_client):
+ # This forces an immediate connection close via RST on .close().
+ set_socket_so_linger_on_with_zero_timeout(conn_to_client)
+ conn_to_client.send(
+ b"HTTP/1.0 307 Temporary Redirect\r\n"
+ b"Location: https://example.com/someone-elses-server\r\n"
+ b"\r\n")
+ conn_to_client.close() # RST
+ client_can_continue_with_wrap_socket.set()
+ return True # Tell the server to stop.
+
+ server = self.SingleConnectionTestServerThread(
+ call_after_accept=call_after_accept,
+ name="preauth_data_to_tls_client")
+ server.__enter__() # starts it
+ self.addCleanup(server.__exit__) # ... & unittest.TestCase stops it.
+
+ # Redundant; call_after_accept sets SO_LINGER on the accepted conn.
+ set_socket_so_linger_on_with_zero_timeout(server.listener)
+
+ with socket.socket() as client:
+ client.connect(server.listener.getsockname())
+ if not client_can_continue_with_wrap_socket.wait(2.0):
+ self.fail("test server took too long.")
+ ssl_ctx = ssl.create_default_context()
+ try:
+ tls_client = ssl_ctx.wrap_socket(
+ client, server_hostname="localhost")
+ except OSError as err: # SSLError inherits from OSError
+ wrap_error = err
+ received_data = b""
+ else:
+ wrap_error = None
+ received_data = tls_client.recv(400)
+ tls_client.close()
+
+ server.join()
+ self.assertEqual(b"", received_data)
+ self.assertIsInstance(wrap_error, OSError) # All platforms.
+ self.non_linux_skip_if_other_okay_error(wrap_error)
+ self.assertIsInstance(wrap_error, ssl.SSLError)
+ self.assertIn("before TLS handshake with data", wrap_error.args[1])
+ self.assertIn("before TLS handshake with data", wrap_error.reason)
+ self.assertNotEqual(0, wrap_error.args[0])
+ self.assertIsNone(wrap_error.library, msg="attr must exist")
+
+ def test_https_client_non_tls_response_ignored(self):
+
+ server_responding = threading.Event()
+
+ class SynchronizedHTTPSConnection(http.client.HTTPSConnection):
+ def connect(self):
+ http.client.HTTPConnection.connect(self)
+ # Wait for our fault injection server to have done its thing.
+ if not server_responding.wait(1.0) and support.verbose:
+ sys.stdout.write("server_responding event never set.")
+ self.sock = self._context.wrap_socket(
+ self.sock, server_hostname=self.host)
+
+ def call_after_accept(conn_to_client):
+ # This forces an immediate connection close via RST on .close().
+ set_socket_so_linger_on_with_zero_timeout(conn_to_client)
+ conn_to_client.send(
+ b"HTTP/1.0 402 Payment Required\r\n"
+ b"\r\n")
+ conn_to_client.close() # RST
+ server_responding.set()
+ return True # Tell the server to stop.
+
+ server = self.SingleConnectionTestServerThread(
+ call_after_accept=call_after_accept,
+ name="non_tls_http_RST_responder")
+ server.__enter__() # starts it
+ self.addCleanup(server.__exit__) # ... & unittest.TestCase stops it.
+ # Redundant; call_after_accept sets SO_LINGER on the accepted conn.
+ set_socket_so_linger_on_with_zero_timeout(server.listener)
+
+ connection = SynchronizedHTTPSConnection(
+ f"localhost",
+ port=server.port,
+ context=ssl.create_default_context(),
+ timeout=2.0,
+ )
+ # There are lots of reasons this raises as desired, long before this
+ # test was added. Sending the request requires a successful TLS wrapped
+ # socket; that fails if the connection is broken. It may seem pointless
+ # to test this. It serves as an illustration of something that we never
+ # want to happen... properly not happening.
+ with self.assertRaises(OSError) as err_ctx:
+ connection.request("HEAD", "/test", headers={"Host": "localhost"})
+ response = connection.getresponse()
+
def test_main(verbose=False):
if support.verbose:
diff --git a/Misc/NEWS.d/next/Security/2023-08-22-17-39-12.gh-issue-108310.fVM3sg.rst b/Misc/NEWS.d/next/Security/2023-08-22-17-39-12.gh-issue-108310.fVM3sg.rst
new file mode 100644
index 0000000..403c77a
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2023-08-22-17-39-12.gh-issue-108310.fVM3sg.rst
@@ -0,0 +1,7 @@
+Fixed an issue where instances of :class:`ssl.SSLSocket` were vulnerable to
+a bypass of the TLS handshake and included protections (like certificate
+verification) and treating sent unencrypted data as if it were
+post-handshake TLS encrypted data. Security issue reported as
+`CVE-2023-40217
+<https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40217>`_ by
+Aapo Oksman. Patch by Gregory P. Smith.
--
2.41.0
From 6fbb37c0b7ab8ce1db8e2e78df62d6e5c1e56766 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Wed, 23 Aug 2023 03:10:56 -0700
Subject: [PATCH 2/4] gh-108342: Break ref cycle in SSLSocket._create() exc
(GH-108344) (#108352)
Explicitly break a reference cycle when SSLSocket._create() raises an
exception. Clear the variable storing the exception, since the
exception traceback contains the variables and so creates a reference
cycle.
This test leak was introduced by the test added for the fix of GH-108310.
(cherry picked from commit 64f99350351bc46e016b2286f36ba7cd669b79e3)
Co-authored-by: Victor Stinner <vstinner@python.org>
---
Lib/ssl.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/Lib/ssl.py b/Lib/ssl.py
index 288f237..67869c9 100644
--- a/Lib/ssl.py
+++ b/Lib/ssl.py
@@ -782,7 +782,11 @@ class SSLSocket(socket):
self.close()
except OSError:
pass
- raise notconn_pre_handshake_data_error
+ try:
+ raise notconn_pre_handshake_data_error
+ finally:
+ # Explicitly break the reference cycle.
+ notconn_pre_handshake_data_error = None
else:
connected = True
--
2.41.0
From 579d809e60e569f06c00844cc3a3f06a0e359603 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Langa?= <lukasz@langa.pl>
Date: Thu, 24 Aug 2023 12:09:30 +0200
Subject: [PATCH 3/4] gh-108342: Make ssl TestPreHandshakeClose more reliable
(GH-108370) (#108408)
* In preauth tests of test_ssl, explicitly break reference cycles
invoving SingleConnectionTestServerThread to make sure that the
thread is deleted. Otherwise, the test marks the environment as
altered because the threading module sees a "dangling thread"
(SingleConnectionTestServerThread). This test leak was introduced
by the test added for the fix of issue gh-108310.
* Use support.SHORT_TIMEOUT instead of hardcoded 1.0 or 2.0 seconds
timeout.
* SingleConnectionTestServerThread.run() catchs TimeoutError
* Fix a race condition (missing synchronization) in
test_preauth_data_to_tls_client(): the server now waits until the
client connect() completed in call_after_accept().
* test_https_client_non_tls_response_ignored() calls server.join()
explicitly.
* Replace "localhost" with server.listener.getsockname()[0].
(cherry picked from commit 592bacb6fc0833336c0453e818e9b95016e9fd47)
---
Lib/test/test_ssl.py | 102 ++++++++++++++++++++++++++++++-------------
1 file changed, 71 insertions(+), 31 deletions(-)
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
index e24d11b..6332330 100644
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -3953,12 +3953,16 @@ class TestPreHandshakeClose(unittest.TestCase):
class SingleConnectionTestServerThread(threading.Thread):
- def __init__(self, *, name, call_after_accept):
+ def __init__(self, *, name, call_after_accept, timeout=None):
self.call_after_accept = call_after_accept
self.received_data = b'' # set by .run()
self.wrap_error = None # set by .run()
self.listener = None # set by .start()
self.port = None # set by .start()
+ if timeout is None:
+ self.timeout = support.SHORT_TIMEOUT
+ else:
+ self.timeout = timeout
super().__init__(name=name)
def __enter__(self):
@@ -3981,13 +3985,19 @@ class TestPreHandshakeClose(unittest.TestCase):
self.ssl_ctx.load_cert_chain(certfile=ONLYCERT, keyfile=ONLYKEY)
self.listener = socket.socket()
self.port = support.bind_port(self.listener)
- self.listener.settimeout(2.0)
+ self.listener.settimeout(self.timeout)
self.listener.listen(1)
super().start()
def run(self):
- conn, address = self.listener.accept()
- self.listener.close()
+ try:
+ conn, address = self.listener.accept()
+ except TimeoutError:
+ # on timeout, just close the listener
+ return
+ finally:
+ self.listener.close()
+
with conn:
if self.call_after_accept(conn):
return
@@ -4015,8 +4025,13 @@ class TestPreHandshakeClose(unittest.TestCase):
# we're specifically trying to test. The way this test is written
# is known to work on Linux. We'll skip it anywhere else that it
# does not present as doing so.
- self.skipTest("Could not recreate conditions on {}: \
- err={}".format(sys.platform,err))
+ try:
+ self.skipTest("Could not recreate conditions on {}: \
+ err={}".format(sys.platform,err))
+ finally:
+ # gh-108342: Explicitly break the reference cycle
+ err = None
+
# If maintaining this conditional winds up being a problem.
# just turn this into an unconditional skip anything but Linux.
# The important thing is that our CI has the logic covered.
@@ -4027,7 +4042,7 @@ class TestPreHandshakeClose(unittest.TestCase):
def call_after_accept(unused):
server_accept_called.set()
- if not ready_for_server_wrap_socket.wait(2.0):
+ if not ready_for_server_wrap_socket.wait(support.SHORT_TIMEOUT):
raise RuntimeError("wrap_socket event never set, test may fail.")
return False # Tell the server thread to continue.
@@ -4049,20 +4064,31 @@ class TestPreHandshakeClose(unittest.TestCase):
ready_for_server_wrap_socket.set()
server.join()
+
wrap_error = server.wrap_error
- self.assertEqual(b"", server.received_data)
- self.assertIsInstance(wrap_error, OSError) # All platforms.
- self.non_linux_skip_if_other_okay_error(wrap_error)
- self.assertIsInstance(wrap_error, ssl.SSLError)
- self.assertIn("before TLS handshake with data", wrap_error.args[1])
- self.assertIn("before TLS handshake with data", wrap_error.reason)
- self.assertNotEqual(0, wrap_error.args[0])
- self.assertIsNone(wrap_error.library, msg="attr must exist")
+ server.wrap_error = None
+ try:
+ self.assertEqual(b"", server.received_data)
+ self.assertIsInstance(wrap_error, OSError) # All platforms.
+ self.non_linux_skip_if_other_okay_error(wrap_error)
+ self.assertIsInstance(wrap_error, ssl.SSLError)
+ self.assertIn("before TLS handshake with data", wrap_error.args[1])
+ self.assertIn("before TLS handshake with data", wrap_error.reason)
+ self.assertNotEqual(0, wrap_error.args[0])
+ self.assertIsNone(wrap_error.library, msg="attr must exist")
+ finally:
+ # gh-108342: Explicitly break the reference cycle
+ wrap_error = None
+ server = None
def test_preauth_data_to_tls_client(self):
+ server_can_continue_with_wrap_socket = threading.Event()
client_can_continue_with_wrap_socket = threading.Event()
def call_after_accept(conn_to_client):
+ if not server_can_continue_with_wrap_socket.wait(support.SHORT_TIMEOUT):
+ print("ERROR: test client took too long")
+
# This forces an immediate connection close via RST on .close().
set_socket_so_linger_on_with_zero_timeout(conn_to_client)
conn_to_client.send(
@@ -4084,8 +4110,10 @@ class TestPreHandshakeClose(unittest.TestCase):
with socket.socket() as client:
client.connect(server.listener.getsockname())
- if not client_can_continue_with_wrap_socket.wait(2.0):
- self.fail("test server took too long.")
+ server_can_continue_with_wrap_socket.set()
+
+ if not client_can_continue_with_wrap_socket.wait(support.SHORT_TIMEOUT):
+ self.fail("test server took too long")
ssl_ctx = ssl.create_default_context()
try:
tls_client = ssl_ctx.wrap_socket(
@@ -4099,24 +4127,31 @@ class TestPreHandshakeClose(unittest.TestCase):
tls_client.close()
server.join()
- self.assertEqual(b"", received_data)
- self.assertIsInstance(wrap_error, OSError) # All platforms.
- self.non_linux_skip_if_other_okay_error(wrap_error)
- self.assertIsInstance(wrap_error, ssl.SSLError)
- self.assertIn("before TLS handshake with data", wrap_error.args[1])
- self.assertIn("before TLS handshake with data", wrap_error.reason)
- self.assertNotEqual(0, wrap_error.args[0])
- self.assertIsNone(wrap_error.library, msg="attr must exist")
+ try:
+ self.assertEqual(b"", received_data)
+ self.assertIsInstance(wrap_error, OSError) # All platforms.
+ self.non_linux_skip_if_other_okay_error(wrap_error)
+ self.assertIsInstance(wrap_error, ssl.SSLError)
+ self.assertIn("before TLS handshake with data", wrap_error.args[1])
+ self.assertIn("before TLS handshake with data", wrap_error.reason)
+ self.assertNotEqual(0, wrap_error.args[0])
+ self.assertIsNone(wrap_error.library, msg="attr must exist")
+ finally:
+ # gh-108342: Explicitly break the reference cycle
+ wrap_error = None
+ server = None
def test_https_client_non_tls_response_ignored(self):
-
server_responding = threading.Event()
class SynchronizedHTTPSConnection(http.client.HTTPSConnection):
def connect(self):
+ # Call clear text HTTP connect(), not the encrypted HTTPS (TLS)
+ # connect(): wrap_socket() is called manually below.
http.client.HTTPConnection.connect(self)
+
# Wait for our fault injection server to have done its thing.
- if not server_responding.wait(1.0) and support.verbose:
+ if not server_responding.wait(support.SHORT_TIMEOUT) and support.verbose:
sys.stdout.write("server_responding event never set.")
self.sock = self._context.wrap_socket(
self.sock, server_hostname=self.host)
@@ -4131,29 +4166,34 @@ class TestPreHandshakeClose(unittest.TestCase):
server_responding.set()
return True # Tell the server to stop.
+ timeout = 2.0
server = self.SingleConnectionTestServerThread(
call_after_accept=call_after_accept,
- name="non_tls_http_RST_responder")
+ name="non_tls_http_RST_responder",
+ timeout=timeout)
server.__enter__() # starts it
self.addCleanup(server.__exit__) # ... & unittest.TestCase stops it.
# Redundant; call_after_accept sets SO_LINGER on the accepted conn.
set_socket_so_linger_on_with_zero_timeout(server.listener)
connection = SynchronizedHTTPSConnection(
- f"localhost",
+ server.listener.getsockname()[0],
port=server.port,
context=ssl.create_default_context(),
- timeout=2.0,
+ timeout=timeout,
)
+
# There are lots of reasons this raises as desired, long before this
# test was added. Sending the request requires a successful TLS wrapped
# socket; that fails if the connection is broken. It may seem pointless
# to test this. It serves as an illustration of something that we never
# want to happen... properly not happening.
- with self.assertRaises(OSError) as err_ctx:
+ with self.assertRaises(OSError):
connection.request("HEAD", "/test", headers={"Host": "localhost"})
response = connection.getresponse()
+ server.join()
+
def test_main(verbose=False):
if support.verbose:
--
2.41.0
From 2e8776245e9fb8ede784077df26684b4b53df0bc Mon Sep 17 00:00:00 2001
From: Charalampos Stratakis <cstratak@redhat.com>
Date: Mon, 25 Sep 2023 21:55:29 +0200
Subject: [PATCH 4/4] Downstream: Additional fixup for 3.6:
Use alternative for self.getblocking(), which was added in Python 3.7
see: https://docs.python.org/3/library/socket.html#socket.socket.getblocking
Set self._sslobj early to avoid AttributeError
---
Lib/ssl.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/Lib/ssl.py b/Lib/ssl.py
index 67869c9..daedc82 100644
--- a/Lib/ssl.py
+++ b/Lib/ssl.py
@@ -690,6 +690,7 @@ class SSLSocket(socket):
suppress_ragged_eofs=True, npn_protocols=None, ciphers=None,
server_hostname=None,
_context=None, _session=None):
+ self._sslobj = None
if _context:
self._context = _context
@@ -755,7 +756,7 @@ class SSLSocket(socket):
if e.errno != errno.ENOTCONN:
raise
connected = False
- blocking = self.getblocking()
+ blocking = (self.gettimeout() != 0)
self.setblocking(False)
try:
# We are not connected so this is not supposed to block, but
--
2.41.0

@ -0,0 +1,143 @@
From c563f409ea30bcb0623d785428c9257917371b76 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Thu, 23 Jan 2020 06:49:19 -0800
Subject: [PATCH] bpo-39421: Fix posible crash in heapq with custom comparison
operators (GH-18118) (GH-18146)
(cherry picked from commit 79f89e6e5a659846d1068e8b1bd8e491ccdef861)
Co-authored-by: Pablo Galindo <Pablogsal@gmail.com>
---
Lib/test/test_heapq.py | 31 ++++++++++++++++
.../2020-01-22-15-53-37.bpo-39421.O3nG7u.rst | 2 ++
Modules/_heapqmodule.c | 35 ++++++++++++++-----
3 files changed, 59 insertions(+), 9 deletions(-)
create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-01-22-15-53-37.bpo-39421.O3nG7u.rst
diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py
index 2f8c648d84a58..7c3fb0210f69b 100644
--- a/Lib/test/test_heapq.py
+++ b/Lib/test/test_heapq.py
@@ -414,6 +414,37 @@ def test_heappop_mutating_heap(self):
with self.assertRaises((IndexError, RuntimeError)):
self.module.heappop(heap)
+ def test_comparison_operator_modifiying_heap(self):
+ # See bpo-39421: Strong references need to be taken
+ # when comparing objects as they can alter the heap
+ class EvilClass(int):
+ def __lt__(self, o):
+ heap.clear()
+ return NotImplemented
+
+ heap = []
+ self.module.heappush(heap, EvilClass(0))
+ self.assertRaises(IndexError, self.module.heappushpop, heap, 1)
+
+ def test_comparison_operator_modifiying_heap_two_heaps(self):
+
+ class h(int):
+ def __lt__(self, o):
+ list2.clear()
+ return NotImplemented
+
+ class g(int):
+ def __lt__(self, o):
+ list1.clear()
+ return NotImplemented
+
+ list1, list2 = [], []
+
+ self.module.heappush(list1, h(0))
+ self.module.heappush(list2, g(0))
+
+ self.assertRaises((IndexError, RuntimeError), self.module.heappush, list1, g(1))
+ self.assertRaises((IndexError, RuntimeError), self.module.heappush, list2, h(1))
class TestErrorHandlingPython(TestErrorHandling, TestCase):
module = py_heapq
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-01-22-15-53-37.bpo-39421.O3nG7u.rst b/Misc/NEWS.d/next/Core and Builtins/2020-01-22-15-53-37.bpo-39421.O3nG7u.rst
new file mode 100644
index 0000000000000..bae008150ee12
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-01-22-15-53-37.bpo-39421.O3nG7u.rst
@@ -0,0 +1,2 @@
+Fix possible crashes when operating with the functions in the :mod:`heapq`
+module and custom comparison operators.
diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c
index b499e1f668aae..0fb35ffe5ec48 100644
--- a/Modules/_heapqmodule.c
+++ b/Modules/_heapqmodule.c
@@ -29,7 +29,11 @@ siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
while (pos > startpos) {
parentpos = (pos - 1) >> 1;
parent = arr[parentpos];
+ Py_INCREF(newitem);
+ Py_INCREF(parent);
cmp = PyObject_RichCompareBool(newitem, parent, Py_LT);
+ Py_DECREF(parent);
+ Py_DECREF(newitem);
if (cmp < 0)
return -1;
if (size != PyList_GET_SIZE(heap)) {
@@ -71,10 +75,13 @@ siftup(PyListObject *heap, Py_ssize_t pos)
/* Set childpos to index of smaller child. */
childpos = 2*pos + 1; /* leftmost child position */
if (childpos + 1 < endpos) {
- cmp = PyObject_RichCompareBool(
- arr[childpos],
- arr[childpos + 1],
- Py_LT);
+ PyObject* a = arr[childpos];
+ PyObject* b = arr[childpos + 1];
+ Py_INCREF(a);
+ Py_INCREF(b);
+ cmp = PyObject_RichCompareBool(a, b, Py_LT);
+ Py_DECREF(a);
+ Py_DECREF(b);
if (cmp < 0)
return -1;
childpos += ((unsigned)cmp ^ 1); /* increment when cmp==0 */
@@ -229,7 +236,10 @@ heappushpop(PyObject *self, PyObject *args)
return item;
}
- cmp = PyObject_RichCompareBool(PyList_GET_ITEM(heap, 0), item, Py_LT);
+ PyObject* top = PyList_GET_ITEM(heap, 0);
+ Py_INCREF(top);
+ cmp = PyObject_RichCompareBool(top, item, Py_LT);
+ Py_DECREF(top);
if (cmp < 0)
return NULL;
if (cmp == 0) {
@@ -383,7 +393,11 @@ siftdown_max(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
while (pos > startpos) {
parentpos = (pos - 1) >> 1;
parent = arr[parentpos];
+ Py_INCREF(parent);
+ Py_INCREF(newitem);
cmp = PyObject_RichCompareBool(parent, newitem, Py_LT);
+ Py_DECREF(parent);
+ Py_DECREF(newitem);
if (cmp < 0)
return -1;
if (size != PyList_GET_SIZE(heap)) {
@@ -425,10 +439,13 @@ siftup_max(PyListObject *heap, Py_ssize_t pos)
/* Set childpos to index of smaller child. */
childpos = 2*pos + 1; /* leftmost child position */
if (childpos + 1 < endpos) {
- cmp = PyObject_RichCompareBool(
- arr[childpos + 1],
- arr[childpos],
- Py_LT);
+ PyObject* a = arr[childpos + 1];
+ PyObject* b = arr[childpos];
+ Py_INCREF(a);
+ Py_INCREF(b);
+ cmp = PyObject_RichCompareBool(a, b, Py_LT);
+ Py_DECREF(a);
+ Py_DECREF(b);
if (cmp < 0)
return -1;
childpos += ((unsigned)cmp ^ 1); /* increment when cmp==0 */

@ -0,0 +1,560 @@
From cec66eefbee76ee95f252a52de0f5abc1b677f5b Mon Sep 17 00:00:00 2001
From: Lumir Balhar <lbalhar@redhat.com>
Date: Tue, 12 Dec 2023 13:03:42 +0100
Subject: [PATCH] [3.6] bpo-42103: Improve validation of Plist files.
(GH-22882) (GH-23118)
* Prevent some possible DoS attacks via providing invalid Plist files
with extremely large number of objects or collection sizes.
* Raise InvalidFileException for too large bytes and string size instead of returning garbage.
* Raise InvalidFileException instead of ValueError for specific invalid datetime (NaN).
* Raise InvalidFileException instead of TypeError for non-hashable dict keys.
* Add more tests for invalid Plist files..
(cherry picked from commit 34637a0ce21e7261b952fbd9d006474cc29b681f)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
Lib/plistlib.py | 34 +-
Lib/test/test_plistlib.py | 394 +++++++++++++++---
.../2020-10-23-19-20-14.bpo-42103.C5obK2.rst | 3 +
.../2020-10-23-19-19-30.bpo-42103.cILT66.rst | 2 +
4 files changed, 366 insertions(+), 67 deletions(-)
create mode 100644 Misc/NEWS.d/next/Library/2020-10-23-19-20-14.bpo-42103.C5obK2.rst
create mode 100644 Misc/NEWS.d/next/Security/2020-10-23-19-19-30.bpo-42103.cILT66.rst
diff --git a/Lib/plistlib.py b/Lib/plistlib.py
index a918643..df1f346 100644
--- a/Lib/plistlib.py
+++ b/Lib/plistlib.py
@@ -626,7 +626,7 @@ class _BinaryPlistParser:
return self._read_object(top_object)
except (OSError, IndexError, struct.error, OverflowError,
- UnicodeDecodeError):
+ ValueError):
raise InvalidFileException()
def _get_size(self, tokenL):
@@ -642,7 +642,7 @@ class _BinaryPlistParser:
def _read_ints(self, n, size):
data = self._fp.read(size * n)
if size in _BINARY_FORMAT:
- return struct.unpack('>' + _BINARY_FORMAT[size] * n, data)
+ return struct.unpack(f'>{n}{_BINARY_FORMAT[size]}', data)
else:
if not size or len(data) != size * n:
raise InvalidFileException()
@@ -701,19 +701,25 @@ class _BinaryPlistParser:
elif tokenH == 0x40: # data
s = self._get_size(tokenL)
- if self._use_builtin_types:
- result = self._fp.read(s)
- else:
- result = Data(self._fp.read(s))
+ result = self._fp.read(s)
+ if len(result) != s:
+ raise InvalidFileException()
+ if not self._use_builtin_types:
+ result = Data(result)
elif tokenH == 0x50: # ascii string
s = self._get_size(tokenL)
- result = self._fp.read(s).decode('ascii')
- result = result
+ data = self._fp.read(s)
+ if len(data) != s:
+ raise InvalidFileException()
+ result = data.decode('ascii')
elif tokenH == 0x60: # unicode string
- s = self._get_size(tokenL)
- result = self._fp.read(s * 2).decode('utf-16be')
+ s = self._get_size(tokenL) * 2
+ data = self._fp.read(s)
+ if len(data) != s:
+ raise InvalidFileException()
+ result = data.decode('utf-16be')
# tokenH == 0x80 is documented as 'UID' and appears to be used for
# keyed-archiving, not in plists.
@@ -737,9 +743,11 @@ class _BinaryPlistParser:
obj_refs = self._read_refs(s)
result = self._dict_type()
self._objects[ref] = result
- for k, o in zip(key_refs, obj_refs):
- result[self._read_object(k)] = self._read_object(o)
-
+ try:
+ for k, o in zip(key_refs, obj_refs):
+ result[self._read_object(k)] = self._read_object(o)
+ except TypeError:
+ raise InvalidFileException()
else:
raise InvalidFileException()
diff --git a/Lib/test/test_plistlib.py b/Lib/test/test_plistlib.py
index d47c607..f71245d 100644
--- a/Lib/test/test_plistlib.py
+++ b/Lib/test/test_plistlib.py
@@ -1,5 +1,6 @@
# Copyright (C) 2003-2013 Python Software Foundation
+import struct
import unittest
import plistlib
import os
@@ -90,6 +91,284 @@ TESTDATA={
xQHHAsQC0gAAAAAAAAIBAAAAAAAAADkAAAAAAAAAAAAAAAAAAALs'''),
}
+INVALID_BINARY_PLISTS = [
+ ('too short data',
+ b''
+ ),
+ ('too large offset_table_offset and offset_size = 1',
+ b'\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x2a'
+ ),
+ ('too large offset_table_offset and nonstandard offset_size',
+ b'\x00\x00\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x03\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x2c'
+ ),
+ ('integer overflow in offset_table_offset',
+ b'\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\xff\xff\xff\xff\xff\xff\xff\xff'
+ ),
+ ('too large top_object',
+ b'\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x09'
+ ),
+ ('integer overflow in top_object',
+ b'\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\xff\xff\xff\xff\xff\xff\xff\xff'
+ b'\x00\x00\x00\x00\x00\x00\x00\x09'
+ ),
+ ('too large num_objects and offset_size = 1',
+ b'\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\xff'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x09'
+ ),
+ ('too large num_objects and nonstandard offset_size',
+ b'\x00\x00\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x03\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\xff'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x09'
+ ),
+ ('extremally large num_objects (32 bit)',
+ b'\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x7f\xff\xff\xff'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x09'
+ ),
+ ('extremally large num_objects (64 bit)',
+ b'\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\xff\xff\xff\xff\xff'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x09'
+ ),
+ ('integer overflow in num_objects',
+ b'\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\xff\xff\xff\xff\xff\xff\xff\xff'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x09'
+ ),
+ ('offset_size = 0',
+ b'\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x09'
+ ),
+ ('ref_size = 0',
+ b'\xa1\x01\x00\x08\x0a'
+ b'\x00\x00\x00\x00\x00\x00\x01\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x02'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x0b'
+ ),
+ ('too large offset',
+ b'\x00\x2a'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x09'
+ ),
+ ('integer overflow in offset',
+ b'\x00\xff\xff\xff\xff\xff\xff\xff\xff'
+ b'\x00\x00\x00\x00\x00\x00\x08\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x09'
+ ),
+ ('too large array size',
+ b'\xaf\x00\x01\xff\x00\x08\x0c'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x02'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x0d'
+ ),
+ ('extremally large array size (32-bit)',
+ b'\xaf\x02\x7f\xff\xff\xff\x01\x00\x08\x0f'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x02'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x10'
+ ),
+ ('extremally large array size (64-bit)',
+ b'\xaf\x03\x00\x00\x00\xff\xff\xff\xff\xff\x01\x00\x08\x13'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x02'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x14'
+ ),
+ ('integer overflow in array size',
+ b'\xaf\x03\xff\xff\xff\xff\xff\xff\xff\xff\x01\x00\x08\x13'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x02'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x14'
+ ),
+ ('too large reference index',
+ b'\xa1\x02\x00\x08\x0a'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x02'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x0b'
+ ),
+ ('integer overflow in reference index',
+ b'\xa1\xff\xff\xff\xff\xff\xff\xff\xff\x00\x08\x11'
+ b'\x00\x00\x00\x00\x00\x00\x01\x08'
+ b'\x00\x00\x00\x00\x00\x00\x00\x02'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x12'
+ ),
+ ('too large bytes size',
+ b'\x4f\x00\x23\x41\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x0c'
+ ),
+ ('extremally large bytes size (32-bit)',
+ b'\x4f\x02\x7f\xff\xff\xff\x41\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x0f'
+ ),
+ ('extremally large bytes size (64-bit)',
+ b'\x4f\x03\x00\x00\x00\xff\xff\xff\xff\xff\x41\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x13'
+ ),
+ ('integer overflow in bytes size',
+ b'\x4f\x03\xff\xff\xff\xff\xff\xff\xff\xff\x41\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x13'
+ ),
+ ('too large ASCII size',
+ b'\x5f\x00\x23\x41\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x0c'
+ ),
+ ('extremally large ASCII size (32-bit)',
+ b'\x5f\x02\x7f\xff\xff\xff\x41\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x0f'
+ ),
+ ('extremally large ASCII size (64-bit)',
+ b'\x5f\x03\x00\x00\x00\xff\xff\xff\xff\xff\x41\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x13'
+ ),
+ ('integer overflow in ASCII size',
+ b'\x5f\x03\xff\xff\xff\xff\xff\xff\xff\xff\x41\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x13'
+ ),
+ ('invalid ASCII',
+ b'\x51\xff\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x0a'
+ ),
+ ('too large UTF-16 size',
+ b'\x6f\x00\x13\x20\xac\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x0e'
+ ),
+ ('extremally large UTF-16 size (32-bit)',
+ b'\x6f\x02\x4f\xff\xff\xff\x20\xac\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x11'
+ ),
+ ('extremally large UTF-16 size (64-bit)',
+ b'\x6f\x03\x00\x00\x00\xff\xff\xff\xff\xff\x20\xac\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x15'
+ ),
+ ('integer overflow in UTF-16 size',
+ b'\x6f\x03\xff\xff\xff\xff\xff\xff\xff\xff\x20\xac\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x15'
+ ),
+ ('invalid UTF-16',
+ b'\x61\xd8\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x0b'
+ ),
+ ('non-hashable key',
+ b'\xd1\x01\x01\xa0\x08\x0b'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x02'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x0c'
+ ),
+ ('too large datetime (datetime overflow)',
+ b'\x33\x42\x50\x00\x00\x00\x00\x00\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x11'
+ ),
+ ('too large datetime (timedelta overflow)',
+ b'\x33\x42\xe0\x00\x00\x00\x00\x00\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x11'
+ ),
+ ('invalid datetime (Infinity)',
+ b'\x33\x7f\xf0\x00\x00\x00\x00\x00\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x11'
+ ),
+ ('invalid datetime (NaN)',
+ b'\x33\x7f\xf8\x00\x00\x00\x00\x00\x00\x08'
+ b'\x00\x00\x00\x00\x00\x00\x01\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x11'
+ ),
+]
class TestPlistlib(unittest.TestCase):
@@ -447,6 +726,21 @@ class TestPlistlib(unittest.TestCase):
class TestBinaryPlistlib(unittest.TestCase):
+ @staticmethod
+ def decode(*objects, offset_size=1, ref_size=1):
+ data = [b'bplist00']
+ offset = 8
+ offsets = []
+ for x in objects:
+ offsets.append(offset.to_bytes(offset_size, 'big'))
+ data.append(x)
+ offset += len(x)
+ tail = struct.pack('>6xBBQQQ', offset_size, ref_size,
+ len(objects), 0, offset)
+ data.extend(offsets)
+ data.append(tail)
+ return plistlib.loads(b''.join(data), fmt=plistlib.FMT_BINARY)
+
def test_nonstandard_refs_size(self):
# Issue #21538: Refs and offsets are 24-bit integers
data = (b'bplist00'
@@ -461,7 +755,7 @@ class TestBinaryPlistlib(unittest.TestCase):
def test_dump_duplicates(self):
# Test effectiveness of saving duplicated objects
- for x in (None, False, True, 12345, 123.45, 'abcde', b'abcde',
+ for x in (None, False, True, 12345, 123.45, 'abcde', 'абвгд', b'abcde',
datetime.datetime(2004, 10, 26, 10, 33, 33),
plistlib.Data(b'abcde'), bytearray(b'abcde'),
[12, 345], (12, 345), {'12': 345}):
@@ -500,6 +794,20 @@ class TestBinaryPlistlib(unittest.TestCase):
b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY))
self.assertIs(b['x'], b)
+ def test_deep_nesting(self):
+ for N in [300, 100000]:
+ chunks = [b'\xa1' + (i + 1).to_bytes(4, 'big') for i in range(N)]
+ try:
+ result = self.decode(*chunks, b'\x54seed', offset_size=4, ref_size=4)
+ except RecursionError:
+ pass
+ else:
+ for i in range(N):
+ self.assertIsInstance(result, list)
+ self.assertEqual(len(result), 1)
+ result = result[0]
+ self.assertEqual(result, 'seed')
+
def test_large_timestamp(self):
# Issue #26709: 32-bit timestamp out of range
for ts in -2**31-1, 2**31:
@@ -509,55 +817,37 @@ class TestBinaryPlistlib(unittest.TestCase):
data = plistlib.dumps(d, fmt=plistlib.FMT_BINARY)
self.assertEqual(plistlib.loads(data), d)
+ def test_load_singletons(self):
+ self.assertIs(self.decode(b'\x00'), None)
+ self.assertIs(self.decode(b'\x08'), False)
+ self.assertIs(self.decode(b'\x09'), True)
+ self.assertEqual(self.decode(b'\x0f'), b'')
+
+ def test_load_int(self):
+ self.assertEqual(self.decode(b'\x10\x00'), 0)
+ self.assertEqual(self.decode(b'\x10\xfe'), 0xfe)
+ self.assertEqual(self.decode(b'\x11\xfe\xdc'), 0xfedc)
+ self.assertEqual(self.decode(b'\x12\xfe\xdc\xba\x98'), 0xfedcba98)
+ self.assertEqual(self.decode(b'\x13\x01\x23\x45\x67\x89\xab\xcd\xef'),
+ 0x0123456789abcdef)
+ self.assertEqual(self.decode(b'\x13\xfe\xdc\xba\x98\x76\x54\x32\x10'),
+ -0x123456789abcdf0)
+
+ def test_unsupported(self):
+ unsupported = [*range(1, 8), *range(10, 15),
+ 0x20, 0x21, *range(0x24, 0x33), *range(0x34, 0x40)]
+ for i in [0x70, 0x90, 0xb0, 0xc0, 0xe0, 0xf0]:
+ unsupported.extend(i + j for j in range(16))
+ for token in unsupported:
+ with self.subTest(f'token {token:02x}'):
+ with self.assertRaises(plistlib.InvalidFileException):
+ self.decode(bytes([token]) + b'\x00'*16)
+
def test_invalid_binary(self):
- for data in [
- # too short data
- b'',
- # too large offset_table_offset and nonstandard offset_size
- b'\x00\x08'
- b'\x00\x00\x00\x00\x00\x00\x03\x01'
- b'\x00\x00\x00\x00\x00\x00\x00\x01'
- b'\x00\x00\x00\x00\x00\x00\x00\x00'
- b'\x00\x00\x00\x00\x00\x00\x00\x2a',
- # integer overflow in offset_table_offset
- b'\x00\x08'
- b'\x00\x00\x00\x00\x00\x00\x01\x01'
- b'\x00\x00\x00\x00\x00\x00\x00\x01'
- b'\x00\x00\x00\x00\x00\x00\x00\x00'
- b'\xff\xff\xff\xff\xff\xff\xff\xff',
- # offset_size = 0
- b'\x00\x08'
- b'\x00\x00\x00\x00\x00\x00\x00\x01'
- b'\x00\x00\x00\x00\x00\x00\x00\x01'
- b'\x00\x00\x00\x00\x00\x00\x00\x00'
- b'\x00\x00\x00\x00\x00\x00\x00\x09',
- # ref_size = 0
- b'\xa1\x01\x00\x08\x0a'
- b'\x00\x00\x00\x00\x00\x00\x01\x00'
- b'\x00\x00\x00\x00\x00\x00\x00\x02'
- b'\x00\x00\x00\x00\x00\x00\x00\x00'
- b'\x00\x00\x00\x00\x00\x00\x00\x0b',
- # integer overflow in offset
- b'\x00\xff\xff\xff\xff\xff\xff\xff\xff'
- b'\x00\x00\x00\x00\x00\x00\x08\x01'
- b'\x00\x00\x00\x00\x00\x00\x00\x01'
- b'\x00\x00\x00\x00\x00\x00\x00\x00'
- b'\x00\x00\x00\x00\x00\x00\x00\x09',
- # invalid ASCII
- b'\x51\xff\x08'
- b'\x00\x00\x00\x00\x00\x00\x01\x01'
- b'\x00\x00\x00\x00\x00\x00\x00\x01'
- b'\x00\x00\x00\x00\x00\x00\x00\x00'
- b'\x00\x00\x00\x00\x00\x00\x00\x0a',
- # invalid UTF-16
- b'\x61\xd8\x00\x08'
- b'\x00\x00\x00\x00\x00\x00\x01\x01'
- b'\x00\x00\x00\x00\x00\x00\x00\x01'
- b'\x00\x00\x00\x00\x00\x00\x00\x00'
- b'\x00\x00\x00\x00\x00\x00\x00\x0b',
- ]:
- with self.assertRaises(plistlib.InvalidFileException):
- plistlib.loads(b'bplist00' + data, fmt=plistlib.FMT_BINARY)
+ for name, data in INVALID_BINARY_PLISTS:
+ with self.subTest(name):
+ with self.assertRaises(plistlib.InvalidFileException):
+ plistlib.loads(b'bplist00' + data, fmt=plistlib.FMT_BINARY)
class TestPlistlibDeprecated(unittest.TestCase):
@@ -655,9 +945,5 @@ class MiscTestCase(unittest.TestCase):
support.check__all__(self, plistlib, blacklist=blacklist)
-def test_main():
- support.run_unittest(TestPlistlib, TestPlistlibDeprecated, MiscTestCase)
-
-
if __name__ == '__main__':
- test_main()
+ unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2020-10-23-19-20-14.bpo-42103.C5obK2.rst b/Misc/NEWS.d/next/Library/2020-10-23-19-20-14.bpo-42103.C5obK2.rst
new file mode 100644
index 0000000..4eb694c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-10-23-19-20-14.bpo-42103.C5obK2.rst
@@ -0,0 +1,3 @@
+:exc:`~plistlib.InvalidFileException` and :exc:`RecursionError` are now
+the only errors caused by loading malformed binary Plist file (previously
+ValueError and TypeError could be raised in some specific cases).
diff --git a/Misc/NEWS.d/next/Security/2020-10-23-19-19-30.bpo-42103.cILT66.rst b/Misc/NEWS.d/next/Security/2020-10-23-19-19-30.bpo-42103.cILT66.rst
new file mode 100644
index 0000000..15d7b65
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2020-10-23-19-19-30.bpo-42103.cILT66.rst
@@ -0,0 +1,2 @@
+Prevented potential DoS attack via CPU and RAM exhaustion when processing
+malformed Apple Property List files in binary format.
--
2.43.0

@ -0,0 +1,88 @@
From 0d02ff99721f7650e39ba4c7d8fe06f412bbb591 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Wed, 13 Dec 2023 11:50:26 +0100
Subject: [PATCH] bpo-46623: Skip two test_zlib tests on s390x (GH-31096)
Skip test_pair() and test_speech128() of test_zlib on s390x since
they fail if zlib uses the s390x hardware accelerator.
---
Lib/test/test_zlib.py | 32 +++++++++++++++++++
.../2022-02-03-09-45-26.bpo-46623.vxzuhV.rst | 2 ++
2 files changed, 34 insertions(+)
create mode 100644 Misc/NEWS.d/next/Tests/2022-02-03-09-45-26.bpo-46623.vxzuhV.rst
diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py
index b7170b4..770a425 100644
--- a/Lib/test/test_zlib.py
+++ b/Lib/test/test_zlib.py
@@ -1,6 +1,7 @@
import unittest
from test import support
import binascii
+import os
import pickle
import random
import sys
@@ -15,6 +16,35 @@ requires_Decompress_copy = unittest.skipUnless(
hasattr(zlib.decompressobj(), "copy"),
'requires Decompress.copy()')
+# bpo-46623: On s390x, when a hardware accelerator is used, using different
+# ways to compress data with zlib can produce different compressed data.
+# Simplified test_pair() code:
+#
+# def func1(data):
+# return zlib.compress(data)
+#
+# def func2(data)
+# co = zlib.compressobj()
+# x1 = co.compress(data)
+# x2 = co.flush()
+# return x1 + x2
+#
+# On s390x if zlib uses a hardware accelerator, func1() creates a single
+# "final" compressed block whereas func2() produces 3 compressed blocks (the
+# last one is a final block). On other platforms with no accelerator, func1()
+# and func2() produce the same compressed data made of a single (final)
+# compressed block.
+#
+# Only the compressed data is different, the decompression returns the original
+# data:
+#
+# zlib.decompress(func1(data)) == zlib.decompress(func2(data)) == data
+#
+# Make the assumption that s390x always has an accelerator to simplify the skip
+# condition. Windows doesn't have os.uname() but it doesn't support s390x.
+skip_on_s390x = unittest.skipIf(hasattr(os, 'uname') and os.uname().machine == 's390x',
+ 'skipped on s390x')
+
class VersionTestCase(unittest.TestCase):
@@ -174,6 +204,7 @@ class CompressTestCase(BaseCompressTestCase, unittest.TestCase):
bufsize=zlib.DEF_BUF_SIZE),
HAMLET_SCENE)
+ @skip_on_s390x
def test_speech128(self):
# compress more data
data = HAMLET_SCENE * 128
@@ -225,6 +256,7 @@ class CompressTestCase(BaseCompressTestCase, unittest.TestCase):
class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
# Test compression object
+ @skip_on_s390x
def test_pair(self):
# straightforward compress/decompress objects
datasrc = HAMLET_SCENE * 128
diff --git a/Misc/NEWS.d/next/Tests/2022-02-03-09-45-26.bpo-46623.vxzuhV.rst b/Misc/NEWS.d/next/Tests/2022-02-03-09-45-26.bpo-46623.vxzuhV.rst
new file mode 100644
index 0000000..be085c0
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2022-02-03-09-45-26.bpo-46623.vxzuhV.rst
@@ -0,0 +1,2 @@
+Skip test_pair() and test_speech128() of test_zlib on s390x since they fail
+if zlib uses the s390x hardware accelerator. Patch by Victor Stinner.
--
2.43.0

@ -0,0 +1,750 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Fri, 15 Dec 2023 16:10:40 +0100
Subject: [PATCH] 00415: [CVE-2023-27043] gh-102988: Reject malformed addresses
in email.parseaddr() (#111116)
Detect email address parsing errors and return empty tuple to
indicate the parsing error (old API). Add an optional 'strict'
parameter to getaddresses() and parseaddr() functions. Patch by
Thomas Dwyer.
Co-Authored-By: Thomas Dwyer <github@tomd.tel>
---
Doc/library/email.utils.rst | 19 +-
Lib/email/utils.py | 151 ++++++++++++-
Lib/test/test_email/test_email.py | 204 +++++++++++++++++-
...-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8 +
4 files changed, 361 insertions(+), 21 deletions(-)
create mode 100644 Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst
index 63fae2ab84..d1e1898591 100644
--- a/Doc/library/email.utils.rst
+++ b/Doc/library/email.utils.rst
@@ -60,13 +60,18 @@ of the new API.
begins with angle brackets, they are stripped off.
-.. function:: parseaddr(address)
+.. function:: parseaddr(address, *, strict=True)
Parse address -- which should be the value of some address-containing field such
as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
*email address* parts. Returns a tuple of that information, unless the parse
fails, in which case a 2-tuple of ``('', '')`` is returned.
+ If *strict* is true, use a strict parser which rejects malformed inputs.
+
+ .. versionchanged:: 3.13
+ Add *strict* optional parameter and reject malformed inputs by default.
+
.. function:: formataddr(pair, charset='utf-8')
@@ -84,12 +89,15 @@ of the new API.
Added the *charset* option.
-.. function:: getaddresses(fieldvalues)
+.. function:: getaddresses(fieldvalues, *, strict=True)
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
*fieldvalues* is a sequence of header field values as might be returned by
- :meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
- example that gets all the recipients of a message::
+ :meth:`Message.get_all <email.message.Message.get_all>`.
+
+ If *strict* is true, use a strict parser which rejects malformed inputs.
+
+ Here's a simple example that gets all the recipients of a message::
from email.utils import getaddresses
@@ -99,6 +107,9 @@ of the new API.
resent_ccs = msg.get_all('resent-cc', [])
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
+ .. versionchanged:: 3.13
+ Add *strict* optional parameter and reject malformed inputs by default.
+
.. function:: parsedate(date)
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index 39c2240607..f83b7e5d7e 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -48,6 +48,7 @@ TICK = "'"
specialsre = re.compile(r'[][\\()<>@,:;".]')
escapesre = re.compile(r'[\\"]')
+
def _has_surrogates(s):
"""Return True if s contains surrogate-escaped binary data."""
# This check is based on the fact that unless there are surrogates, utf8
@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'):
return address
+def _iter_escaped_chars(addr):
+ pos = 0
+ escape = False
+ for pos, ch in enumerate(addr):
+ if escape:
+ yield (pos, '\\' + ch)
+ escape = False
+ elif ch == '\\':
+ escape = True
+ else:
+ yield (pos, ch)
+ if escape:
+ yield (pos, '\\')
-def getaddresses(fieldvalues):
- """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
- all = COMMASPACE.join(fieldvalues)
- a = _AddressList(all)
- return a.addresslist
+
+def _strip_quoted_realnames(addr):
+ """Strip real names between quotes."""
+ if '"' not in addr:
+ # Fast path
+ return addr
+
+ start = 0
+ open_pos = None
+ result = []
+ for pos, ch in _iter_escaped_chars(addr):
+ if ch == '"':
+ if open_pos is None:
+ open_pos = pos
+ else:
+ if start != open_pos:
+ result.append(addr[start:open_pos])
+ start = pos + 1
+ open_pos = None
+
+ if start < len(addr):
+ result.append(addr[start:])
+
+ return ''.join(result)
+
+
+supports_strict_parsing = True
+
+def getaddresses(fieldvalues, *, strict=True):
+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
+
+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
+ its place.
+
+ If strict is true, use a strict parser which rejects malformed inputs.
+ """
+
+ # If strict is true, if the resulting list of parsed addresses is greater
+ # than the number of fieldvalues in the input list, a parsing error has
+ # occurred and consequently a list containing a single empty 2-tuple [('',
+ # '')] is returned in its place. This is done to avoid invalid output.
+ #
+ # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
+ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
+ # Safe output: [('', '')]
+
+ if not strict:
+ all = COMMASPACE.join(str(v) for v in fieldvalues)
+ a = _AddressList(all)
+ return a.addresslist
+
+ fieldvalues = [str(v) for v in fieldvalues]
+ fieldvalues = _pre_parse_validation(fieldvalues)
+ addr = COMMASPACE.join(fieldvalues)
+ a = _AddressList(addr)
+ result = _post_parse_validation(a.addresslist)
+
+ # Treat output as invalid if the number of addresses is not equal to the
+ # expected number of addresses.
+ n = 0
+ for v in fieldvalues:
+ # When a comma is used in the Real Name part it is not a deliminator.
+ # So strip those out before counting the commas.
+ v = _strip_quoted_realnames(v)
+ # Expected number of addresses: 1 + number of commas
+ n += 1 + v.count(',')
+ if len(result) != n:
+ return [('', '')]
+
+ return result
+
+
+def _check_parenthesis(addr):
+ # Ignore parenthesis in quoted real names.
+ addr = _strip_quoted_realnames(addr)
+
+ opens = 0
+ for pos, ch in _iter_escaped_chars(addr):
+ if ch == '(':
+ opens += 1
+ elif ch == ')':
+ opens -= 1
+ if opens < 0:
+ return False
+ return (opens == 0)
+
+
+def _pre_parse_validation(email_header_fields):
+ accepted_values = []
+ for v in email_header_fields:
+ if not _check_parenthesis(v):
+ v = "('', '')"
+ accepted_values.append(v)
+
+ return accepted_values
+
+
+def _post_parse_validation(parsed_email_header_tuples):
+ accepted_values = []
+ # The parser would have parsed a correctly formatted domain-literal
+ # The existence of an [ after parsing indicates a parsing failure
+ for v in parsed_email_header_tuples:
+ if '[' in v[1]:
+ v = ('', '')
+ accepted_values.append(v)
+
+ return accepted_values
@@ -214,16 +330,33 @@ def parsedate_to_datetime(data):
tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
-def parseaddr(addr):
+def parseaddr(addr, *, strict=True):
"""
Parse addr into its constituent realname and email address parts.
Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
+
+ If strict is True, use a strict parser which rejects malformed inputs.
"""
- addrs = _AddressList(addr).addresslist
- if not addrs:
- return '', ''
+ if not strict:
+ addrs = _AddressList(addr).addresslist
+ if not addrs:
+ return ('', '')
+ return addrs[0]
+
+ if isinstance(addr, list):
+ addr = addr[0]
+
+ if not isinstance(addr, str):
+ return ('', '')
+
+ addr = _pre_parse_validation([addr])[0]
+ addrs = _post_parse_validation(_AddressList(addr).addresslist)
+
+ if not addrs or len(addrs) > 1:
+ return ('', '')
+
return addrs[0]
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index e4e40b612f..ce36efc1b1 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -19,6 +19,7 @@ except ImportError:
import email
import email.policy
+import email.utils
from email.charset import Charset
from email.header import Header, decode_header, make_header
@@ -3207,15 +3208,154 @@ Foo
[('Al Person', 'aperson@dom.ain'),
('Bud Person', 'bperson@dom.ain')])
+ def test_getaddresses_comma_in_name(self):
+ """GH-106669 regression test."""
+ self.assertEqual(
+ utils.getaddresses(
+ [
+ '"Bud, Person" <bperson@dom.ain>',
+ 'aperson@dom.ain (Al Person)',
+ '"Mariusz Felisiak" <to@example.com>',
+ ]
+ ),
+ [
+ ('Bud, Person', 'bperson@dom.ain'),
+ ('Al Person', 'aperson@dom.ain'),
+ ('Mariusz Felisiak', 'to@example.com'),
+ ],
+ )
+
+ def test_parsing_errors(self):
+ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056"""
+ alice = 'alice@example.org'
+ bob = 'bob@example.com'
+ empty = ('', '')
+
+ # Test utils.getaddresses() and utils.parseaddr() on malformed email
+ # addresses: default behavior (strict=True) rejects malformed address,
+ # and strict=False which tolerates malformed address.
+ for invalid_separator, expected_non_strict in (
+ ('(', [(f'<{bob}>', alice)]),
+ (')', [('', alice), empty, ('', bob)]),
+ ('<', [('', alice), empty, ('', bob), empty]),
+ ('>', [('', alice), empty, ('', bob)]),
+ ('[', [('', f'{alice}[<{bob}>]')]),
+ (']', [('', alice), empty, ('', bob)]),
+ ('@', [empty, empty, ('', bob)]),
+ (';', [('', alice), empty, ('', bob)]),
+ (':', [('', alice), ('', bob)]),
+ ('.', [('', alice + '.'), ('', bob)]),
+ ('"', [('', alice), ('', f'<{bob}>')]),
+ ):
+ address = f'{alice}{invalid_separator}<{bob}>'
+ with self.subTest(address=address):
+ self.assertEqual(utils.getaddresses([address]),
+ [empty])
+ self.assertEqual(utils.getaddresses([address], strict=False),
+ expected_non_strict)
+
+ self.assertEqual(utils.parseaddr([address]),
+ empty)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Comma (',') is treated differently depending on strict parameter.
+ # Comma without quotes.
+ address = f'{alice},<{bob}>'
+ self.assertEqual(utils.getaddresses([address]),
+ [('', alice), ('', bob)])
+ self.assertEqual(utils.getaddresses([address], strict=False),
+ [('', alice), ('', bob)])
+ self.assertEqual(utils.parseaddr([address]),
+ empty)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Real name between quotes containing comma.
+ address = '"Alice, alice@example.org" <bob@example.com>'
+ expected_strict = ('Alice, alice@example.org', 'bob@example.com')
+ self.assertEqual(utils.getaddresses([address]), [expected_strict])
+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
+ self.assertEqual(utils.parseaddr([address]), expected_strict)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Valid parenthesis in comments.
+ address = 'alice@example.org (Alice)'
+ expected_strict = ('Alice', 'alice@example.org')
+ self.assertEqual(utils.getaddresses([address]), [expected_strict])
+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
+ self.assertEqual(utils.parseaddr([address]), expected_strict)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Invalid parenthesis in comments.
+ address = 'alice@example.org )Alice('
+ self.assertEqual(utils.getaddresses([address]), [empty])
+ self.assertEqual(utils.getaddresses([address], strict=False),
+ [('', 'alice@example.org'), ('', ''), ('', 'Alice')])
+ self.assertEqual(utils.parseaddr([address]), empty)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Two addresses with quotes separated by comma.
+ address = '"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>'
+ self.assertEqual(utils.getaddresses([address]),
+ [('Jane Doe', 'jane@example.net'),
+ ('John Doe', 'john@example.net')])
+ self.assertEqual(utils.getaddresses([address], strict=False),
+ [('Jane Doe', 'jane@example.net'),
+ ('John Doe', 'john@example.net')])
+ self.assertEqual(utils.parseaddr([address]), empty)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Test email.utils.supports_strict_parsing attribute
+ self.assertEqual(email.utils.supports_strict_parsing, True)
+
def test_getaddresses_nasty(self):
- eq = self.assertEqual
- eq(utils.getaddresses(['foo: ;']), [('', '')])
- eq(utils.getaddresses(
- ['[]*-- =~$']),
- [('', ''), ('', ''), ('', '*--')])
- eq(utils.getaddresses(
- ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
+ for addresses, expected in (
+ (['"Sürname, Firstname" <to@example.com>'],
+ [('Sürname, Firstname', 'to@example.com')]),
+
+ (['foo: ;'],
+ [('', '')]),
+
+ (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
+ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
+
+ ([r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>'],
+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]),
+
+ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'],
+ [('', '')]),
+
+ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'],
+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]),
+
+ (['John Doe <jdoe@machine(comment). example>'],
+ [('John Doe (comment)', 'jdoe@machine.example')]),
+
+ (['"Mary Smith: Personal Account" <smith@home.example>'],
+ [('Mary Smith: Personal Account', 'smith@home.example')]),
+
+ (['Undisclosed recipients:;'],
+ [('', '')]),
+
+ ([r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>'],
+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]),
+ ):
+ with self.subTest(addresses=addresses):
+ self.assertEqual(utils.getaddresses(addresses),
+ expected)
+ self.assertEqual(utils.getaddresses(addresses, strict=False),
+ expected)
+
+ addresses = ['[]*-- =~$']
+ self.assertEqual(utils.getaddresses(addresses),
+ [('', '')])
+ self.assertEqual(utils.getaddresses(addresses, strict=False),
+ [('', ''), ('', ''), ('', '*--')])
def test_getaddresses_embedded_comment(self):
"""Test proper handling of a nested comment"""
@@ -3397,6 +3537,54 @@ multipart/report
m = cls(*constructor, policy=email.policy.default)
self.assertIs(m.policy, email.policy.default)
+ def test_iter_escaped_chars(self):
+ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')),
+ [(0, 'a'),
+ (2, '\\\\'),
+ (3, 'b'),
+ (5, '\\"'),
+ (6, 'c'),
+ (8, '\\\\'),
+ (9, '"'),
+ (10, 'd')])
+ self.assertEqual(list(utils._iter_escaped_chars('a\\')),
+ [(0, 'a'), (1, '\\')])
+
+ def test_strip_quoted_realnames(self):
+ def check(addr, expected):
+ self.assertEqual(utils._strip_quoted_realnames(addr), expected)
+
+ check('"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>',
+ ' <jane@example.net>, <john@example.net>')
+ check(r'"Jane \"Doe\"." <jane@example.net>',
+ ' <jane@example.net>')
+
+ # special cases
+ check(r'before"name"after', 'beforeafter')
+ check(r'before"name"', 'before')
+ check(r'b"name"', 'b') # single char
+ check(r'"name"after', 'after')
+ check(r'"name"a', 'a') # single char
+ check(r'"name"', '')
+
+ # no change
+ for addr in (
+ 'Jane Doe <jane@example.net>, John Doe <john@example.net>',
+ 'lone " quote',
+ ):
+ self.assertEqual(utils._strip_quoted_realnames(addr), addr)
+
+
+ def test_check_parenthesis(self):
+ addr = 'alice@example.net'
+ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)'))
+ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice('))
+ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))'))
+ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)'))
+
+ # Ignore real name between quotes
+ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}'))
+
# Test the iterator/generators
class TestIterators(TestEmailBase):
diff --git a/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
new file mode 100644
index 0000000000..3d0e9e4078
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
@@ -0,0 +1,8 @@
+:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now
+return ``('', '')`` 2-tuples in more situations where invalid email
+addresses are encountered instead of potentially inaccurate values. Add
+optional *strict* parameter to these two functions: use ``strict=False`` to
+get the old behavior, accept malformed inputs.
+``getattr(email.utils, 'supports_strict_parsing', False)`` can be use to check
+if the *strict* paramater is available. Patch by Thomas Dwyer and Victor
+Stinner to improve the CVE-2023-27043 fix.
From 4df4fad359c280f2328b98ea9b4414f244624a58 Mon Sep 17 00:00:00 2001
From: Lumir Balhar <lbalhar@redhat.com>
Date: Mon, 18 Dec 2023 20:15:33 +0100
Subject: [PATCH] Make it possible to disable strict parsing in email module
---
Doc/library/email.utils.rst | 26 +++++++++++
Lib/email/utils.py | 54 ++++++++++++++++++++++-
Lib/test/test_email/test_email.py | 72 ++++++++++++++++++++++++++++++-
3 files changed, 149 insertions(+), 3 deletions(-)
diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst
index d1e1898591..7aef773b5f 100644
--- a/Doc/library/email.utils.rst
+++ b/Doc/library/email.utils.rst
@@ -69,6 +69,19 @@ of the new API.
If *strict* is true, use a strict parser which rejects malformed inputs.
+ The default setting for *strict* is set to ``True``, but you can override
+ it by setting the environment variable ``PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING``
+ to non-empty string.
+
+ Additionally, you can permanently set the default value for *strict* to
+ ``False`` by creating the configuration file ``/etc/python/email.cfg``
+ with the following content:
+
+ .. code-block:: ini
+
+ [email_addr_parsing]
+ PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING = true
+
.. versionchanged:: 3.13
Add *strict* optional parameter and reject malformed inputs by default.
@@ -97,6 +110,19 @@ of the new API.
If *strict* is true, use a strict parser which rejects malformed inputs.
+ The default setting for *strict* is set to ``True``, but you can override
+ it by setting the environment variable ``PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING``
+ to non-empty string.
+
+ Additionally, you can permanently set the default value for *strict* to
+ ``False`` by creating the configuration file ``/etc/python/email.cfg``
+ with the following content:
+
+ .. code-block:: ini
+
+ [email_addr_parsing]
+ PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING = true
+
Here's a simple example that gets all the recipients of a message::
from email.utils import getaddresses
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index f83b7e5d7e..b8e90ceb8e 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -48,6 +48,46 @@ TICK = "'"
specialsre = re.compile(r'[][\\()<>@,:;".]')
escapesre = re.compile(r'[\\"]')
+_EMAIL_CONFIG_FILE = "/etc/python/email.cfg"
+_cached_strict_addr_parsing = None
+
+
+def _use_strict_email_parsing():
+ """"Cache implementation for _cached_strict_addr_parsing"""
+ global _cached_strict_addr_parsing
+ if _cached_strict_addr_parsing is None:
+ _cached_strict_addr_parsing = _use_strict_email_parsing_impl()
+ return _cached_strict_addr_parsing
+
+
+def _use_strict_email_parsing_impl():
+ """Returns True if strict email parsing is not disabled by
+ config file or env variable.
+ """
+ disabled = bool(os.environ.get("PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING"))
+ if disabled:
+ return False
+
+ try:
+ file = open(_EMAIL_CONFIG_FILE)
+ except FileNotFoundError:
+ pass
+ else:
+ with file:
+ import configparser
+ config = configparser.ConfigParser(
+ interpolation=None,
+ comment_prefixes=('#', ),
+
+ )
+ config.read_file(file)
+ disabled = config.getboolean('email_addr_parsing', "PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING", fallback=None)
+
+ if disabled:
+ return False
+
+ return True
+
def _has_surrogates(s):
"""Return True if s contains surrogate-escaped binary data."""
@@ -149,7 +189,7 @@ def _strip_quoted_realnames(addr):
supports_strict_parsing = True
-def getaddresses(fieldvalues, *, strict=True):
+def getaddresses(fieldvalues, *, strict=None):
"""Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
@@ -158,6 +198,11 @@ def getaddresses(fieldvalues, *, strict=True):
If strict is true, use a strict parser which rejects malformed inputs.
"""
+ # If default is used, it's True unless disabled
+ # by env variable or config file.
+ if strict == None:
+ strict = _use_strict_email_parsing()
+
# If strict is true, if the resulting list of parsed addresses is greater
# than the number of fieldvalues in the input list, a parsing error has
# occurred and consequently a list containing a single empty 2-tuple [('',
@@ -330,7 +375,7 @@ def parsedate_to_datetime(data):
tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
-def parseaddr(addr, *, strict=True):
+def parseaddr(addr, *, strict=None):
"""
Parse addr into its constituent realname and email address parts.
@@ -339,6 +384,11 @@ def parseaddr(addr, *, strict=True):
If strict is True, use a strict parser which rejects malformed inputs.
"""
+ # If default is used, it's True unless disabled
+ # by env variable or config file.
+ if strict == None:
+ strict = _use_strict_email_parsing()
+
if not strict:
addrs = _AddressList(addr).addresslist
if not addrs:
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index ce36efc1b1..05ea201b68 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -7,6 +7,9 @@ import time
import base64
import unittest
import textwrap
+import contextlib
+import tempfile
+import os
from io import StringIO, BytesIO
from itertools import chain
@@ -41,7 +44,7 @@ from email import iterators
from email import base64mime
from email import quoprimime
-from test.support import unlink, start_threads
+from test.support import unlink, start_threads, EnvironmentVarGuard, swap_attr
from test.test_email import openfile, TestEmailBase
# These imports are documented to work, but we are testing them using a
@@ -3313,6 +3316,73 @@ Foo
# Test email.utils.supports_strict_parsing attribute
self.assertEqual(email.utils.supports_strict_parsing, True)
+ def test_parsing_errors_strict_set_via_env_var(self):
+ address = 'alice@example.org )Alice('
+ empty = ('', '')
+
+ # Reset cached default value to make the function
+ # reload the config file provided below.
+ utils._cached_strict_addr_parsing = None
+
+ # Strict disabled via env variable, old behavior expected
+ with EnvironmentVarGuard() as environ:
+ environ["PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING"] = "1"
+
+ self.assertEqual(utils.getaddresses([address]),
+ [('', 'alice@example.org'), ('', ''), ('', 'Alice')])
+ self.assertEqual(utils.parseaddr([address]), ('', address))
+
+ # Clear cache again
+ utils._cached_strict_addr_parsing = None
+
+ # Default strict=True, empty result expected
+ self.assertEqual(utils.getaddresses([address]), [empty])
+ self.assertEqual(utils.parseaddr([address]), empty)
+
+ # Clear cache again
+ utils._cached_strict_addr_parsing = None
+
+ # Empty string in env variable = strict parsing enabled (default)
+ with EnvironmentVarGuard() as environ:
+ environ["PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING"] = ""
+
+ # Default strict=True, empty result expected
+ self.assertEqual(utils.getaddresses([address]), [empty])
+ self.assertEqual(utils.parseaddr([address]), empty)
+
+ @contextlib.contextmanager
+ def _email_strict_parsing_conf(self):
+ """Context for the given email strict parsing configured in config file"""
+ with tempfile.TemporaryDirectory() as tmpdirname:
+ filename = os.path.join(tmpdirname, 'conf.cfg')
+ with swap_attr(utils, "_EMAIL_CONFIG_FILE", filename):
+ with open(filename, 'w') as file:
+ file.write('[email_addr_parsing]\n')
+ file.write('PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING = true')
+ utils._EMAIL_CONFIG_FILE = filename
+ yield
+
+ def test_parsing_errors_strict_disabled_via_config_file(self):
+ address = 'alice@example.org )Alice('
+ empty = ('', '')
+
+ # Reset cached default value to make the function
+ # reload the config file provided below.
+ utils._cached_strict_addr_parsing = None
+
+ # Strict disabled via config file, old results expected
+ with self._email_strict_parsing_conf():
+ self.assertEqual(utils.getaddresses([address]),
+ [('', 'alice@example.org'), ('', ''), ('', 'Alice')])
+ self.assertEqual(utils.parseaddr([address]), ('', address))
+
+ # Clear cache again
+ utils._cached_strict_addr_parsing = None
+
+ # Default strict=True, empty result expected
+ self.assertEqual(utils.getaddresses([address]), [empty])
+ self.assertEqual(utils.parseaddr([address]), empty)
+
def test_getaddresses_nasty(self):
for addresses, expected in (
(['"Sürname, Firstname" <to@example.com>'],
--
2.43.0

@ -0,0 +1,62 @@
"""Checks if all *.pyc and *.pyo files have later mtime than their *.py files."""
import importlib.util
import os
import sys
# list of test and other files that we expect not to have bytecode
not_compiled = [
'/usr/bin/pathfix.py',
'test/bad_coding.py',
'test/bad_coding2.py',
'test/badsyntax_3131.py',
'test/badsyntax_future3.py',
'test/badsyntax_future4.py',
'test/badsyntax_future5.py',
'test/badsyntax_future6.py',
'test/badsyntax_future7.py',
'test/badsyntax_future8.py',
'test/badsyntax_future9.py',
'test/badsyntax_future10.py',
'test/badsyntax_async1.py',
'test/badsyntax_async2.py',
'test/badsyntax_async3.py',
'test/badsyntax_async4.py',
'test/badsyntax_async5.py',
'test/badsyntax_async6.py',
'test/badsyntax_async7.py',
'test/badsyntax_async8.py',
'test/badsyntax_async9.py',
'test/badsyntax_pep3120.py',
'lib2to3/tests/data/bom.py',
'lib2to3/tests/data/crlf.py',
'lib2to3/tests/data/different_encoding.py',
'lib2to3/tests/data/false_encoding.py',
'lib2to3/tests/data/py2_test_grammar.py',
'.debug-gdb.py',
]
failed = 0
def bytecode_expected(source):
for f in not_compiled:
if source.endswith(f):
return False
return True
compiled = filter(lambda f: bytecode_expected(f), sys.argv[1:])
for f in compiled:
# check both pyo and pyc
to_check = map(lambda b: importlib.util.cache_from_source(f, b), (True, False))
f_mtime = os.path.getmtime(f)
for c in to_check:
c_mtime = os.path.getmtime(c)
if c_mtime < f_mtime:
sys.stderr.write('Failed bytecompilation timestamps check: ')
sys.stderr.write('Bytecode file {} is older than source file {}.\n'.format(c, f))
failed += 1
if failed:
sys.stderr.write('\n{} files failed bytecompilation timestamps check.\n'.format(failed))
sys.exit(1)

@ -0,0 +1,28 @@
#! /bin/bash -ex
# Download a release of Python (if missing) and remove .exe files from it
version=$1
if [ -z "${version}" ]; then
echo "Usage: $0 VERSION" >& 2
echo "" >& 2
echo "example: $0 3.6.6" >& 2
exit 1
fi
versionedname=Python-${version}
orig_archive=${versionedname}.tar.xz
new_archive=${versionedname}-noexe.tar.xz
if [ ! -e ${orig_archive} ]; then
wget -N https://www.python.org/ftp/python/${version}/${orig_archive}
fi
deleted_names=$(tar --list -Jf ${orig_archive} | grep '\.exe$')
# tar --delete does not operate on compressed archives, so do
# xz compression/decompression explicitly
xz --decompress --stdout ${orig_archive} | \
tar --delete -v ${deleted_names} | \
xz --compress --stdout -3 -T0 > ${new_archive}

@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Copyright 2017 Zbigniew Jędrzejewski-Szmek -->
<application>
<id type="desktop">idle3.desktop</id>
<name>IDLE3</name>
<metadata_licence>CC0</metadata_licence>
<project_license>Python-2.0</project_license>
<summary>Python 3 Integrated Development and Learning Environment</summary>
<description>
<p>
IDLE is Pythons Integrated Development and Learning Environment.
The GUI is uniform between Windows, Unix, and Mac OS X.
IDLE provides an easy way to start writing, running, and debugging
Python code.
</p>
<p>
IDLE is written in pure Python, and uses the tkinter GUI toolkit.
It provides:
</p>
<ul>
<li>a Python shell window (interactive interpreter) with colorizing of code input, output, and error messages,</li>
<li>a multi-window text editor with multiple undo, Python colorizing, smart indent, call tips, auto completion, and other features,</li>
<li>search within any window, replace within editor windows, and search through multiple files (grep),</li>
<li>a debugger with persistent breakpoints, stepping, and viewing of global and local namespaces.</li>
</ul>
</description>
<url type="homepage">https://docs.python.org/3/library/idle.html</url>
<screenshots>
<screenshot type="default">http://in.waw.pl/~zbyszek/fedora/idle3-appdata/idle3-main-window.png</screenshot>
<screenshot>http://in.waw.pl/~zbyszek/fedora/idle3-appdata/idle3-class-browser.png</screenshot>
<screenshot>http://in.waw.pl/~zbyszek/fedora/idle3-appdata/idle3-code-viewer.png</screenshot>
</screenshots>
<update_contact>zbyszek@in.waw.pl</update_contact>
</application>

@ -0,0 +1,11 @@
[Desktop Entry]
Version=1.0
Name=IDLE 3
Comment=Python 3 Integrated Development and Learning Environment
Exec=idle3 %F
TryExec=idle3
Terminal=false
Type=Application
Icon=idle3
Categories=Development;IDE;
MimeType=text/x-python;

@ -0,0 +1,8 @@
#! /bin/bash
echo "For more information about this script,"
echo "please see the manual page of the same name."
echo "Run: man unversioned-python"
exit 2

@ -0,0 +1,57 @@
.\" unversioned-python.8
.TH UNVERSIONED-PYTHON 8 "17 September 2018"
.SH NAME
unversioned-python \- info on how to set up the `python` command.
.SH SYNOPSIS
.B unversioned-python
.SH DESCRIPTION
.B unversioned-python
The "unversioned" `python` command (/usr/bin/python) is missing by default.
We recommend using `python3` or `python2` instead.
If using the explicit versioned command is inconvenient,
you can use `alternatives` to configure `python` to launch
either Python 3 or Python 2.
Note: The `python3` or `python2` package needs to be installed before its
functionality is selected.
.SH EXAXPLES
.B alternatives
.B --config
.IR python
Interactively select what the `python` command runs.
.B alternatives
.B --set
.IR python
.IR /usr/bin/python3
Configure the `python` command to run Python 3
Note: this is non-standard behavior according to [PEP 394].
.B alternatives
.B --set
.IR python
.IR /usr/bin/python2
Configure the `python` command to run Python 2
Note: please review the support lifecycle of python2 before relying on it
.B alternatives
.B --auto
.IR python
Undo configuration changes and revert to the default (missing `python` command)
.SH LINKS
.B [PEP 394]:
.IR https://www.python.org/dev/peps/pep-0394/

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save