From 3f01ced0b5051798516fc65f5fac10ffd15dbce6 Mon Sep 17 00:00:00 2001 From: Lumir Balhar Date: Wed, 10 Jan 2024 08:53:53 +0100 Subject: [PATCH] Make it possible to disable strict parsing in email module --- Doc/library/email.utils.rst | 26 +++++++++++ Lib/email/utils.py | 54 +++++++++++++++++++++- Lib/test/test_email/test_email.py | 74 ++++++++++++++++++++++++++++++- 3 files changed, 150 insertions(+), 4 deletions(-) diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst index 97ddf49..0c9bf53 100644 --- a/Doc/library/email.utils.rst +++ b/Doc/library/email.utils.rst @@ -69,6 +69,19 @@ of the new API. If *strict* is true, use a strict parser which rejects malformed inputs. + The default setting for *strict* is set to ``True``, but you can override + it by setting the environment variable ``PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING`` + to a non-empty string. + + Additionally, you can permanently set the default value for *strict* to + ``False`` by creating the configuration file ``/etc/python/email.cfg`` + with the following content: + + .. code-block:: ini + + [email_addr_parsing] + PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING = true + .. versionchanged:: 3.11.10 Add *strict* optional parameter and reject malformed inputs by default. @@ -97,6 +110,19 @@ of the new API. If *strict* is true, use a strict parser which rejects malformed inputs. + The default setting for *strict* is set to ``True``, but you can override + it by setting the environment variable ``PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING`` + to a non-empty string. + + Additionally, you can permanently set the default value for *strict* to + ``False`` by creating the configuration file ``/etc/python/email.cfg`` + with the following content: + + .. 
code-block:: ini + + [email_addr_parsing] + PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING = true + Here's a simple example that gets all the recipients of a message:: from email.utils import getaddresses diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 94ead0e..09a414c 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -48,6 +48,46 @@ TICK = "'" specialsre = re.compile(r'[][\\()<>@,:;".]') escapesre = re.compile(r'[\\"]') +_EMAIL_CONFIG_FILE = "/etc/python/email.cfg" +_cached_strict_addr_parsing = None + + +def _use_strict_email_parsing(): + """Return the cached strict-parsing default, computing it on first use.""" + global _cached_strict_addr_parsing + if _cached_strict_addr_parsing is None: + _cached_strict_addr_parsing = _use_strict_email_parsing_impl() + return _cached_strict_addr_parsing + + +def _use_strict_email_parsing_impl(): + """Returns True if strict email parsing is not disabled by + config file or env variable. + """ + disabled = bool(os.environ.get("PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING")) + if disabled: + return False + + try: + file = open(_EMAIL_CONFIG_FILE) + except FileNotFoundError: + pass + else: + with file: + import configparser + config = configparser.ConfigParser( + interpolation=None, + comment_prefixes=('#', ), + + ) + config.read_file(file) + disabled = config.getboolean('email_addr_parsing', "PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING", fallback=None) + + if disabled: + return False + + return True + def _has_surrogates(s): """Return True if s may contain surrogate-escaped binary data.""" @@ -149,7 +189,7 @@ def _strip_quoted_realnames(addr): supports_strict_parsing = True -def getaddresses(fieldvalues, *, strict=True): +def getaddresses(fieldvalues, *, strict=None): """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. 
When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in @@ -158,6 +198,11 @@ def getaddresses(fieldvalues, *, strict=True): If strict is true, use a strict parser which rejects malformed inputs. """ + # If default is used, it's True unless disabled + # by env variable or config file. + if strict == None: + strict = _use_strict_email_parsing() + # If strict is true, if the resulting list of parsed addresses is greater # than the number of fieldvalues in the input list, a parsing error has # occurred and consequently a list containing a single empty 2-tuple [('', @@ -321,7 +366,7 @@ def parsedate_to_datetime(data): tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) -def parseaddr(addr, *, strict=True): +def parseaddr(addr, *, strict=None): """ Parse addr into its constituent realname and email address parts. @@ -330,6 +375,11 @@ def parseaddr(addr, *, strict=True): If strict is True, use a strict parser which rejects malformed inputs. """ + # If default is used, it's True unless disabled + # by env variable or config file. 
+ if strict == None: + strict = _use_strict_email_parsing() + if not strict: addrs = _AddressList(addr).addresslist if not addrs: diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index ad60ed3..f85da56 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -8,6 +8,9 @@ import base64 import unittest import textwrap import warnings +import contextlib +import tempfile +import os from io import StringIO, BytesIO from itertools import chain @@ -41,8 +44,8 @@ from email import quoprimime from email import utils from test import support -from test.support import threading_helper -from test.support.os_helper import unlink +from test.support import threading_helper, swap_attr +from test.support.os_helper import unlink, EnvironmentVarGuard from test.test_email import openfile, TestEmailBase # These imports are documented to work, but we are testing them using a @@ -3442,6 +3445,73 @@ Foo # Test email.utils.supports_strict_parsing attribute self.assertEqual(email.utils.supports_strict_parsing, True) + def test_parsing_errors_strict_set_via_env_var(self): + address = 'alice@example.org )Alice(' + empty = ('', '') + + # Reset cached default value to make the function + # reload the config file provided below. 
+ utils._cached_strict_addr_parsing = None + + # Strict disabled via env variable, old behavior expected + with EnvironmentVarGuard() as environ: + environ["PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING"] = "1" + + self.assertEqual(utils.getaddresses([address]), + [('', 'alice@example.org'), ('', ''), ('', 'Alice')]) + self.assertEqual(utils.parseaddr([address]), ('', address)) + + # Clear cache again + utils._cached_strict_addr_parsing = None + + # Default strict=True, empty result expected + self.assertEqual(utils.getaddresses([address]), [empty]) + self.assertEqual(utils.parseaddr([address]), empty) + + # Clear cache again + utils._cached_strict_addr_parsing = None + + # Empty string in env variable = strict parsing enabled (default) + with EnvironmentVarGuard() as environ: + environ["PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING"] = "" + + # Default strict=True, empty result expected + self.assertEqual(utils.getaddresses([address]), [empty]) + self.assertEqual(utils.parseaddr([address]), empty) + + @contextlib.contextmanager + def _email_strict_parsing_conf(self): + """Context manager that points utils._EMAIL_CONFIG_FILE at a temporary config file disabling strict parsing""" + with tempfile.TemporaryDirectory() as tmpdirname: + filename = os.path.join(tmpdirname, 'conf.cfg') + with swap_attr(utils, "_EMAIL_CONFIG_FILE", filename): + with open(filename, 'w') as file: + file.write('[email_addr_parsing]\n') + file.write('PYTHON_EMAIL_DISABLE_STRICT_ADDR_PARSING = true') + utils._EMAIL_CONFIG_FILE = filename + yield + + def test_parsing_errors_strict_disabled_via_config_file(self): + address = 'alice@example.org )Alice(' + empty = ('', '') + + # Reset cached default value to make the function + # reload the config file provided below. 
+ utils._cached_strict_addr_parsing = None + + # Strict disabled via config file, old results expected + with self._email_strict_parsing_conf(): + self.assertEqual(utils.getaddresses([address]), + [('', 'alice@example.org'), ('', ''), ('', 'Alice')]) + self.assertEqual(utils.parseaddr([address]), ('', address)) + + # Clear cache again + utils._cached_strict_addr_parsing = None + + # Default strict=True, empty result expected + self.assertEqual(utils.getaddresses([address]), [empty]) + self.assertEqual(utils.parseaddr([address]), empty) + def test_getaddresses_nasty(self): for addresses, expected in ( (['"Sürname, Firstname" '], -- 2.46.0