From be21651a1fada038b8ca00938d063fbb3336b989 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 16 Aug 2023 17:23:26 +0200 Subject: [PATCH 2/7] Revert "Make more difficult sanitize of the expression string before eval" This reverts commit 00b035c78ca5ac209b58b56b5dcc99596cac423c. --- ANNOUNCE.rst | 23 ++--------------------- RELEASE_NOTES.rst | 19 +------------------ doc/user_guide.rst | 27 ++++++++++----------------- numexpr/necompiler.py | 27 ++++++++------------------- numexpr/tests/test_numexpr.py | 18 ++---------------- 5 files changed, 23 insertions(+), 91 deletions(-) diff --git a/ANNOUNCE.rst b/ANNOUNCE.rst index 4e9070f2cc..f038df4f44 100644 --- a/ANNOUNCE.rst +++ b/ANNOUNCE.rst @@ -4,10 +4,7 @@ Announcing NumExpr 2.8.5 Hi everyone, -In 2.8.5 we have added a new function, `validate` which checks an expression `ex` -for validity, for usage where the program is parsing a user input. There are also -consequences for this sort of usage, since `eval(ex)` is called, and as such we -do some string sanitization as described below. +**Under development.** Project documentation is available at: @@ -16,23 +13,7 @@ http://numexpr.readthedocs.io/ Changes from 2.8.4 to 2.8.5 --------------------------- -* A `validate` function has been added. This function checks the inputs, returning - `None` on success or raising an exception on invalid inputs. This function was - added as numerous projects seem to be using NumExpr for parsing user inputs. - `re_evaluate` may be called directly following `validate`. -* As an addendum to the use of NumExpr for parsing user inputs, is that NumExpr - calls `eval` on the inputs. A regular expression is now applied to help sanitize - the input expression string, forbidding '__', ':', and ';'. Attribute access - is also banned except for '.r' for real and '.i' for imag. -* Thanks to timbrist for a fix to behavior of NumExpr with integers to negative - powers. NumExpr was pre-checking integer powers for negative values, which - was both inefficient and causing parsing errors in some situations. Now NumExpr - will simply return 0 as a result for such cases. While NumExpr generally tries - to follow NumPy behavior, performance is also critical. -* Thanks to peadar for some fixes to how NumExpr launches threads for embedded - applications. -* Thanks to de11n for making parsing of the `site.cfg` for MKL consistent among - all shared platforms. +**Under development.** What's Numexpr? diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 9cf5d3977b..4929a42e12 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -5,24 +5,7 @@ Release notes for NumExpr 2.8 series Changes from 2.8.4 to 2.8.5 --------------------------- -* A `validate` function has been added. This function checks the inputs, returning - `None` on success or raising an exception on invalid inputs. This function was - added as numerous projects seem to be using NumExpr for parsing user inputs. - `re_evaluate` may be called directly following `validate`. -* As an addendum to the use of NumExpr for parsing user inputs, is that NumExpr - calls `eval` on the inputs. A regular expression is now applied to help sanitize - the input expression string, forbidding '__', ':', and ';'. Attribute access - is also banned except for '.r' for real and '.i' for imag. -* Thanks to timbrist for a fix to behavior of NumExpr with integers to negative - powers. NumExpr was pre-checking integer powers for negative values, which - was both inefficient and causing parsing errors in some situations. Now NumExpr - will simply return 0 as a result for such cases. While NumExpr generally tries - to follow NumPy behavior, performance is also critical. -* Thanks to peadar for some fixes to how NumExpr launches threads for embedded - applications. -* Thanks to de11n for making parsing of the `site.cfg` for MKL consistent among - all shared platforms. - +**Under development.** Changes from 2.8.3 to 2.8.4 --------------------------- diff --git a/doc/user_guide.rst b/doc/user_guide.rst index 3a3cf63d9c..74306eb658 100644 --- a/doc/user_guide.rst +++ b/doc/user_guide.rst @@ -1,7 +1,7 @@ -NumExpr 2.8 User Guide +NumExpr 2.0 User Guide ====================== -The NumExpr package supplies routines for the fast evaluation of +The :code:`numexpr` package supplies routines for the fast evaluation of array expressions elementwise by using a vector-based virtual machine. @@ -11,33 +11,23 @@ Using it is simple:: >>> import numexpr as ne >>> a = np.arange(10) >>> b = np.arange(0, 20, 2) - >>> c = ne.evaluate('2*a + 3*b') + >>> c = ne.evaluate("2*a+3*b") >>> c array([ 0, 8, 16, 24, 32, 40, 48, 56, 64, 72]) -It is also possible to use NumExpr to validate an expression:: - - >>> ne.validate('2*a + 3*b') - -which returns `None` on success or raises an exception on invalid inputs. - -and it can also re_evaluate an expression:: - - >>> b = np.arange(0, 40, 4) - >>> ne.re_evaluate() - Building -------- -*NumExpr* requires Python_ 3.7 or greater, and NumPy_ 1.13 or greater. It is +*NumExpr* requires Python_ 2.6 or greater, and NumPy_ 1.7 or greater. It is built in the standard Python way: .. code-block:: bash - $ pip install . + $ python setup.py build + $ python setup.py install -You must have a C-compiler (i.e. MSVC Build tools on Windows and GCC on Linux) installed. +You must have a C-compiler (i.e. MSVC on Windows and GCC on Linux) installed. Then change to a directory that is not the repository directory (e.g. `/tmp`) and test :code:`numexpr` with: @@ -278,6 +268,9 @@ General routines * :code:`detect_number_of_cores()`: Detects the number of cores on a system. + + + Intel's VML specific support routines ------------------------------------- diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index cbf290932b..fef886baf5 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -260,17 +260,15 @@ class Immediate(Register): def __str__(self): return 'Immediate(%d)' % (self.node.value,) - -_forbidden_re = re.compile('[\;[\:]|__|\.[abcdefghjklmnopqstuvwxyzA-Z_]') +_forbidden_re = re.compile('[\;[\:]|__') def stringToExpression(s, types, context): """Given a string, convert it to a tree of ExpressionNode's. """ # sanitize the string for obvious attack vectors that NumExpr cannot # parse into its homebrew AST. This is to protect the call to `eval` below. - # We forbid `;`, `:`. `[` and `__`, and attribute access via '.'. - # We cannot ban `.real` or `.imag` however... - no_whitespace = re.sub(r'\s+', '', s) - if _forbidden_re.search(no_whitespace) is not None: + # We forbid `;`, `:`. `[` and `__` + # We would like to forbid `.` but it is both a reference and decimal point. + if _forbidden_re.search(s) is not None: raise ValueError(f'Expression {s} has forbidden control characters.') old_ctx = expressions._context.get_current_context() @@ -768,6 +766,7 @@ def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2): _names_cache = CacheDict(256) _numexpr_cache = CacheDict(256) _numexpr_last = {} +_numexpr_sanity = set() evaluate_lock = threading.Lock() # MAYBE: decorate this function to add attributes instead of having the @@ -829,13 +828,6 @@ def validate(ex: str, _frame_depth: int The calling frame depth. Unless you are a NumExpr developer you should not set this value. - - Note - ---- - Both `validate` and by extension `evaluate` call `eval(ex)`, which is - potentially dangerous on unsanitized inputs. As such, NumExpr does some - sanitization, banning the character ':;[', the dunder '__', and attribute - access to all but '.r' for real and '.i' for imag access to complex numbers. """ global _numexpr_last @@ -865,6 +857,8 @@ def validate(ex: str, kwargs = {'out': out, 'order': order, 'casting': casting, 'ex_uses_vml': ex_uses_vml} _numexpr_last = dict(ex=compiled_ex, argnames=names, kwargs=kwargs) + # with evaluate_lock: + # return compiled_ex(*arguments, **kwargs) except Exception as e: return e return None @@ -924,12 +918,7 @@ def evaluate(ex: str, The calling frame depth. Unless you are a NumExpr developer you should not set this value. - Note - ---- - Both `validate` and by extension `evaluate` call `eval(ex)`, which is - potentially dangerous on unsanitized inputs. As such, NumExpr does some - sanitization, banning the character ':;[', the dunder '__', and attribute - access to all but '.r' for real and '.i' for imag access to complex numbers. + """ # We could avoid code duplication if we called validate and then re_evaluate # here, but they we have difficulties with the `sys.getframe(2)` call in diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index a9f917fccd..ebc41c8d54 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -536,27 +536,13 @@ class test_evaluate(TestCase): # Forbid semicolon try: - evaluate('import os;') + evaluate('import os; os.cpu_count()') except ValueError: pass else: self.fail() - # Attribute access - try: - evaluate('os.cpucount()') - except ValueError: - pass - else: - self.fail() - - # But decimal point must pass - a = 3.0 - evaluate('a*2.') - evaluate('2.+a') - - - + # I struggle to come up with cases for our ban on `'` and `"`