You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
python-numexpr/SOURCES/0003-Revert-Add-in-protecti...

184 lines
6.3 KiB

From aa18c52226ddf3d5a3448702fc2b1a14b0ee3c2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
Date: Wed, 16 Aug 2023 17:23:36 +0200
Subject: [PATCH 3/7] Revert "Add in protections against call to
`eval(expression)`"
This reverts commit 4b2d89cf14e75030d27629925b9998e1e91d23c7.
---
numexpr/necompiler.py | 26 +++++++-----------
numexpr/tests/test_numexpr.py | 50 ++++-------------------------------
2 files changed, 15 insertions(+), 61 deletions(-)
diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py
index fef886baf5..37052acadb 100644
--- a/numexpr/necompiler.py
+++ b/numexpr/necompiler.py
@@ -13,7 +13,6 @@ import __future__
import sys
import numpy
import threading
-import re
is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE
from numexpr import interpreter, expressions, use_vml
@@ -260,17 +259,10 @@ class Immediate(Register):
def __str__(self):
return 'Immediate(%d)' % (self.node.value,)
-_forbidden_re = re.compile('[\;[\:]|__')
+
def stringToExpression(s, types, context):
"""Given a string, convert it to a tree of ExpressionNode's.
"""
- # sanitize the string for obvious attack vectors that NumExpr cannot
- # parse into its homebrew AST. This is to protect the call to `eval` below.
- # We forbid `;`, `:`. `[` and `__`
- # We would like to forbid `.` but it is both a reference and decimal point.
- if _forbidden_re.search(s) is not None:
- raise ValueError(f'Expression {s} has forbidden control characters.')
-
old_ctx = expressions._context.get_current_context()
try:
expressions._context.set_new_context(context)
@@ -293,10 +285,8 @@ def stringToExpression(s, types, context):
t = types.get(name, default_type)
names[name] = expressions.VariableNode(name, type_to_kind[t])
names.update(expressions.functions)
-
# now build the expression
ex = eval(c, names)
-
if expressions.isConstant(ex):
ex = expressions.ConstantNode(ex, expressions.getKind(ex))
elif not isinstance(ex, expressions.ExpressionNode):
@@ -621,7 +611,9 @@ def NumExpr(ex, signature=(), **kwargs):
Returns a `NumExpr` object containing the compiled function.
"""
-
+ # NumExpr can be called either directly by the end-user, in which case
+ # kwargs need to be sanitized by getContext, or by evaluate,
+ # in which case kwargs are in already sanitized.
# In that case _frame_depth is wrong (it should be 2) but it doesn't matter
# since it will not be used (because truediv='auto' has already been
# translated to either True or False).
@@ -766,7 +758,7 @@ def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2):
_names_cache = CacheDict(256)
_numexpr_cache = CacheDict(256)
_numexpr_last = {}
-_numexpr_sanity = set()
+
evaluate_lock = threading.Lock()
# MAYBE: decorate this function to add attributes instead of having the
@@ -869,7 +861,7 @@ def evaluate(ex: str,
out: numpy.ndarray = None,
order: str = 'K',
casting: str = 'safe',
- _frame_depth: int = 3,
+ _frame_depth: int=3,
**kwargs) -> numpy.ndarray:
"""
Evaluate a simple array expression element-wise using the virtual machine.
@@ -917,8 +909,6 @@ def evaluate(ex: str,
_frame_depth: int
The calling frame depth. Unless you are a NumExpr developer you should
not set this value.
-
-
"""
# We could avoid code duplication if we called validate and then re_evaluate
# here, but they we have difficulties with the `sys.getframe(2)` call in
@@ -931,6 +921,10 @@ def evaluate(ex: str,
else:
raise e
+
+
+
+
def re_evaluate(local_dict: Optional[Dict] = None,
_frame_depth: int=2) -> numpy.ndarray:
"""
diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py
index ebc41c8d54..ccb0b6cb07 100644
--- a/numexpr/tests/test_numexpr.py
+++ b/numexpr/tests/test_numexpr.py
@@ -373,9 +373,8 @@ class test_evaluate(TestCase):
a1 = array([1., 2., 3.])
b1 = array([4., 5., 6.])
c1 = array([7., 8., 9.])
- local_dict={'a': a1, 'b': b1, 'c': c1}
- x = evaluate("2*a + 3*b*c", local_dict=local_dict)
- x = re_evaluate(local_dict=local_dict)
+ x = evaluate("2*a + 3*b*c", local_dict={'a': a1, 'b': b1, 'c': c1})
+ x = re_evaluate()
assert_array_equal(x, array([86., 124., 168.]))
def test_validate(self):
@@ -401,10 +400,9 @@ class test_evaluate(TestCase):
a1 = array([1., 2., 3.])
b1 = array([4., 5., 6.])
c1 = array([7., 8., 9.])
- local_dict={'a': a1, 'b': b1, 'c': c1}
- retval = validate("2*a + 3*b*c", local_dict=local_dict)
+ retval = validate("2*a + 3*b*c", local_dict={'a': a1, 'b': b1, 'c': c1})
assert(retval is None)
- x = re_evaluate(local_dict=local_dict)
+ x = re_evaluate()
assert_array_equal(x, array([86., 124., 168.]))
# Test for issue #22
@@ -504,49 +502,11 @@ class test_evaluate(TestCase):
a = arange(3)
try:
evaluate("a < [0, 0, 0]")
- except (ValueError, TypeError):
+ except TypeError:
pass
else:
self.fail()
- def test_forbidden_tokens(self):
- # Forbid dunder
- try:
- evaluate('__builtins__')
- except ValueError:
- pass
- else:
- self.fail()
-
- # Forbid colon for lambda funcs
- try:
- evaluate('lambda x: x')
- except ValueError:
- pass
- else:
- self.fail()
-
- # Forbid indexing
- try:
- evaluate('locals()[]')
- except ValueError:
- pass
- else:
- self.fail()
-
- # Forbid semicolon
- try:
- evaluate('import os; os.cpu_count()')
- except ValueError:
- pass
- else:
- self.fail()
-
- # I struggle to come up with cases for our ban on `'` and `"`
-
-
-
-
def test_disassemble(self):
assert_equal(disassemble(NumExpr(
"where(m, a, -1)", [('m', bool), ('a', float)])),