From be21651a1fada038b8ca00938d063fbb3336b989 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
Date: Wed, 16 Aug 2023 17:23:26 +0200
Subject: [PATCH 2/7] Revert "Make more difficult sanitize of the expression
 string before eval"

This reverts commit 00b035c78ca5ac209b58b56b5dcc99596cac423c.
---
 ANNOUNCE.rst                  | 23 ++---------------------
 RELEASE_NOTES.rst             | 19 +------------------
 doc/user_guide.rst            | 27 ++++++++++-----------------
 numexpr/necompiler.py         | 27 ++++++++-------------------
 numexpr/tests/test_numexpr.py | 18 ++----------------
 5 files changed, 23 insertions(+), 91 deletions(-)

diff --git a/ANNOUNCE.rst b/ANNOUNCE.rst
index 4e9070f2cc..f038df4f44 100644
--- a/ANNOUNCE.rst
+++ b/ANNOUNCE.rst
@@ -4,10 +4,7 @@ Announcing NumExpr 2.8.5
 
 Hi everyone, 
 
-In 2.8.5 we have added a new function, `validate` which checks an expression `ex`
-for validity, for usage where the program is parsing a user input. There are also 
-consequences for this sort of usage, since `eval(ex)` is called, and as such we 
-do some string sanitization as described below.
+**Under development.**
 
 Project documentation is available at:
 
@@ -16,23 +13,7 @@ http://numexpr.readthedocs.io/
 Changes from 2.8.4 to 2.8.5
 ---------------------------
 
-* A `validate` function has been added. This function checks the inputs, returning 
-  `None` on success or raising an exception on invalid inputs. This function was 
-  added as numerous projects seem to be using NumExpr for parsing user inputs.
-  `re_evaluate` may be called directly following `validate`.
-* As an addendum to the use of NumExpr for parsing user inputs, is that NumExpr
-  calls `eval` on the inputs. A regular expression is now applied to help sanitize 
-  the input expression string, forbidding '__', ':', and ';'. Attribute access 
-  is also banned except for '.r' for real and '.i'  for imag.
-* Thanks to timbrist for a fix to behavior of NumExpr with integers to negative 
-  powers. NumExpr was pre-checking integer powers for negative values, which 
-  was both inefficient and causing parsing errors in some situations. Now NumExpr
-  will simply return 0 as a result for such cases. While NumExpr generally tries 
-  to follow NumPy behavior, performance is also critical. 
-* Thanks to peadar for some fixes to how NumExpr launches threads for embedded 
-  applications.
-* Thanks to de11n for making parsing of the `site.cfg` for MKL consistent among 
-  all shared platforms.
+**Under development.**
 
 
 What's Numexpr?
diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst
index 9cf5d3977b..4929a42e12 100644
--- a/RELEASE_NOTES.rst
+++ b/RELEASE_NOTES.rst
@@ -5,24 +5,7 @@ Release notes for NumExpr 2.8 series
 Changes from 2.8.4 to 2.8.5
 ---------------------------
 
-* A `validate` function has been added. This function checks the inputs, returning 
-  `None` on success or raising an exception on invalid inputs. This function was 
-  added as numerous projects seem to be using NumExpr for parsing user inputs.
-  `re_evaluate` may be called directly following `validate`.
-* As an addendum to the use of NumExpr for parsing user inputs, is that NumExpr
-  calls `eval` on the inputs. A regular expression is now applied to help sanitize 
-  the input expression string, forbidding '__', ':', and ';'. Attribute access 
-  is also banned except for '.r' for real and '.i'  for imag.
-* Thanks to timbrist for a fix to behavior of NumExpr with integers to negative 
-  powers. NumExpr was pre-checking integer powers for negative values, which 
-  was both inefficient and causing parsing errors in some situations. Now NumExpr
-  will simply return 0 as a result for such cases. While NumExpr generally tries 
-  to follow NumPy behavior, performance is also critical. 
-* Thanks to peadar for some fixes to how NumExpr launches threads for embedded 
-  applications.
-* Thanks to de11n for making parsing of the `site.cfg` for MKL consistent among 
-  all shared platforms.
-
+**Under development.**
 
 Changes from 2.8.3 to 2.8.4
 ---------------------------
diff --git a/doc/user_guide.rst b/doc/user_guide.rst
index 3a3cf63d9c..74306eb658 100644
--- a/doc/user_guide.rst
+++ b/doc/user_guide.rst
@@ -1,7 +1,7 @@
-NumExpr 2.8 User Guide
+NumExpr 2.0 User Guide
 ======================
 
-The NumExpr package supplies routines for the fast evaluation of
+The :code:`numexpr` package supplies routines for the fast evaluation of
 array expressions elementwise by using a vector-based virtual
 machine.
 
@@ -11,33 +11,23 @@ Using it is simple::
     >>> import numexpr as ne
     >>> a = np.arange(10)
     >>> b = np.arange(0, 20, 2)
-    >>> c = ne.evaluate('2*a + 3*b')
+    >>> c = ne.evaluate("2*a+3*b")
     >>> c
     array([ 0,  8, 16, 24, 32, 40, 48, 56, 64, 72])
 
 
-It is also possible to use NumExpr to validate an expression::
-
-    >>> ne.validate('2*a + 3*b')
-
-which returns `None` on success or raises an exception on invalid inputs.
-
-and it can also re_evaluate an expression::
-
-    >>> b = np.arange(0, 40, 4)
-    >>> ne.re_evaluate()
-
 Building
 --------
 
-*NumExpr* requires Python_ 3.7 or greater, and NumPy_ 1.13 or greater.  It is 
+*NumExpr* requires Python_ 2.6 or greater, and NumPy_ 1.7 or greater.  It is 
 built in the standard Python way:
 
 .. code-block:: bash
 
-    $ pip install .
+    $ python setup.py build
+    $ python setup.py install
 
-You must have a C-compiler (i.e. MSVC Build tools on Windows and GCC on Linux) installed.
+You must have a C-compiler (i.e. MSVC on Windows and GCC on Linux) installed.
 
 Then change to a directory that is not the repository directory (e.g. `/tmp`) and 
 test :code:`numexpr` with:
@@ -278,6 +268,9 @@ General routines
   * :code:`detect_number_of_cores()`: Detects the number of cores on a system.
 
 
+
+
+
 Intel's VML specific support routines
 -------------------------------------
 
diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py
index cbf290932b..fef886baf5 100644
--- a/numexpr/necompiler.py
+++ b/numexpr/necompiler.py
@@ -260,17 +260,15 @@ class Immediate(Register):
     def __str__(self):
         return 'Immediate(%d)' % (self.node.value,)
 
-
-_forbidden_re = re.compile('[\;[\:]|__|\.[abcdefghjklmnopqstuvwxyzA-Z_]')
+_forbidden_re = re.compile('[\;[\:]|__')
 def stringToExpression(s, types, context):
     """Given a string, convert it to a tree of ExpressionNode's.
     """
     # sanitize the string for obvious attack vectors that NumExpr cannot 
     # parse into its homebrew AST. This is to protect the call to `eval` below.
-    # We forbid `;`, `:`. `[` and `__`, and attribute access via '.'.
-    # We cannot ban `.real` or `.imag` however...
-    no_whitespace = re.sub(r'\s+', '', s)
-    if _forbidden_re.search(no_whitespace) is not None:
+    # We forbid `;`, `:`. `[` and `__`
+    # We would like to forbid `.` but it is both a reference and decimal point.
+    if _forbidden_re.search(s) is not None:
         raise ValueError(f'Expression {s} has forbidden control characters.')
     
     old_ctx = expressions._context.get_current_context()
@@ -768,6 +766,7 @@ def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2):
 _names_cache = CacheDict(256)
 _numexpr_cache = CacheDict(256)
 _numexpr_last = {}
+_numexpr_sanity = set()
 evaluate_lock = threading.Lock()
 
 # MAYBE: decorate this function to add attributes instead of having the 
@@ -829,13 +828,6 @@ def validate(ex: str,
     _frame_depth: int
         The calling frame depth. Unless you are a NumExpr developer you should 
         not set this value.
-
-    Note
-    ----
-    Both `validate` and by extension `evaluate` call `eval(ex)`, which is 
-    potentially dangerous on unsanitized inputs. As such, NumExpr does some 
-    sanitization, banning the character ':;[', the dunder '__', and attribute
-    access to all but '.r' for real and '.i' for imag access to complex numbers.
     """
     global _numexpr_last
 
@@ -865,6 +857,8 @@ def validate(ex: str,
         kwargs = {'out': out, 'order': order, 'casting': casting,
                 'ex_uses_vml': ex_uses_vml}
         _numexpr_last = dict(ex=compiled_ex, argnames=names, kwargs=kwargs)
+        # with evaluate_lock:
+        #     return compiled_ex(*arguments, **kwargs)
     except Exception as e:
         return e
     return None
@@ -924,12 +918,7 @@ def evaluate(ex: str,
         The calling frame depth. Unless you are a NumExpr developer you should 
         not set this value.
 
-    Note
-    ----
-    Both `validate` and by extension `evaluate` call `eval(ex)`, which is 
-    potentially dangerous on unsanitized inputs. As such, NumExpr does some 
-    sanitization, banning the character ':;[', the dunder '__', and attribute
-    access to all but '.r' for real and '.i' for imag access to complex numbers.
+    
     """
     # We could avoid code duplication if we called validate and then re_evaluate 
     # here, but they we have difficulties with the `sys.getframe(2)` call in
diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py
index a9f917fccd..ebc41c8d54 100644
--- a/numexpr/tests/test_numexpr.py
+++ b/numexpr/tests/test_numexpr.py
@@ -536,27 +536,13 @@ class test_evaluate(TestCase):
 
         # Forbid semicolon
         try:
-            evaluate('import os;')
+            evaluate('import os; os.cpu_count()')
         except ValueError:
             pass
         else:
             self.fail()
 
-        # Attribute access
-        try:
-            evaluate('os.cpucount()')
-        except ValueError:
-            pass
-        else:
-            self.fail()
-
-        # But decimal point must pass
-        a = 3.0
-        evaluate('a*2.')
-        evaluate('2.+a')
-        
-
-        
+        # I struggle to come up with cases for our ban on `'` and `"`