Add support for Python 3.12 (and adjust code for tokenizer changes).
matthewwardrop committed Nov 30, 2023
1 parent 6bd31ca, commit 6dae98d
Showing 4 changed files with 13 additions and 9 deletions.
patsy/parse_formula.py: 4 additions, 4 deletions
@@ -84,15 +84,15 @@ def _tokenize_formula(code, operator_strings):
# "magic" token does:
end_tokens = set(magic_token_types)
end_tokens.remove("(")

it = PushbackAdapter(python_tokenize(code))
for pytype, token_string, origin in it:
if token_string in magic_token_types:
yield Token(magic_token_types[token_string], origin)
else:
it.push_back((pytype, token_string, origin))
yield _read_python_expr(it, end_tokens)

def test__tokenize_formula():
code = "y ~ a + (foo(b,c + 2)) + -1 + 0 + 10"
tokens = list(_tokenize_formula(code, ["+", "-", "~"]))
@@ -274,8 +274,8 @@ def _parsing_error_test(parse_fn, error_descs):  # pragma: no cover
         except PatsyError as e:
             print(e)
             assert e.origin.code == bad_code
-            assert e.origin.start == start
-            assert e.origin.end == end
+            assert e.origin.start in (0, start)
+            assert e.origin.end in (end, len(bad_code))
         else:
             assert False, "parser failed to report an error!"

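The loosened assertions above track a change in how tokenization errors are reported: on Python 3.12, patsy's tokenizer wrapper re-raises an unterminated-string error with an origin spanning the entire input, while older interpreters point at the offending token itself (see the patsy/tokens.py hunks below). A minimal sketch of the difference, using the pieces this commit touches (output depends on the interpreter version):

    from patsy import PatsyError
    from patsy.tokens import python_tokenize

    bad_code = 'y ~ "an unclosed string'
    try:
        list(python_tokenize(bad_code))  # generator; list() forces tokenization
    except PatsyError as e:
        # Python <= 3.11: origin covers just the offending token.
        # Python 3.12: origin is (0, len(bad_code)), i.e. the whole input.
        print(e.origin.start, e.origin.end)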
patsy/tokens.py: 6 additions, 4 deletions
@@ -31,11 +31,10 @@ def python_tokenize(code):
         for (pytype, string, (_, start), (_, end), code) in it:
             if pytype == tokenize.ENDMARKER:
                 break
-            origin = Origin(code, start, end)
-            assert pytype != tokenize.NL
-            if pytype == tokenize.NEWLINE:
+            if pytype in (tokenize.NL, tokenize.NEWLINE):
                 assert string == ""
                 continue
+            origin = Origin(code, start, end)
             if pytype == tokenize.ERRORTOKEN:
                 raise PatsyError("error tokenizing input "
                                  "(maybe an unclosed string?)",
@@ -53,8 +52,11 @@ def python_tokenize(code):
             # end of the source text. We have our own error handling for
             # such cases, so just treat this as an end-of-stream.
             #
+            if "unterminated string literal" in e.args[0]:
+                raise PatsyError(f"error tokenizing input ({e.args[0]})", Origin(code, 0, len(code)))
+
             # Just in case someone adds some other error case:
-            assert e.args[0].startswith("EOF in multi-line")
+            assert "EOF in multi-line" in e.args[0]
             return
 
 def test_python_tokenize():
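Both tokens.py hunks track behavioral changes in the standard library tokenizer shipped with Python 3.12: NL tokens can now show up where patsy previously asserted they never would, and an unclosed string raises tokenize.TokenError (with a message containing "unterminated string literal") instead of yielding an ERRORTOKEN. A minimal stdlib-only sketch for observing both behaviors; exact token streams and messages vary by version:

    import io
    import tokenize

    # Token stream: trailing NEWLINE/NL bookkeeping differs on 3.12, hence
    # the relaxed `pytype in (tokenize.NL, tokenize.NEWLINE)` check above.
    for tok in tokenize.generate_tokens(io.StringIO("a + b").readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))

    # Error reporting: 3.12 raises TokenError for the unclosed string; 3.11
    # and earlier emit an ERRORTOKEN for the stray quote and only raise
    # TokenError at EOF for constructs still open (here, the parenthesis).
    try:
        list(tokenize.generate_tokens(io.StringIO('f("oops').readline))
    except tokenize.TokenError as e:
        print(e.args[0])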
setup.py: 1 addition, 0 deletions
@@ -46,6 +46,7 @@
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Scientific/Engineering",
],
)
tox.ini: 2 additions, 1 deletion
@@ -1,5 +1,5 @@
 [tox]
-envlist = {py27,py36,py37,py38,py39,py310,py311}-{with_pandas,without_pandas}
+envlist = {py27,py36,py37,py38,py39,py310,py311,py312}-{with_pandas,without_pandas}
 
 [gh-actions]
 python =
@@ -10,6 +10,7 @@ python =
     3.9: py39
     3.10: py310
     3.11: py311
+    3.12: py312
 
 [testenv]
 deps=
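With the new factors in envlist, the Python 3.12 matrix can also be exercised locally, assuming a 3.12 interpreter is available on the machine:

    tox -e py312-with_pandas,py312-without_pandas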
