diff --git a/uncompyle6/scanner.py b/uncompyle6/scanner.py index 7384f1317..ee3e73bd1 100644 --- a/uncompyle6/scanner.py +++ b/uncompyle6/scanner.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016, 2018-2023 by Rocky Bernstein +# Copyright (c) 2016, 2018-2024 by Rocky Bernstein # Copyright (c) 2005 by Dan Pascu # Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 1999 John Aycock @@ -21,13 +21,11 @@ scanners, e.g. for Python 2.7 or 3.4. """ -from types import ModuleType -from typing import Optional, Tuple, Union from array import array from collections import namedtuple +from types import ModuleType +from typing import Optional, Tuple, Union -from uncompyle6.scanners.tok import Token -from xdis.version_info import IS_PYPY, version_tuple_to_str import xdis from xdis import ( Bytecode, @@ -37,6 +35,9 @@ instruction_size, next_offset, ) +from xdis.version_info import IS_PYPY, version_tuple_to_str + +from uncompyle6.scanners.tok import Token # The byte code versions we support. # Note: these all have to be tuples of 2 ints @@ -80,6 +81,7 @@ # FIXME: DRY L65536 = 65536 + def long(num): return num @@ -96,7 +98,6 @@ class Code(object): """ def __init__(self, co, scanner, classname=None, show_asm=None): - # Full initialization is given below, but for linters # well set up some initial values. 
self.co_code = None # Really either bytes for >= 3.0 and string in < 3.0 @@ -133,9 +134,7 @@ def __init__(self, version: tuple, show_asm=None, is_pypy=False): # FIXME: This weird Python2 behavior is not Python3 self.resetTokenClass() - def bound_collection_from_tokens( - self, tokens, t, i, collection_type - ): + def bound_collection_from_tokens(self, tokens, t, i, collection_type): count = t.attr assert isinstance(count, int) @@ -429,7 +428,7 @@ def inst_matches(self, start, end, instr, target=None, include_beyond_target=Fal """ try: None in instr - except: + except Exception: instr = [instr] first = self.offset2inst_index[start] @@ -620,16 +619,14 @@ def parse_fn_counts_30_35(argc: int) -> Tuple[int, int, int]: def get_scanner(version: Union[str, tuple], is_pypy=False, show_asm=None) -> Scanner: - # If version is a string, turn that into the corresponding float. if isinstance(version, str): if version not in canonic_python_version: - raise RuntimeError("Unknown Python version in xdis %s" % version) + raise RuntimeError(f"Unknown Python version in xdis {version}") canonic_version = canonic_python_version[version] if canonic_version not in CANONIC2VERSION: raise RuntimeError( - "Unsupported Python version %s (canonic %s)" - % (version, canonic_version) + f"Unsupported Python version {version} (canonic {canonic_version})" ) version = CANONIC2VERSION[canonic_version] @@ -680,5 +677,6 @@ def get_scanner(version: Union[str, tuple], is_pypy=False, show_asm=None) -> Sca # scanner = get_scanner('2.7.13', True) # scanner = get_scanner(sys.version[:5], False) from xdis.version_info import PYTHON_VERSION_TRIPLE + scanner = get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY, True) tokens, customize = scanner.ingest(co, {}, show_asm="after") diff --git a/uncompyle6/semantics/pysource.py b/uncompyle6/semantics/pysource.py index 3b62481c2..714596857 100644 --- a/uncompyle6/semantics/pysource.py +++ b/uncompyle6/semantics/pysource.py @@ -131,6 +131,7 @@ import sys from io import StringIO 
+from typing import Optional from spark_parser import GenericASTTraversal from xdis import COMPILER_FLAG_BIT, iscode @@ -159,7 +160,11 @@ ) from uncompyle6.semantics.customize import customize_for_version from uncompyle6.semantics.gencomp import ComprehensionMixin -from uncompyle6.semantics.helper import find_globals_and_nonlocals, print_docstring +from uncompyle6.semantics.helper import ( + find_globals_and_nonlocals, + is_lambda_mode, + print_docstring, +) from uncompyle6.semantics.make_function1 import make_function1 from uncompyle6.semantics.make_function2 import make_function2 from uncompyle6.semantics.make_function3 import make_function3 @@ -213,7 +218,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin): def __init__( self, - version, + version: tuple, out, scanner, showast=TREE_DEFAULT_DEBUG, @@ -223,7 +228,7 @@ def __init__( linestarts={}, tolerate_errors=False, ): - """`version' is the Python version (a float) of the Python dialect + """`version' is the Python version of the Python dialect of both the syntax tree and language we should produce. `out' is IO-like file pointer to where the output should go. It @@ -235,9 +240,12 @@ def __init__( If `showast' is True, we print the syntax tree. - `compile_mode' is is either 'exec' or 'single'. It is the compile - mode that was used to create the Syntax Tree and specifies a - grammar variant within a Python version to use. + `compile_mode` is either `exec`, `single` or `lambda`. + + For `lambda`, the grammar that can be used in lambda + expressions is used. Otherwise, it is the compile mode that + was used to create the Syntax Tree and specifies a grammar + variant within a Python version to use. `is_pypy` should be True if the Syntax Tree was generated for PyPy. 
@@ -262,10 +270,8 @@ def __init__( self.currentclass = None self.classes = [] self.debug_parser = dict(debug_parser) - # Initialize p_lambda on demand self.line_number = 1 self.linemap = {} - self.p_lambda = None self.params = params self.param_stack = [] self.ERROR = None @@ -276,11 +282,15 @@ def __init__( self.pending_newlines = 0 self.linestarts = linestarts self.treeTransform = TreeTransform(version=self.version, show_ast=showast) + # FIXME: have p.insts update in a better way # modularity is broken here self.insts = scanner.insts self.offset2inst_index = scanner.offset2inst_index + # Initialize p_lambda on demand + self.p_lambda = None + # This is in Python 2.6 on. It changes the way # strings get interpreted. See n_LOAD_CONST self.FUTURE_UNICODE_LITERALS = False @@ -507,19 +517,19 @@ def is_return_none(self, node): def pp_tuple(self, tup): """Pretty print a tuple""" last_line = self.f.getvalue().split("\n")[-1] - l = len(last_line) + 1 - indent = " " * l + ll = len(last_line) + 1 + indent = " " * ll self.write("(") sep = "" for item in tup: self.write(sep) - l += len(sep) + ll += len(sep) s = better_repr(item, self.version) - l += len(s) + ll += len(s) self.write(s) sep = "," - if l > LINE_LENGTH: - l = 0 + if ll > LINE_LENGTH: + ll = 0 sep += "\n" + indent else: sep += " " @@ -699,9 +709,10 @@ def template_engine(self, entry, startnode): """ # print("-----") - # print(startnode) + # print(startnode.kind) # print(entry[0]) # print('======') + fmt = entry[0] arg = 1 i = 0 @@ -794,13 +805,9 @@ def template_engine(self, entry, startnode): node[index].kind, ) else: - assert ( - node[tup[0]] in tup[1] - ), "at %s[%d], expected to be in '%s' node; got '%s'" % ( - node.kind, - arg, - index[1], - node[index[0]].kind, + assert node[tup[0]] in tup[1], ( + f"at {node.kind}[{tup[0]}], expected to be in '{tup[1]}' " + f"node; got '{node[tup[0]].kind}'" ) else: @@ -869,7 +876,7 @@ def template_engine(self, entry, startnode): d = node.__dict__ try: self.write(eval(expr, d, 
d)) - except: + except Exception: raise m = escape.search(fmt, i) self.write(fmt[i:]) @@ -1190,7 +1197,7 @@ def build_ast( is_lambda=False, noneInNames=False, is_top_level_module=False, - ): + ) -> GenericASTTraversal: # FIXME: DRY with fragments.py # assert isinstance(tokens[0], Token) @@ -1242,7 +1249,7 @@ def build_ast( # Build a parse tree from a tokenized and massaged disassembly. try: # FIXME: have p.insts update in a better way - # modularity is broken here + # Modularity is broken here. p_insts = self.p.insts self.p.insts = self.scanner.insts self.p.offset2inst_index = self.scanner.offset2inst_index @@ -1255,6 +1262,7 @@ def build_ast( checker(ast, False, self.ast_errors) self.customize(customize) + transform_tree = self.treeTransform.transform(ast, code) self.maybe_show_tree(ast, phase="before") @@ -1270,13 +1278,15 @@ def _get_mapping(cls, node): def code_deparse( co, out=sys.stdout, - version=None, + version: Optional[tuple] = None, debug_opts=DEFAULT_DEBUG_OPTS, code_objects={}, compile_mode="exec", is_pypy=IS_PYPY, walker=SourceWalker, -): + start_offset: int = 0, + stop_offset: int = -1, +) -> Optional[SourceWalker]: """ ingests and deparses a given code block 'co'. If version is None, we will use the current Python interpreter version. @@ -1284,6 +1294,9 @@ def code_deparse( assert iscode(co) + if out is None: + out = sys.stdout + if version is None: version = PYTHON_VERSION_TRIPLE @@ -1294,6 +1307,21 @@ def code_deparse( co, code_objects=code_objects, show_asm=debug_opts["asm"] ) + if start_offset > 0: + for i, t in enumerate(tokens): + # If t.offset is a string, we want to skip this. + if isinstance(t.offset, int) and t.offset >= start_offset: + tokens = tokens[i:] + break + + if stop_offset > -1: + for i, t in enumerate(tokens): + # In contrast to the test for start_offset If t.offset is + # a string, we want to extract the integer offset value. 
+ if t.off2int() >= stop_offset: + tokens = tokens[:i] + break + debug_parser = debug_opts.get("grammar", dict(PARSER_DEFAULT_DEBUG)) # Build Syntax Tree from disassembly. @@ -1317,7 +1345,7 @@ def code_deparse( tokens, customize, co, - is_lambda=(compile_mode == "lambda"), + is_lambda=is_lambda_mode(compile_mode), is_top_level_module=is_top_level_module, ) @@ -1326,7 +1354,7 @@ def code_deparse( return None # FIXME use a lookup table here. - if compile_mode == "lambda": + if is_lambda_mode(compile_mode): expected_start = "lambda_start" elif compile_mode == "eval": expected_start = "expr_start" @@ -1339,10 +1367,12 @@ def code_deparse( expected_start = None else: expected_start = None + if expected_start: - assert ( - deparsed.ast == expected_start - ), f"Should have parsed grammar start to '{expected_start}'; got: {deparsed.ast.kind}" + assert deparsed.ast == expected_start, ( + f"Should have parsed grammar start to '{expected_start}'; " + f"got: {deparsed.ast.kind}" + ) # save memory del tokens @@ -1382,7 +1412,7 @@ def code_deparse( deparsed.ast, name=co.co_name, customize=customize, - is_lambda=compile_mode == "lambda", + is_lambda=is_lambda_mode(compile_mode), debug_opts=debug_opts, ) @@ -1410,9 +1440,12 @@ def deparse_code2str( compile_mode="exec", is_pypy=IS_PYPY, walker=SourceWalker, -): - """Return the deparsed text for a Python code object. `out` is where any intermediate - output for assembly or tree output will be sent. + start_offset: int = 0, + stop_offset: int = -1, +) -> str: + """ + Return the deparsed text for a Python code object. `out` is where + any intermediate output for assembly or tree output will be sent. """ return code_deparse( code, @@ -1427,6 +1460,7 @@ def deparse_code2str( if __name__ == "__main__": + def deparse_test(co): """This is a docstring""" s = deparse_code2str(co) @@ -1434,5 +1468,4 @@ def deparse_test(co): print(s) return - deparse_test(deparse_test.__code__)