Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sync with decompile3 #482

Merged
merged 2 commits into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 12 additions & 14 deletions uncompyle6/scanner.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2016, 2018-2023 by Rocky Bernstein
# Copyright (c) 2016, 2018-2024 by Rocky Bernstein
# Copyright (c) 2005 by Dan Pascu <[email protected]>
# Copyright (c) 2000-2002 by hartmut Goebel <[email protected]>
# Copyright (c) 1999 John Aycock
Expand All @@ -21,13 +21,11 @@
scanners, e.g. for Python 2.7 or 3.4.
"""

from types import ModuleType
from typing import Optional, Tuple, Union
from array import array
from collections import namedtuple
from types import ModuleType
from typing import Optional, Tuple, Union

from uncompyle6.scanners.tok import Token
from xdis.version_info import IS_PYPY, version_tuple_to_str
import xdis
from xdis import (
Bytecode,
Expand All @@ -37,6 +35,9 @@
instruction_size,
next_offset,
)
from xdis.version_info import IS_PYPY, version_tuple_to_str

from uncompyle6.scanners.tok import Token

# The byte code versions we support.
# Note: these all have to be tuples of 2 ints
Expand Down Expand Up @@ -80,6 +81,7 @@
# FIXME: DRY
L65536 = 65536


def long(num):
    """Return ``num`` unchanged.

    NOTE(review): presumably a stand-in for Python 2's ``long`` builtin so
    that shared scanner code can keep calling ``long(...)`` on Python 3 --
    confirm against the callers, which are not visible here.
    """
    return num

Expand All @@ -96,7 +98,6 @@ class Code(object):
"""

def __init__(self, co, scanner, classname=None, show_asm=None):

# Full initialization is given below, but for linters
# we'll set up some initial values.
self.co_code = None # Really either bytes for >= 3.0 and string in < 3.0
Expand Down Expand Up @@ -133,9 +134,7 @@ def __init__(self, version: tuple, show_asm=None, is_pypy=False):
# FIXME: This weird Python2 behavior is not Python3
self.resetTokenClass()

def bound_collection_from_tokens(
self, tokens, t, i, collection_type
):
def bound_collection_from_tokens(self, tokens, t, i, collection_type):
count = t.attr
assert isinstance(count, int)

Expand Down Expand Up @@ -429,7 +428,7 @@ def inst_matches(self, start, end, instr, target=None, include_beyond_target=Fal
"""
try:
None in instr
except:
except Exception:
instr = [instr]

first = self.offset2inst_index[start]
Expand Down Expand Up @@ -620,16 +619,14 @@ def parse_fn_counts_30_35(argc: int) -> Tuple[int, int, int]:


def get_scanner(version: Union[str, tuple], is_pypy=False, show_asm=None) -> Scanner:

# If version is a string, turn that into the corresponding version tuple.
if isinstance(version, str):
if version not in canonic_python_version:
raise RuntimeError("Unknown Python version in xdis %s" % version)
raise RuntimeError(f"Unknown Python version in xdis {version}")
canonic_version = canonic_python_version[version]
if canonic_version not in CANONIC2VERSION:
raise RuntimeError(
"Unsupported Python version %s (canonic %s)"
% (version, canonic_version)
f"Unsupported Python version {version} (canonic {canonic_version})"
)
version = CANONIC2VERSION[canonic_version]

Expand Down Expand Up @@ -680,5 +677,6 @@ def get_scanner(version: Union[str, tuple], is_pypy=False, show_asm=None) -> Sca
# scanner = get_scanner('2.7.13', True)
# scanner = get_scanner(sys.version[:5], False)
from xdis.version_info import PYTHON_VERSION_TRIPLE

scanner = get_scanner(PYTHON_VERSION_TRIPLE, IS_PYPY, True)
tokens, customize = scanner.ingest(co, {}, show_asm="after")
107 changes: 70 additions & 37 deletions uncompyle6/semantics/pysource.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@

import sys
from io import StringIO
from typing import Optional

from spark_parser import GenericASTTraversal
from xdis import COMPILER_FLAG_BIT, iscode
Expand Down Expand Up @@ -159,7 +160,11 @@
)
from uncompyle6.semantics.customize import customize_for_version
from uncompyle6.semantics.gencomp import ComprehensionMixin
from uncompyle6.semantics.helper import find_globals_and_nonlocals, print_docstring
from uncompyle6.semantics.helper import (
find_globals_and_nonlocals,
is_lambda_mode,
print_docstring,
)
from uncompyle6.semantics.make_function1 import make_function1
from uncompyle6.semantics.make_function2 import make_function2
from uncompyle6.semantics.make_function3 import make_function3
Expand Down Expand Up @@ -213,7 +218,7 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):

def __init__(
self,
version,
version: tuple,
out,
scanner,
showast=TREE_DEFAULT_DEBUG,
Expand All @@ -223,7 +228,7 @@ def __init__(
linestarts={},
tolerate_errors=False,
):
"""`version' is the Python version (a float) of the Python dialect
"""`version' is the Python version of the Python dialect
of both the syntax tree and language we should produce.

`out' is IO-like file pointer to where the output should go. It
Expand All @@ -235,9 +240,12 @@ def __init__(

If `showast' is True, we print the syntax tree.

`compile_mode' is is either 'exec' or 'single'. It is the compile
mode that was used to create the Syntax Tree and specifies a
grammar variant within a Python version to use.
`compile_mode` is either `exec`, `single` or `lambda`.

For `lambda`, the grammar that can be used in lambda
expressions is used. Otherwise, it is the compile mode that
was used to create the Syntax Tree and specifies a grammar
variant within a Python version to use.

`is_pypy` should be True if the Syntax Tree was generated for PyPy.

Expand All @@ -262,10 +270,8 @@ def __init__(
self.currentclass = None
self.classes = []
self.debug_parser = dict(debug_parser)
# Initialize p_lambda on demand
self.line_number = 1
self.linemap = {}
self.p_lambda = None
self.params = params
self.param_stack = []
self.ERROR = None
Expand All @@ -276,11 +282,15 @@ def __init__(
self.pending_newlines = 0
self.linestarts = linestarts
self.treeTransform = TreeTransform(version=self.version, show_ast=showast)

# FIXME: have p.insts update in a better way
# modularity is broken here
self.insts = scanner.insts
self.offset2inst_index = scanner.offset2inst_index

# Initialize p_lambda on demand
self.p_lambda = None

# This applies to Python 2.6 and later. It changes the way
# strings get interpreted. See n_LOAD_CONST
self.FUTURE_UNICODE_LITERALS = False
Expand Down Expand Up @@ -507,19 +517,19 @@ def is_return_none(self, node):
def pp_tuple(self, tup):
"""Pretty print a tuple"""
last_line = self.f.getvalue().split("\n")[-1]
l = len(last_line) + 1
indent = " " * l
ll = len(last_line) + 1
indent = " " * ll
self.write("(")
sep = ""
for item in tup:
self.write(sep)
l += len(sep)
ll += len(sep)
s = better_repr(item, self.version)
l += len(s)
ll += len(s)
self.write(s)
sep = ","
if l > LINE_LENGTH:
l = 0
if ll > LINE_LENGTH:
ll = 0
sep += "\n" + indent
else:
sep += " "
Expand Down Expand Up @@ -699,9 +709,10 @@ def template_engine(self, entry, startnode):
"""

# print("-----")
# print(startnode)
# print(startnode.kind)
# print(entry[0])
# print('======')

fmt = entry[0]
arg = 1
i = 0
Expand Down Expand Up @@ -794,13 +805,9 @@ def template_engine(self, entry, startnode):
node[index].kind,
)
else:
assert (
node[tup[0]] in tup[1]
), "at %s[%d], expected to be in '%s' node; got '%s'" % (
node.kind,
arg,
index[1],
node[index[0]].kind,
assert node[tup[0]] in tup[1], (
f"at {node.kind}[{tup[0]}], expected to be in '{tup[1]}' "
f"node; got '{node[tup[0]].kind}'"
)

else:
Expand Down Expand Up @@ -869,7 +876,7 @@ def template_engine(self, entry, startnode):
d = node.__dict__
try:
self.write(eval(expr, d, d))
except:
except Exception:
raise
m = escape.search(fmt, i)
self.write(fmt[i:])
Expand Down Expand Up @@ -1190,7 +1197,7 @@ def build_ast(
is_lambda=False,
noneInNames=False,
is_top_level_module=False,
):
) -> GenericASTTraversal:
# FIXME: DRY with fragments.py

# assert isinstance(tokens[0], Token)
Expand Down Expand Up @@ -1242,7 +1249,7 @@ def build_ast(
# Build a parse tree from a tokenized and massaged disassembly.
try:
# FIXME: have p.insts update in a better way
# modularity is broken here
# Modularity is broken here.
p_insts = self.p.insts
self.p.insts = self.scanner.insts
self.p.offset2inst_index = self.scanner.offset2inst_index
Expand All @@ -1255,6 +1262,7 @@ def build_ast(
checker(ast, False, self.ast_errors)

self.customize(customize)

transform_tree = self.treeTransform.transform(ast, code)

self.maybe_show_tree(ast, phase="before")
Expand All @@ -1270,20 +1278,25 @@ def _get_mapping(cls, node):
def code_deparse(
co,
out=sys.stdout,
version=None,
version: Optional[tuple] = None,
debug_opts=DEFAULT_DEBUG_OPTS,
code_objects={},
compile_mode="exec",
is_pypy=IS_PYPY,
walker=SourceWalker,
):
start_offset: int = 0,
stop_offset: int = -1,
) -> Optional[SourceWalker]:
"""
Ingests and deparses a given code block 'co'. If version is None,
we will use the current Python interpreter version.
"""

assert iscode(co)

if out is None:
out = sys.stdout

if version is None:
version = PYTHON_VERSION_TRIPLE

Expand All @@ -1294,6 +1307,21 @@ def code_deparse(
co, code_objects=code_objects, show_asm=debug_opts["asm"]
)

if start_offset > 0:
for i, t in enumerate(tokens):
# If t.offset is a string, we want to skip this.
if isinstance(t.offset, int) and t.offset >= start_offset:
tokens = tokens[i:]
break

if stop_offset > -1:
for i, t in enumerate(tokens):
# In contrast to the test for start_offset, if t.offset is
# a string, we want to extract the integer offset value.
if t.off2int() >= stop_offset:
tokens = tokens[:i]
break

debug_parser = debug_opts.get("grammar", dict(PARSER_DEFAULT_DEBUG))

# Build Syntax Tree from disassembly.
Expand All @@ -1317,7 +1345,7 @@ def code_deparse(
tokens,
customize,
co,
is_lambda=(compile_mode == "lambda"),
is_lambda=is_lambda_mode(compile_mode),
is_top_level_module=is_top_level_module,
)

Expand All @@ -1326,7 +1354,7 @@ def code_deparse(
return None

# FIXME use a lookup table here.
if compile_mode == "lambda":
if is_lambda_mode(compile_mode):
expected_start = "lambda_start"
elif compile_mode == "eval":
expected_start = "expr_start"
Expand All @@ -1339,10 +1367,12 @@ def code_deparse(
expected_start = None
else:
expected_start = None

if expected_start:
assert (
deparsed.ast == expected_start
), f"Should have parsed grammar start to '{expected_start}'; got: {deparsed.ast.kind}"
assert deparsed.ast == expected_start, (
f"Should have parsed grammar start to '{expected_start}'; "
f"got: {deparsed.ast.kind}"
)
# save memory
del tokens

Expand Down Expand Up @@ -1382,7 +1412,7 @@ def code_deparse(
deparsed.ast,
name=co.co_name,
customize=customize,
is_lambda=compile_mode == "lambda",
is_lambda=is_lambda_mode(compile_mode),
debug_opts=debug_opts,
)

Expand Down Expand Up @@ -1410,9 +1440,12 @@ def deparse_code2str(
compile_mode="exec",
is_pypy=IS_PYPY,
walker=SourceWalker,
):
"""Return the deparsed text for a Python code object. `out` is where any intermediate
output for assembly or tree output will be sent.
start_offset: int = 0,
stop_offset: int = -1,
) -> str:
"""
Return the deparsed text for a Python code object. `out` is where
any intermediate output for assembly or tree output will be sent.
"""
return code_deparse(
code,
Expand All @@ -1427,12 +1460,12 @@ def deparse_code2str(


if __name__ == "__main__":

def deparse_test(co):
"""This is a docstring"""
s = deparse_code2str(co)
# s = deparse_code2str(co, debug_opts={"asm": "after", "tree": {'before': False, 'after': False}})
print(s)
return


deparse_test(deparse_test.__code__)
Loading