Skip to content

Commit

Permalink
Testing using Python ast module (#252)
Browse files Browse the repository at this point in the history
* WIP Initial steps towards Python AST compat checking.

* A touch of refactoring to appease borrow checker + clippy.

* Begun working my way through various mismatches between the AST parser libraries.

* Added option to include source in output.

* Fixed up the string based tests, mostly. Added plenty of TODOs.

* Fixed some more tests.

* Fixed another statement.

* Updated some snapshots.

* More snapshots.

* Fixed a bug where subscripts with a dotted attribute ref had the wrong start/end.

* Removed some comments.

* Fixed lexer for nested tokens.

* Fixed more failing tests.

* Fix to_row_col function

* WIP Initial steps towards Python AST compat checking.

* A touch of refactoring to appease borrow checker + clippy.

* Begun working my way through various mismatches between the AST parser libraries.

* Added option to include source in output.

* Fixed up the string based tests, mostly. Added plenty of TODOs.

* Fixed some more tests.

* Fixed another statement.

* Updated some snapshots.

* More snapshots.

* Fixed a bug where subscripts with a dotted attribute ref had the wrong start/end.

* Removed some comments.

* Fixed lexer for nested tokens.

* Fixed more failing tests.

* Fix to_row_col function

* Fix end offsets in snapshots

* Fix more tests

* Fix not consuming RightParen in annotated assignment

* Fix line and end offset

* Run compat tests with python3.12

* Disable codecov

---------

Co-authored-by: Shaygan Hooshyari <[email protected]>
  • Loading branch information
dwoznicki and Glyphack authored Aug 17, 2024
1 parent 37547d8 commit c34a2e8
Show file tree
Hide file tree
Showing 46 changed files with 1,830 additions and 300 deletions.
41 changes: 23 additions & 18 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,13 @@ jobs:
with:
rust-version: stable
components: rustfmt, clippy
# Required for compatibility tests
- uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Build
run: cargo build

- name: Run tests
env:
RUST_BACKTRACE: 1
Expand All @@ -33,21 +38,21 @@ jobs:
run: make format-check
- name: clippy
run: make lint
coverage:
name: coverage
runs-on: ubuntu-latest
container:
image: xd009642/tarpaulin:develop-nightly
options: --security-opt seccomp=unconfined
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Generate code coverage
run: |
cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 120 --out xml
- name: Upload to codecov.io
uses: codecov/codecov-action@v4
with:
token: ${{secrets.CODECOV_TOKEN}}
fail_ci_if_error: false
# coverage:
# name: coverage
# runs-on: ubuntu-latest
# container:
# image: xd009642/tarpaulin:develop-nightly
# options: --security-opt seccomp=unconfined
# steps:
# - uses: actions/checkout@v4
# with:
# submodules: true
# - name: Generate code coverage
# run: |
# cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 120 --out xml
# - name: Upload to codecov.io
# uses: codecov/codecov-action@v4
# with:
# token: ${{secrets.CODECOV_TOKEN}}
# fail_ci_if_error: false
2 changes: 2 additions & 0 deletions parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ reqwest = { version= "0.12.4", features = ["blocking"] }
tokio.workspace = true
tabled = "0.15"
terminal_size = "0.3"
assert-json-diff = "2.0"
pretty_assertions = "1.4"

[lib]
bench = false
102 changes: 102 additions & 0 deletions parser/ast_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import sys
import ast
from _ast import AST # Python internals I guess?
import argparse
import pathlib
import codecs
import json

# Command-line interface: the program to parse is taken either from a file
# (--input-file) or from standard input (--stdin).
arg_parser = argparse.ArgumentParser(
    description="Parse a Python program to AST."
)
arg_parser.add_argument("--input-file", help="Read and parse input file.")
arg_parser.add_argument(
    "--stdin",
    action="store_true",
    help="Read and parse input from stdin.",
)
arg_parser.add_argument(
    "--type-comments",
    action="store_true",
    help="Produce an AST with type comments.",
)
args = arg_parser.parse_args()

# --input-file takes precedence when both input options are supplied.
if args.input_file is not None:
    # Decode explicitly as UTF-8 (Python's default source encoding) instead of
    # relying on the locale's preferred encoding, which differs per platform.
    source = pathlib.Path(args.input_file).read_text(encoding="utf-8")
elif args.stdin:
    source = sys.stdin.read()
else:
    print("Missing input parameter. Please specify one of --input-file or --stdin.", file=sys.stderr)
    sys.exit(1)

# ----- Begin inline dependency -------------------------------------------------------------------
# https://github.com/YoloSwagTeam/ast2json

# Copyright (c) 2013, Laurent Peuch <[email protected]>
#
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the University of California, Berkeley nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Groups of built-in types, each used as the second argument of isinstance().
BUILTIN_PURE = (int, float, bool)
BUILTIN_BYTES = (bytearray, bytes)
# Trailing comma makes this a real one-element tuple like its siblings;
# "(str)" is just the type ``str`` wrapped in redundant parentheses.
BUILTIN_STR = (str,)

def decode_str(value):
    """Return ``value`` unchanged.

    Text needs no conversion before JSON serialization; this function is the
    ``str`` counterpart of ``decode_bytes``.
    """
    return value

def decode_bytes(value):
    """Convert a ``bytes``/``bytearray`` value into a JSON-friendly string.

    Args:
        value: The binary data to convert.

    Returns:
        The data decoded as UTF-8 when it is valid UTF-8; otherwise its
        lowercase hexadecimal representation (two digits per byte).
    """
    try:
        return value.decode("utf-8")
    except UnicodeDecodeError:
        # Only an actual decoding failure triggers the hex fallback; a bare
        # ``except`` would also swallow KeyboardInterrupt/SystemExit and hide
        # unrelated programming errors.
        return value.hex()

def ast2json(node):
    """Convert an AST node into a plain dictionary.

    The result maps ``"_type"`` to the node's class name and every public
    attribute reported by ``dir(node)`` to the output of ``get_value`` for
    that attribute. Names starting with an underscore are skipped, as are the
    names ``"n"`` and ``"s"`` (presumably the deprecated ``Constant`` aliases
    -- TODO confirm).

    Args:
        node: The node to convert; must be an ``AST`` instance.

    Returns:
        A ``dict`` describing ``node``.
    """
    assert isinstance(node, AST)
    serialized = {"_type": node.__class__.__name__}
    public_names = (
        name
        for name in dir(node)
        if not (name.startswith("_") or name in ("n", "s"))
    )
    for name in public_names:
        serialized[name] = get_value(getattr(node, name))
    return serialized

def get_value(attr_value):
    """Convert one AST attribute value into a JSON-serializable value.

    Args:
        attr_value: ``None``, a number, text, binary data, a complex number,
            a list of supported values, an ``AST`` node, or ``Ellipsis``.

    Returns:
        ``None`` and int/float/bool/str values unchanged; binary data via
        ``decode_bytes``; complex numbers as ``str(value)``; lists converted
        element by element; AST nodes via ``ast2json``; ``Ellipsis`` as the
        string ``"..."``.

    Raises:
        Exception: If the value matches none of the supported types.
    """
    if attr_value is None:
        return None

    # Ordered (types, converter) pairs; the first matching entry wins.
    dispatch = (
        (BUILTIN_PURE, lambda plain: plain),
        (BUILTIN_BYTES, decode_bytes),
        (BUILTIN_STR, decode_str),
        (complex, str),
        (list, lambda items: [get_value(item) for item in items]),
        (AST, ast2json),
        (type(Ellipsis), lambda _ellipsis: "..."),
    )
    for accepted_types, convert in dispatch:
        if isinstance(attr_value, accepted_types):
            return convert(attr_value)

    raise Exception("Unknown case for '%s' of type '%s'" % (attr_value, type(attr_value)))

# -------------------------------------------------------------------- End inline dependency ------


# Parse the program, convert the resulting tree to plain data, and write it to
# stdout as indented JSON followed by a newline.
tree = ast.parse(
    source,
    filename=args.input_file or "stdin",
    mode="exec",
    type_comments=args.type_comments,
)
tree_json = ast2json(tree)
sys.stdout.write(json.dumps(tree_json, indent=4) + "\n")
24 changes: 20 additions & 4 deletions parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ impl<'a> Lexer<'a> {
let value = self.parse_token_value(kind, start);
let end = self.current;

if kind == Kind::NewLine || kind == Kind::NL {
if (kind == Kind::NewLine || kind == Kind::NL) && !self.peak_mode {
self.line_starts.push(self.current);
}

Expand Down Expand Up @@ -935,6 +935,22 @@ impl<'a> Lexer<'a> {
}
}
}
if self.nesting > 0 {
// Don't add indent/dedent tokens when in nested context.
// For instance, in the following snippet
// ```
// a = (
// 1
// )
// ```
// the indentation of "1" is completely inconsequential. To be technically correct,
// we'll return a `WhiteSpace` token if any amount of whitespace was found.
if spaces_count > 0 {
return Ok(Some(Kind::WhiteSpace));
} else {
return Ok(None);
}
}
if spaces_count == 0 {
// When there are no spaces and only a new line
// like the following
Expand All @@ -957,14 +973,14 @@ impl<'a> Lexer<'a> {
// loop over indent stack from the top and check if this element matches the
// new indentation level if nothing matches then it is an error
// do not pop the element from the stack
let mut indentation_matches_outer_evel = false;
let mut indentation_matches_outer_level = false;
for top in self.indent_stack.iter().rev() {
if top == &spaces_count {
indentation_matches_outer_evel = true;
indentation_matches_outer_level = true;
break;
}
}
if !indentation_matches_outer_evel {
if !indentation_matches_outer_level {
return Err(LexError::UnindentDoesNotMatchAnyOuterIndentationLevel);
}
Ok(Some(Kind::Dedent))
Expand Down
Loading

0 comments on commit c34a2e8

Please sign in to comment.