Skip to content

Commit

Permalink
add support for _ separator
Browse files Browse the repository at this point in the history
  • Loading branch information
z80dev committed Nov 9, 2023
1 parent c7538d5 commit 596548a
Show file tree
Hide file tree
Showing 4 changed files with 329 additions and 4 deletions.
4 changes: 2 additions & 2 deletions examples/Foo.vy
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#pragma version 0.3.10
#pragma version 0.3.8

event Foo:
arg1: uint256
Expand All @@ -18,7 +18,7 @@ FEE: constant(uint256) = 100

@external
def __init__():
y: uint256 = FEE
y: uint256 = 100_000_100
x: uint256 = FEE
z: Bar = Bar({x: y})
m: Roles = Roles.ADMIN
Expand Down
Empty file added grammar/__init__.py
Empty file.
321 changes: 321 additions & 0 deletions grammar/grammar.lark
Original file line number Diff line number Diff line change
@@ -0,0 +1,321 @@
// Vyper grammar for Lark

// A module is a sequence of definitions and methods (and comments).
// NOTE: Start symbol for the grammar
// NOTE: Module can start with docstring
module: ( DOCSTRING
| COMMENT
| import
| struct_def
| interface_def
| constant_def
| variable_def
| enum_def
| event_def
| function_def
| immutable_def
| _NEWLINE )*


// Import statements (Supports all styles of Python imports)
_AS: "as"
_FROM: "from"
_IMPORT: "import"
DOT: "."
WILDCARD: "*"
_import_name: NAME
// NOTE: Don't use DOT here, just a separator
_import_path: (_import_name ".")* _import_name
import_alias: _AS NAME
?import_list: _import_name [import_alias] ("," _import_name [import_alias] )* [","]
_import_from: _FROM (DOT* _import_path | DOT+)
import: _IMPORT DOT* _import_path [import_alias]
| _import_from _IMPORT ( WILDCARD | _import_name [import_alias] )
| _import_from _IMPORT "(" import_list ")"

// Constant definitions
// NOTE: Temporary until decorators used
constant: "constant" "(" type ")"
constant_private: NAME ":" constant
constant_with_getter: NAME ":" "public" "(" constant ")"
constant_def: (constant_private | constant_with_getter) "=" _expr

// immutable definitions
// NOTE: Temporary until decorators used
immutable: "immutable" "(" type ")"
immutable_def: NAME ":" immutable

variable: NAME ":" type
// NOTE: Temporary until decorators used
variable_with_getter: NAME ":" "public" "(" (type | immutable) ")"
variable_def: variable | variable_with_getter

// A decorator "wraps" a method, modifying it's context.
// NOTE: One or more can be applied (some combos might conflict)
decorator: "@" NAME [ "(" [arguments] ")" ] _NEWLINE
decorators: decorator+

// Functions/Methods take a list of zero or more typed parameters,
// and can return up to one parameter.
// NOTE: Parameters can have a default value,
// which must be a constant or environment variable.
parameter: NAME ":" type ["=" _expr]
parameters: parameter ("," parameter?)*

_FUNC_DECL: "def"
_RETURN_TYPE: "->"
returns: _RETURN_TYPE type
function_sig: _FUNC_DECL NAME "(" [parameters] ")" [returns]
function_def: [decorators] function_sig ":" body

// Events can be composed of 0 or more members
_EVENT_DECL: "event"
event_member: NAME ":" type
indexed_event_arg: NAME ":" "indexed" "(" type ")"
// Events which use no args use a pass statement instead
event_body: _NEWLINE _INDENT (((event_member | indexed_event_arg ) _NEWLINE)+ | _PASS _NEWLINE) _DEDENT
event_def: _EVENT_DECL NAME ":" ( event_body | _PASS )

// Enums
_ENUM_DECL: "enum"
enum_member: NAME
enum_body: _NEWLINE _INDENT (enum_member _NEWLINE)+ _DEDENT
enum_def: _ENUM_DECL NAME ":" enum_body

// Types
array_def: (NAME | array_def | dyn_array_def) "[" _expr "]"
dyn_array_def: "DynArray" "[" (NAME | array_def | dyn_array_def) "," _expr "]"
tuple_def: "(" ( NAME | array_def | dyn_array_def | tuple_def ) ( "," ( NAME | array_def | dyn_array_def | tuple_def ) )* [","] ")"
// NOTE: Map takes a basic type and maps to another type (can be non-basic, including maps)
_MAP: "HashMap"
map_def: _MAP "[" ( NAME | array_def ) "," type "]"
type: ( NAME | array_def | tuple_def | map_def | dyn_array_def )

// Structs can be composed of 1+ basic types or other custom_types
_STRUCT_DECL: "struct"
struct_member: NAME ":" type
struct_def: _STRUCT_DECL NAME ":" _NEWLINE _INDENT (struct_member _NEWLINE)+ _DEDENT

// Interfaces are composed of a series of method definitions, plus their mutability
_INTERFACE_DECL: "interface"
mutability: NAME
interface_function: function_sig ":" mutability
interface_def: _INTERFACE_DECL NAME ":" _NEWLINE _INDENT ( interface_function _NEWLINE)+ _DEDENT
_IMPLEMENTS_DECL: "implements"
implements_def: _IMPLEMENTS_DECL ":" NAME


// Statements
// If and For blocks create a new block, and thus are complete when de-indented
// Conversely, the rest of the statements require a newline to be considered complete
// (as they do not create a new block)
_stmt: ( if_stmt | for_stmt ) [COMMENT]
| (declaration
| assign
| aug_assign
| return_stmt
| pass_stmt
| break_stmt
| continue_stmt
| log_stmt
| raise_stmt
| assert_stmt
| _expr ) [COMMENT] _NEWLINE

declaration: variable ["=" _expr]
skip_assign: "_"
multiple_assign: (variable_access | skip_assign) ("," (variable_access | skip_assign))+
assign: (variable_access | multiple_assign | "(" multiple_assign ")" ) "=" _expr
// NOTE: Keep these in sync with bin_op below
?aug_operator: "+" -> add
| "-" -> sub
| "*" -> mul
| "/" -> div
| "%" -> mod
| "**" -> pow
| "<<" -> shl
| ">>" -> shr
| _BITAND -> bitand
| _BITOR -> bitor
| _BITXOR -> bitxor
| _AND -> and
| _OR -> or
// NOTE: Post-process into a normal assign
aug_assign: variable_access aug_operator "=" _expr

_PASS: "pass"
_BREAK: "break"
_CONTINUE: "continue"
_LOG: "log"
_RETURN: "return"
_RAISE: "raise"
_ASSERT: "assert"

pass_stmt: _PASS
break_stmt: _BREAK
continue_stmt: _CONTINUE

log_stmt: _LOG NAME "(" [arguments] ")"
return_stmt: _RETURN [_expr ("," _expr)*]
_UNREACHABLE: "UNREACHABLE"
raise_stmt: _RAISE -> raise
| _RAISE _expr -> raise_with_reason
| _RAISE _UNREACHABLE -> raise_unreachable
assert_stmt: _ASSERT _expr -> assert
| _ASSERT _expr "," _expr -> assert_with_reason
| _ASSERT _expr "," _UNREACHABLE -> assert_unreachable

body: _NEWLINE _INDENT ([COMMENT] _NEWLINE | _stmt)+ _DEDENT
cond_exec: _expr ":" body
default_exec: body
if_stmt: "if" cond_exec ("elif" cond_exec)* ["else" ":" default_exec]
// TODO: make this into a variable definition e.g. `for i: uint256 in range(0, 5): ...`
loop_variable: NAME [":" NAME]
loop_iterator: _expr
for_stmt: "for" loop_variable "in" loop_iterator ":" body

// ternary operator
ternary: _expr "if" _expr "else" _expr

// Expressions
_expr: operation
| dict
| ternary

get_item: (variable_access | list) "[" _expr "]"
get_attr: variable_access "." NAME
call: variable_access "(" [arguments] ")"
?variable_access: NAME -> get_var
| get_item
| get_attr
| call
| "(" variable_access ")"

arg: _expr
kwarg: NAME "=" _expr
?argument: (arg | kwarg)
arguments: argument ("," argument)* [","]

tuple: "(" "," ")" | "(" _expr ( ("," _expr)+ [","] | "," ) ")"
list: "[" "]" | "[" _expr ("," _expr)* [","] "]"
dict: "{" "}" | "{" (NAME ":" _expr) ("," (NAME ":" _expr))* [","] "}"


// Operators
// This section needs to match Python's operator precedence:
// See https://docs.python.org/3/reference/expressions.html#operator-precedence
// NOTE: The recursive cycle here helps enforce operator precedence
// Precedence goes up the lower down you go
?operation: bool_or

_AND: "and"
_OR: "or"
_NOT: "not"

// Boolean Operations
?bool_or: bool_and
| bool_or _OR bool_and -> or

?bool_and: bool_not
| bool_and _AND bool_not -> and

?bool_not: comparator
| _NOT bool_not -> not

_POW: "**"
_SHL: "<<"
_SHR: ">>"
_BITAND: "&"
_BITOR: "|"
_BITXOR: "^"

// Comparisions
_EQ: "=="
_NE: "!="
_LE: "<="
_GE: ">="
_IN: "in"
?comparator: bitwise_or
| comparator "<" bitwise_or -> lt
| comparator ">" bitwise_or -> gt
| comparator _EQ bitwise_or -> eq
| comparator _NE bitwise_or -> ne
| comparator _LE bitwise_or -> le
| comparator _GE bitwise_or -> ge
| comparator _IN bitwise_or -> in
| comparator _NOT _IN bitwise_or -> in

// Binary Operations
// NOTE: Keep these in sync with aug_assign above
?bitwise_or : bitwise_xor
| bitwise_or _BITOR bitwise_xor -> bitor
?bitwise_xor: bitwise_and
| bitwise_xor _BITXOR bitwise_and -> bitxor
?bitwise_and: shift
| bitwise_and _BITAND shift -> bitand
?shift: summation
| shift _SHL summation -> shl
| shift _SHR summation -> shr
?summation: product
| summation "+" product -> add
| summation "-" product -> sub
?product: unary
| product "*" unary -> mul
| product "/" unary -> div
| product "%" unary -> mod
?unary: power
| "+" power -> uadd
| "-" power -> usub
| "~" power -> invert
?power: atom
| power _POW atom -> pow

// special rule to handle types as "arguments" (for `empty` builtin)
empty: "empty" "(" type ")"

// special rule to handle types as "arguments" (for `_abi_decode` builtin)
abi_decode: "_abi_decode" "(" arg "," type ( "," kwarg )* ")"

special_builtins: empty | abi_decode

// NOTE: Must end recursive cycle like this (with `atom` calling `operation`)
?atom: variable_access
| literal
| special_builtins
| tuple
| list
| "(" operation ")"

// Tokens
// Adapted from Lark repo. https://github.com/lark-parser/lark/blob/master/examples/python3.lark
// Adapted from: https://docs.python.org/3/reference/grammar.html
// Adapted by: Erez Shinan
NAME: /[a-zA-Z_]\w*/
COMMENT: /#[^\n]*/
_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+


STRING: /b?("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i
DOCSTRING: /(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is

DEC_NUMBER: /0|[1-9]([0-9]*_)?([0-9]+_)*[0-9]+/
HEX_NUMBER.2: /0x[\da-f]*/i
OCT_NUMBER.2: /0o([0-7]+_)*[0-7]+/
BIN_NUMBER.2 : /0b[0-1]*/i
FLOAT_NUMBER.2: /((\d+(_\d+)*\.\d*|\.\d+)(e[-+]?\d+)?|\d+(_\d+)*(e[-+]?\d+)?)/i

_number: DEC_NUMBER
| HEX_NUMBER
| BIN_NUMBER
| OCT_NUMBER
| FLOAT_NUMBER

BOOL.2: "True" | "False"

// TODO: Remove Docstring from here, and add to first part of body
?literal: ( _number | STRING | DOCSTRING | BOOL )

%ignore /[\t \f]+/ // WS
%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
%ignore COMMENT
%declare _INDENT _DEDENT
8 changes: 6 additions & 2 deletions vyper_lsp/analyzer/SourceAnalyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,10 @@

from pathlib import Path

import vyper
from lark import Lark
from vyper.ast.grammar import PythonIndenter

GRAMMAR_FILE_PATH = Path(vyper.__file__).parent / "ast" / "grammar.lark"
GRAMMAR_FILE_PATH = Path(__file__).parent.parent.parent / "grammar" / "grammar.lark"
GRAMMAR = GRAMMAR_FILE_PATH.read_text()

parser = Lark(GRAMMAR, parser="lalr", start="module", postlex=PythonIndenter())
Expand All @@ -49,6 +48,11 @@ def extract_version_pragma(line: str) -> Optional[str]:
return None


# regex that matches numbers and underscores
# ex: 1_000_000
NUMBER_REGEX = re.compile(r"^(_\d+)*$")


class SourceAnalyzer(Analyzer):
def __init__(self) -> None:
self.parser_enabled = True
Expand Down

0 comments on commit 596548a

Please sign in to comment.