Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Proposal] Add regex matcher #148

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion bin/jp-compliance
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,8 @@ class ComplianceTestRunner(object):
command.append(test_case['expression'])
process = subprocess.Popen(command, stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stdin=subprocess.PIPE)
stdin=subprocess.PIPE,
encoding='utf-8')
process.stdin.write(json.dumps(test_case['given']))
process.stdin.flush()
stdout, stderr = process.communicate()
Expand Down
32 changes: 31 additions & 1 deletion jmespath/lexer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import string
import warnings
from json import loads
import re

from jmespath.exceptions import LexerError, EmptyExpressionError

Expand Down Expand Up @@ -62,6 +63,8 @@ def tokenize(self, expression):
yield self._match_or_else('&', 'and', 'expref')
elif self._current == '`':
yield self._consume_literal()
elif self._current == '/':
yield self._consume_regex_literal()
elif self._current in self.VALID_NUMBER:
start = self._position
buff = self._consume_number()
Expand All @@ -87,10 +90,15 @@ def tokenize(self, expression):
elif self._current == '!':
yield self._match_or_else('=', 'ne', 'not')
elif self._current == '=':
if self._next() == '=':
next_char = self._next()
if next_char == '=':
yield {'type': 'eq', 'value': '==',
'start': self._position - 1, 'end': self._position}
self._next()
elif next_char == '~':
yield {'type': 'regex_match', 'value': '=~',
'start': self._position - 1, 'end': self._position}
self._next()
else:
if self._current is None:
# If we're at the EOF, we never advanced
Expand Down Expand Up @@ -196,6 +204,28 @@ def _consume_raw_string_literal(self):
return {'type': 'literal', 'value': lexeme,
'start': start, 'end': token_len}

def _consume_regex_literal(self):
    """Lex a ``/pattern/flags`` regex literal into a ``literal`` token.

    The pattern text is whatever ``self._consume_until("/")`` returns, with
    every backslash-escaped slash turned back into a plain ``/``.  Any run
    of the letters ``i m s l a u`` that follows the closing slash is read
    as flags; each letter maps to the ``re`` module constant of the same
    upper-case name (``re.I``, ``re.M``, ...).

    Returns:
        dict: ``{'type': 'literal', 'value': <compiled pattern>,
        'start': <start position>, 'end': <consumed length>}``.

    Raises:
        LexerError: if a flag does not exist in the running Python's
            ``re`` module, or if ``re.compile`` rejects the pattern or
            the flag combination.
    """
    start = self._position
    regex_pattern = self._consume_until("/").replace("\\/", "/")
    regex_flags = 0
    while self._current in ('i', 'm', 's', 'l', 'a', 'u'):
        # Not every interpreter exposes every flag constant, so look the
        # constant up defensively rather than assuming it exists.
        flag = getattr(re, self._current.upper(), None)
        if flag is None:
            raise LexerError(lexer_position=start,
                             lexer_value=''.join(self._chars[start:self._position]),
                             message='regex error: flag "{}" is unavailable in this version of Python'.format(self._current))
        regex_flags |= flag
        self._next()
    # NOTE: 'end' carries the consumed length, not an absolute offset; this
    # matches what _consume_raw_string_literal returns.
    token_len = self._position - start
    try:
        regex = re.compile(regex_pattern, regex_flags)
    except (re.error, ValueError) as e:
        # re.error covers malformed patterns.  ValueError covers flag
        # combinations that re.compile refuses (LOCALE with a str pattern,
        # ASCII together with UNICODE); without catching it, such input
        # would escape the lexer as a non-LexerError exception.
        raise LexerError(lexer_position=start,
                         lexer_value=''.join(self._chars[start:self._position]),
                         message='regex error: ' + str(e))
    return {'type': 'literal', 'value': regex,
            'start': start, 'end': token_len}

def _match_or_else(self, expected, match_type, else_type):
start = self._position
current = self._current
Expand Down
4 changes: 4 additions & 0 deletions jmespath/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class Parser(object):
'gte': 5,
'lte': 5,
'ne': 5,
'regex_match': 5,
'flatten': 9,
# Everything above stops a projection.
'star': 20,
Expand Down Expand Up @@ -306,6 +307,9 @@ def _token_led_eq(self, left):
def _token_led_ne(self, left):
    """Delegate to ``self._parse_comparator`` for the ``'ne'`` comparator.

    ``left`` is passed through unchanged and the helper's result is
    returned as-is.
    """
    comparator = 'ne'
    return self._parse_comparator(left, comparator)

def _token_led_regex_match(self, left):
    """Delegate to ``self._parse_comparator`` for ``'regex_match'``.

    ``left`` is passed through unchanged and the helper's result is
    returned as-is.
    """
    comparator = 'regex_match'
    return self._parse_comparator(left, comparator)

def _token_led_gt(self, left):
    """Delegate to ``self._parse_comparator`` for the ``'gt'`` comparator.

    ``left`` is passed through unchanged and the helper's result is
    returned as-is.
    """
    comparator = 'gt'
    return self._parse_comparator(left, comparator)

Expand Down
15 changes: 14 additions & 1 deletion jmespath/visitor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import operator
import re

from jmespath import functions
from jmespath.compat import string_type
Expand All @@ -12,6 +13,17 @@ def _equals(x, y):
return x == y


def _regex_match(lhs, rhs):
try:
if hasattr(rhs, 'search'):
return rhs.search(lhs) is not None
if hasattr(lhs, 'search'):
return lhs.search(rhs) is not None
return re.search(rhs, lhs) is not None
except TypeError:
return None


def _is_special_integer_case(x, y):
# We need to special case comparing 0 or 1 to
# True/False. While normally comparing any
Expand Down Expand Up @@ -101,12 +113,13 @@ class TreeInterpreter(Visitor):
COMPARATOR_FUNC = {
'eq': _equals,
'ne': lambda x, y: not _equals(x, y),
'regex_match': _regex_match,
'lt': operator.lt,
'gt': operator.gt,
'lte': operator.le,
'gte': operator.ge
}
_EQUALITY_OPS = ['eq', 'ne']
_EQUALITY_OPS = ['eq', 'ne', 'regex_match']
MAP_TYPE = dict

def __init__(self, options=None):
Expand Down
49 changes: 49 additions & 0 deletions tests/compliance/filters.json
Original file line number Diff line number Diff line change
Expand Up @@ -464,5 +464,54 @@
"result": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
}
]
},
{
"given": {
"foo": [
{"name": "ax"},
{"name": "Ax"},
{"name": "bx"},
{"name": "Bx"}
]
},
"cases": [
{
"comment": "Using regex in a filter expression",
"expression": "foo[? name =~ '^a']",
"result": [
{"name": "ax"}
]
},
{
"comment": "Using regex in a filter expression (pre-compiled)",
"expression": "foo[? name =~ /^a/]",
"result": [
{"name": "ax"}
]
},
{
"comment": "Using regex in a filter expression (pre-compiled with flag)",
"expression": "foo[? name =~ /^a/i]",
"result": [
{"name": "ax"},
{"name": "Ax"}
]
},
{
"comment": "Using regex as a lhs in a filter expression (pre-compiled)",
"expression": "foo[? /^a/ =~ name]",
"result": [
{"name": "ax"}
]
},
{
"comment": "Using regex as a lhs in a filter expression (pre-compiled with flag)",
"expression": "foo[? /^a/i =~ name]",
"result": [
{"name": "ax"},
{"name": "Ax"}
]
}
]
}
]
17 changes: 17 additions & 0 deletions tests/test_lexer.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from tests import unittest

from jmespath import lexer
Expand Down Expand Up @@ -108,6 +109,22 @@ def test_literal_with_empty_string(self):
tokens = list(self.lexer.tokenize('``'))
self.assert_tokens(tokens, [{'type': 'literal', 'value': ''}])

def test_literal_regex(self):
    """``/foo/`` lexes to one literal token holding the compiled pattern."""
    lexed = list(self.lexer.tokenize('/foo/'))
    expected = [{'type': 'literal', 'value': re.compile('foo')}]
    self.assert_tokens(lexed, expected)

def test_literal_regex_with_flags(self):
    """Trailing ``im`` flags end up on the compiled pattern as I | M."""
    lexed = list(self.lexer.tokenize('/foo/im'))
    expected_pattern = re.compile('foo', re.I | re.M)
    self.assert_tokens(lexed, [
        {'type': 'literal', 'value': expected_pattern},
    ])

def test_literal_invalid_regex(self):
    """A pattern that ``re`` cannot compile surfaces as ``LexerError``."""
    self.assertRaises(
        LexerError,
        lambda: list(self.lexer.tokenize('/*/')),
    )

def test_position_information(self):
tokens = list(self.lexer.tokenize('foo'))
self.assertEqual(
Expand Down