From 47a4316d04239a984a2b301bc102d4427213b47d Mon Sep 17 00:00:00 2001 From: tmshn Date: Wed, 11 Oct 2017 18:07:57 +0900 Subject: [PATCH 1/3] Added regex literal --- jmespath/lexer.py | 25 +++++++++++++++++++++++++ tests/test_lexer.py | 17 +++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/jmespath/lexer.py b/jmespath/lexer.py index 8db05e37..91376e09 100644 --- a/jmespath/lexer.py +++ b/jmespath/lexer.py @@ -1,6 +1,7 @@ import string import warnings from json import loads +import re from jmespath.exceptions import LexerError, EmptyExpressionError @@ -62,6 +63,8 @@ def tokenize(self, expression): yield self._match_or_else('&', 'and', 'expref') elif self._current == '`': yield self._consume_literal() + elif self._current == '/': + yield self._consume_regex_literal() elif self._current in self.VALID_NUMBER: start = self._position buff = self._consume_number() @@ -196,6 +199,28 @@ def _consume_raw_string_literal(self): return {'type': 'literal', 'value': lexeme, 'start': start, 'end': token_len} + def _consume_regex_literal(self): + start = self._position + regex_pattern = self._consume_until("/").replace("\\/", "/") + regex_flags = 0 + while self._current in ['i', 'm', 's', 'l', 'a', 'u']: + try: + regex_flags |= getattr(re, self._current.upper()) + except AttributeError as e: + raise LexerError(lexer_position=start, + lexer_value=''.join(self._chars[start:self._position]), + message='regex error: flag "{}" is unavailable in this version of Python'.format(self._current)) + self._next() + token_len = self._position - start + try: + regex = re.compile(regex_pattern, regex_flags) + return {'type': 'literal', 'value': regex, + 'start': start, 'end': token_len} + except re.error as e: + raise LexerError(lexer_position=start, + lexer_value=''.join(self._chars[start:self._position]), + message='regex error: ' + str(e)) + def _match_or_else(self, expected, match_type, else_type): start = self._position current = self._current diff --git a/tests/test_lexer.py b/tests/test_lexer.py index 6d0dd2be..ae3807db 100644 --- a/tests/test_lexer.py +++ b/tests/test_lexer.py @@ -1,3 +1,4 @@ +import re from tests import unittest from jmespath import lexer @@ -108,6 +109,22 @@ def test_literal_with_empty_string(self): tokens = list(self.lexer.tokenize('``')) self.assert_tokens(tokens, [{'type': 'literal', 'value': ''}]) + def test_literal_regex(self): + tokens = list(self.lexer.tokenize('/foo/')) + self.assert_tokens(tokens, [ + {'type': 'literal', 'value': re.compile('foo')}, + ]) + + def test_literal_regex_with_flags(self): + tokens = list(self.lexer.tokenize('/foo/im')) + self.assert_tokens(tokens, [ + {'type': 'literal', 'value': re.compile('foo', re.I | re.M)}, + ]) + + def test_literal_invalid_regex(self): + with self.assertRaises(LexerError): + list(self.lexer.tokenize('/*/')) + def test_position_information(self): tokens = list(self.lexer.tokenize('foo')) self.assertEqual( From ccb2158d59157a9bd76b953b71b1d934617d4238 Mon Sep 17 00:00:00 2001 From: tmshn Date: Wed, 11 Oct 2017 18:08:49 +0900 Subject: [PATCH 2/3] Added regex match operator '=~' --- jmespath/lexer.py | 7 ++++- jmespath/parser.py | 4 +++ jmespath/visitor.py | 15 ++++++++++- tests/compliance/filters.json | 49 +++++++++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 2 deletions(-) diff --git a/jmespath/lexer.py b/jmespath/lexer.py index 91376e09..d89b732a 100644 --- a/jmespath/lexer.py +++ b/jmespath/lexer.py @@ -90,10 +90,15 @@ def tokenize(self, expression): elif self._current == '!': yield self._match_or_else('=', 'ne', 'not') elif self._current == '=': - if self._next() == '=': + next_char = self._next() + if next_char == '=': yield {'type': 'eq', 'value': '==', 'start': self._position - 1, 'end': self._position} self._next() + elif next_char == '~': + yield {'type': 'regex_match', 'value': '=~', + 'start': self._position - 1, 'end': self._position} + self._next() else: if self._current is None: # If we're at the EOF, we never advanced diff --git a/jmespath/parser.py b/jmespath/parser.py index 4d5ba38a..724eb92d 100644 --- a/jmespath/parser.py +++ b/jmespath/parser.py @@ -57,6 +57,7 @@ class Parser(object): 'gte': 5, 'lte': 5, 'ne': 5, + 'regex_match': 5, 'flatten': 9, # Everything above stops a projection. 'star': 20, @@ -306,6 +307,9 @@ def _token_led_eq(self, left): def _token_led_ne(self, left): return self._parse_comparator(left, 'ne') + def _token_led_regex_match(self, left): + return self._parse_comparator(left, 'regex_match') + def _token_led_gt(self, left): return self._parse_comparator(left, 'gt') diff --git a/jmespath/visitor.py b/jmespath/visitor.py index 2c783e5e..ba185910 100644 --- a/jmespath/visitor.py +++ b/jmespath/visitor.py @@ -1,4 +1,5 @@ import operator +import re from jmespath import functions from jmespath.compat import string_type @@ -12,6 +13,17 @@ def _equals(x, y): return x == y +def _regex_match(lhs, rhs): + try: + if hasattr(rhs, 'search'): + return rhs.search(lhs) is not None + if hasattr(lhs, 'search'): + return lhs.search(rhs) is not None + return re.search(rhs, lhs) is not None + except TypeError: + return None + + def _is_special_integer_case(x, y): # We need to special case comparing 0 or 1 to # True/False. While normally comparing any @@ -101,12 +113,13 @@ class TreeInterpreter(Visitor): COMPARATOR_FUNC = { 'eq': _equals, 'ne': lambda x, y: not _equals(x, y), + 'regex_match': _regex_match, 'lt': operator.lt, 'gt': operator.gt, 'lte': operator.le, 'gte': operator.ge } - _EQUALITY_OPS = ['eq', 'ne'] + _EQUALITY_OPS = ['eq', 'ne', 'regex_match'] MAP_TYPE = dict def __init__(self, options=None): diff --git a/tests/compliance/filters.json b/tests/compliance/filters.json index 5b9f52b1..54588c69 100644 --- a/tests/compliance/filters.json +++ b/tests/compliance/filters.json @@ -464,5 +464,54 @@ "result": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] } ] + }, + { + "given": { + "foo": [ + {"name": "ax"}, + {"name": "Ax"}, + {"name": "bx"}, + {"name": "Bx"} + ] + }, + "cases": [ + { + "comment": "Using regex in a filter expression", + "expression": "foo[? name =~ '^a']", + "result": [ + {"name": "ax"} + ] + }, + { + "comment": "Using regex in a filter expression (pre-compiled)", + "expression": "foo[? name =~ /^a/]", + "result": [ + {"name": "ax"} + ] + }, + { + "comment": "Using regex in a filter expression (pre-compiled with flag)", + "expression": "foo[? name =~ /^a/i]", + "result": [ + {"name": "ax"}, + {"name": "Ax"} + ] + }, + { + "comment": "Using regex as a lhs in a filter expression (pre-compiled)", + "expression": "foo[? /^a/ =~ name]", + "result": [ + {"name": "ax"} + ] + }, + { + "comment": "Using regex as a lhs in a filter expression (pre-compiled with flag)", + "expression": "foo[? /^a/i =~ name]", + "result": [ + {"name": "ax"}, + {"name": "Ax"} + ] + } + ] } ] From 48e91600afeccc5c04806e07294ba16449ecc7bc Mon Sep 17 00:00:00 2001 From: tmshn Date: Wed, 11 Oct 2017 18:10:15 +0900 Subject: [PATCH 3/3] =?UTF-8?q?Fix=20bin/jp-compliance:=20specify=20encode?= =?UTF-8?q?=20for=20compatibility=20in=20Python3=20This=20avoids=20followi?= =?UTF-8?q?ng=20error=20in=20Python3:=20=E2=80=9CTypeError:=20a=20bytes-li?= =?UTF-8?q?ke=20object=20is=20required,=20not=20'str=E2=80=99=E2=80=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bin/jp-compliance | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/jp-compliance b/bin/jp-compliance index 3b66ad45..115e8972 100755 --- a/bin/jp-compliance +++ b/bin/jp-compliance @@ -127,7 +127,8 @@ class ComplianceTestRunner(object): command.append(test_case['expression']) process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - stdin=subprocess.PIPE) + stdin=subprocess.PIPE, + encoding='utf-8') process.stdin.write(json.dumps(test_case['given'])) process.stdin.flush() stdout, stderr = process.communicate()