jmespath · tmshn · Oct 11, 2017 · Oct 11, 2017 · Oct 11, 2017
diff --git a/bin/jp-compliance b/bin/jp-compliance
@@ -127,7 +127,8 @@ class ComplianceTestRunner(object):
         command.append(test_case['expression'])
         process = subprocess.Popen(command, stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
-                                   stdin=subprocess.PIPE)
+                                   stdin=subprocess.PIPE,
+                                   encoding='utf-8')
         process.stdin.write(json.dumps(test_case['given']))
         process.stdin.flush()
         stdout, stderr = process.communicate()

diff --git a/jmespath/lexer.py b/jmespath/lexer.py
@@ -1,6 +1,7 @@
 import string
 import warnings
 from json import loads
+import re
 
 from jmespath.exceptions import LexerError, EmptyExpressionError
 
@@ -62,6 +63,8 @@ def tokenize(self, expression):
                 yield self._match_or_else('&', 'and', 'expref')
             elif self._current == '`':
                 yield self._consume_literal()
+            elif self._current == '/':
+                yield self._consume_regex_literal()
             elif self._current in self.VALID_NUMBER:
                 start = self._position
                 buff = self._consume_number()
@@ -87,10 +90,15 @@ def tokenize(self, expression):
             elif self._current == '!':
                 yield self._match_or_else('=', 'ne', 'not')
             elif self._current == '=':
-                if self._next() == '=':
+                next_char = self._next()
+                if next_char == '=':
                     yield {'type': 'eq', 'value': '==',
                         'start': self._position - 1, 'end': self._position}
                     self._next()
+                elif next_char == '~':
+                    yield {'type': 'regex_match', 'value': '=~',
+                        'start': self._position - 1, 'end': self._position}
+                    self._next()
                 else:
                     if self._current is None:
                         # If we're at the EOF, we never advanced
@@ -196,6 +204,51 @@ def _consume_raw_string_literal(self):
         return {'type': 'literal', 'value': lexeme,
                 'start': start, 'end': token_len}
 
+    def _consume_regex_literal(self):
+        """Lex a ``/pattern/flags`` regex literal into a ``literal`` token.
+
+        The pattern text is whatever ``_consume_until("/")`` returns, with
+        every backslash-slash pair replaced by a plain slash.  Each
+        following character that is one of i, m, s, l, a, u is looked up
+        as an upper-case attribute of ``re`` and OR-ed into the compile
+        flags.
+
+        Returns a token dict whose ``value`` is the compiled pattern and
+        whose ``end`` is the lexer position after the flags minus the
+        starting position.
+
+        Raises LexerError when a flag is missing from this Python's
+        ``re`` module, or when ``re.compile`` rejects the pattern or the
+        flag combination.
+        """
+        start = self._position
+        regex_pattern = self._consume_until("/").replace("\\/", "/")
+        regex_flags = 0
+        while self._current in ('i', 'm', 's', 'l', 'a', 'u'):
+            try:
+                regex_flags |= getattr(re, self._current.upper())
+            except AttributeError:
+                message = ('regex error: flag "{}" is unavailable in this '
+                           'version of Python').format(self._current)
+                raise LexerError(
+                    lexer_position=start,
+                    lexer_value=''.join(self._chars[start:self._position]),
+                    message=message)
+            self._next()
+        token_len = self._position - start
+        try:
+            regex = re.compile(regex_pattern, regex_flags)
+        except (re.error, ValueError) as e:
+            # re.compile signals a refused flag combination (LOCALE with a
+            # str pattern, ASCII together with UNICODE) with ValueError
+            # rather than re.error; surface both as lexer errors.
+            raise LexerError(
+                lexer_position=start,
+                lexer_value=''.join(self._chars[start:self._position]),
+                message='regex error: ' + str(e))
+        return {'type': 'literal', 'value': regex,
+                'start': start, 'end': token_len}
+
     def _match_or_else(self, expected, match_type, else_type):
         start = self._position
         current = self._current

diff --git a/jmespath/parser.py b/jmespath/parser.py
@@ -57,6 +57,7 @@ class Parser(object):
         'gte': 5,
         'lte': 5,
         'ne': 5,
+        'regex_match': 5,
         'flatten': 9,
         # Everything above stops a projection.
         'star': 20,
@@ -306,6 +307,10 @@ def _token_led_eq(self, left):
     def _token_led_ne(self, left):
         return self._parse_comparator(left, 'ne')
 
+    def _token_led_regex_match(self, left):
+        """Hand ``left`` to ``_parse_comparator`` as a 'regex_match' comparison."""
+        return self._parse_comparator(left, 'regex_match')
+
     def _token_led_gt(self, left):
         return self._parse_comparator(left, 'gt')
 

diff --git a/jmespath/visitor.py b/jmespath/visitor.py
@@ -1,4 +1,5 @@
 import operator
+import re
 
 from jmespath import functions
 from jmespath.compat import string_type
@@ -12,6 +13,29 @@ def _equals(x, y):
         return x == y
 
 
+def _regex_match(lhs, rhs):
+    """Evaluate the ``regex_match`` comparator for two operands.
+
+    An operand that has a ``search`` attribute is used as the pattern
+    object and searched against the other operand; ``rhs`` is checked
+    first.  If neither has one, ``rhs`` is passed to ``re.search`` as the
+    pattern and ``lhs`` as the text.
+
+    Returns True or False for a completed search, or None when the
+    search raises TypeError or the pattern string does not compile.
+    """
+    try:
+        if hasattr(rhs, 'search'):
+            return rhs.search(lhs) is not None
+        if hasattr(lhs, 'search'):
+            return lhs.search(rhs) is not None
+        return re.search(rhs, lhs) is not None
+    except (TypeError, re.error):
+        # An uncompilable pattern string is handled like any other
+        # unusable operand instead of leaking re.error to the caller.
+        return None
+
+
 def _is_special_integer_case(x, y):
     # We need to special case comparing 0 or 1 to
     # True/False.  While normally comparing any
@@ -101,12 +125,13 @@ class TreeInterpreter(Visitor):
     COMPARATOR_FUNC = {
         'eq': _equals,
         'ne': lambda x, y: not _equals(x, y),
+        'regex_match': _regex_match,
         'lt': operator.lt,
         'gt': operator.gt,
         'lte': operator.le,
         'gte': operator.ge
     }
-    _EQUALITY_OPS = ['eq', 'ne']
+    _EQUALITY_OPS = ['eq', 'ne', 'regex_match']
     MAP_TYPE = dict
 
     def __init__(self, options=None):

diff --git a/tests/compliance/filters.json b/tests/compliance/filters.json
@@ -464,5 +464,54 @@
         "result": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
       }
     ]
+  },
+  {
+    "given": {
+      "foo": [
+        {"name": "ax"},
+        {"name": "Ax"},
+        {"name": "bx"},
+        {"name": "Bx"}
+      ]
+    },
+    "cases": [
+      {
+        "comment": "Using regex in a filter expression",
+        "expression": "foo[? name =~ '^a']",
+        "result": [
+          {"name": "ax"}
+        ]
+      },
+      {
+        "comment": "Using regex in a filter expression (pre-compiled)",
+        "expression": "foo[? name =~ /^a/]",
+        "result": [
+          {"name": "ax"}
+        ]
+      },
+      {
+        "comment": "Using regex in a filter expression (pre-compiled with flag)",
+        "expression": "foo[? name =~ /^a/i]",
+        "result": [
+          {"name": "ax"},
+          {"name": "Ax"}
+        ]
+      },
+      {
+        "comment": "Using regex as a lhs in a filter expression (pre-compiled)",
+        "expression": "foo[? /^a/ =~ name]",
+        "result": [
+          {"name": "ax"}
+        ]
+      },
+      {
+        "comment": "Using regex as a lhs in a filter expression (pre-compiled with flag)",
+        "expression": "foo[? /^a/i =~ name]",
+        "result": [
+          {"name": "ax"},
+          {"name": "Ax"}
+        ]
+      }
+    ]
   }
 ]
diff --git a/tests/test_lexer.py b/tests/test_lexer.py
@@ -1,3 +1,4 @@
+import re
 from tests import unittest
 
 from jmespath import lexer
@@ -108,6 +109,25 @@ def test_literal_with_empty_string(self):
         tokens = list(self.lexer.tokenize('``'))
         self.assert_tokens(tokens, [{'type': 'literal', 'value': ''}])
 
+    def test_literal_regex(self):
+        # A /.../ literal should lex to one literal token whose value is
+        # the compiled pattern.
+        lexed = list(self.lexer.tokenize('/foo/'))
+        expected = [{'type': 'literal', 'value': re.compile('foo')}]
+        self.assert_tokens(lexed, expected)
+
+    def test_literal_regex_with_flags(self):
+        # Flag letters after the closing slash become compile flags.
+        lexed = list(self.lexer.tokenize('/foo/im'))
+        flags = re.I | re.M
+        expected = [{'type': 'literal', 'value': re.compile('foo', flags)}]
+        self.assert_tokens(lexed, expected)
+
+    def test_literal_invalid_regex(self):
+        # '*' has nothing to repeat, so compiling it fails.
+        with self.assertRaises(LexerError):
+            list(self.lexer.tokenize('/*/'))
+
     def test_position_information(self):
         tokens = list(self.lexer.tokenize('foo'))
         self.assertEqual(