From 9b58ac86684b86cfa89cddd837fe69ea2c01db68 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Thu, 9 Jan 2025 19:19:54 -0600 Subject: [PATCH] feat(search): Add additional search connectors support to text filters --- cl/lib/elasticsearch_utils.py | 6 ++ cl/search/templates/includes/no_results.html | 2 +- cl/search/tests/tests.py | 90 +++++++++++++++++++- cl/search/tests/tests_es_opinion.py | 4 - 4 files changed, 96 insertions(+), 6 deletions(-) diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 2b8b0af263..d23e4b8288 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -43,12 +43,14 @@ ) from cl.lib.utils import ( check_for_proximity_tokens, + check_query_for_disallowed_wildcards, check_unbalanced_parenthesis, check_unbalanced_quotes, cleanup_main_query, get_array_of_selected_fields, lookup_child_courts, map_to_docket_entry_sorting, + perform_special_character_replacements, ) from cl.people_db.models import Position from cl.search.constants import ( @@ -77,6 +79,7 @@ ) from cl.search.exception import ( BadProximityQuery, + DisallowedWildcardPattern, ElasticBadRequestError, QueryType, UnbalancedParenthesesQuery, @@ -488,6 +491,9 @@ def build_text_filter(field: str, value: str) -> List: if value: if isinstance(value, str): validate_query_syntax(value, QueryType.FILTER) + if check_query_for_disallowed_wildcards(value): + raise DisallowedWildcardPattern(QueryType.FILTER) + value = perform_special_character_replacements(value) return [ Q( "query_string", diff --git a/cl/search/templates/includes/no_results.html b/cl/search/templates/includes/no_results.html index 0995315329..f42f7b300a 100644 --- a/cl/search/templates/includes/no_results.html +++ b/cl/search/templates/includes/no_results.html @@ -34,7 +34,7 @@

{% elif error_message == "unbalanced_quotes" %} Did you forget to close one or more quotes? {% elif error_message == "disallowed_wildcard_pattern" %} - The query contains a disallowed wildcard pattern. + The query contains a disallowed expensive wildcard pattern. {% endif %} {% else %} encountered an error. diff --git a/cl/search/tests/tests.py b/cl/search/tests/tests.py index 31589abd5f..9e38f46b49 100644 --- a/cl/search/tests/tests.py +++ b/cl/search/tests/tests.py @@ -457,6 +457,7 @@ def setUpTestData(cls): docket=DocketFactory( court=cls.child_court_2_2, docket_number="36-2000" ), + judges="Computer point", precedential_status=PRECEDENTIAL_STATUS.PUBLISHED, sub_opinions=RelatedFactory( OpinionWithChildrenFactory, @@ -471,6 +472,7 @@ def setUpTestData(cls): docket=DocketFactory( court=cls.child_gand_2, docket_number="38-1000" ), + judges="Composition plant", precedential_status=PRECEDENTIAL_STATUS.PUBLISHED, sub_opinions=RelatedFactory( OpinionWithChildrenFactory, @@ -1085,6 +1087,74 @@ def test_support_search_connectors(self) -> None: msg=f"Failed on: {test_case['label']} missing {expected_str}", ) + def test_support_search_connectors_filters(self) -> None: + """Verify that new supported custom search connectors yield the + expected results. + """ + + tests = [ + { + "label": "But not %", + "search_params": { + "case_name": "Strickland % Lorem % America", + }, + "expected_count": 1, + "expected_in_content": ["1:21-cv-1234"], + }, + { + "label": "& connector test", + "search_params": { + "case_name": "Strickland & Lorem", + }, + "expected_count": 1, + "expected_in_content": ["123456"], + }, + { + "label": "! Root expander suffix", + "search_params": { + "judge": "!Comp", + }, + "expected_count": 2, + "expected_in_content": ["36-2000", "38-1000"], + }, + { + "label": "Universal Character *", + "search_params": { + "judge": "p**nt", + }, + "expected_count": 2, + "expected_in_content": ["36-2000", "38-1000"], + }, + { + "label": "Combined operators", + "search_params": { + "case_name": "Calif*rnia & !Nev", + }, + "expected_count": 1, + "expected_in_content": ["38-1000"], + }, + ] + + for test_case in tests: + with self.subTest(label=test_case["label"]): + response = self.client.get( + reverse("show_results"), + test_case["search_params"], + ) + actual = self.get_article_count(response) + self.assertEqual( + actual, + test_case["expected_count"], + msg=f"Failed on: {test_case['label']}", + ) + decoded_content = response.content.decode() + for expected_str in test_case["expected_in_content"]: + self.assertIn( + expected_str, + decoded_content, + msg=f"Failed on: {test_case['label']} missing {expected_str}", + ) + def test_disallowed_wildcard_pattern(self) -> None: """Verify that expensive wildcard queries thrown an error.""" @@ -1107,6 +1177,24 @@ def test_disallowed_wildcard_pattern(self) -> None: "q": "*ing", }, }, + { + "label": "Disallowed ! in short queries - Filter.", + "search_params": { + "case_name": "!ap", + }, + }, + { + "label": "Disallowed * at the end in short queries - Filter.", + "search_params": { + "judge": "ap*", + }, + }, + { + "label": "Disallowed * at the beginning - Filter.", + "search_params": { + "case_name": "*ing", + }, + }, ] for test_case in tests: @@ -1117,7 +1205,7 @@ def test_disallowed_wildcard_pattern(self) -> None: ) decoded_content = response.content.decode() self.assertIn( - "The query contains a disallowed wildcard pattern.", + "The query contains a disallowed expensive wildcard pattern", decoded_content, msg=f"Failed on: {test_case['label']}, no disallowed wildcard pattern error.", ) diff --git a/cl/search/tests/tests_es_opinion.py b/cl/search/tests/tests_es_opinion.py index 358f7c2725..7014aaa8c8 100644 --- a/cl/search/tests/tests_es_opinion.py +++ b/cl/search/tests/tests_es_opinion.py @@ -1818,10 +1818,6 @@ async def test_can_use_negation_in_queries(self) -> None: r = await self._test_article_count(search_params, 0, "negation query") self.assertIn("had no results", r.content.decode()) - search_params["q"] = "Howard !Honda" - r = await self._test_article_count(search_params, 0, "negation query") - self.assertIn("had no results", r.content.decode()) - search_params["q"] = "Howard -Honda" r = await self._test_article_count(search_params, 0, "negation query") self.assertIn("had no results", r.content.decode())