From 5684f6ab3af2686eb6b4b070a6a9f23aa0db4147 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Wed, 1 Jan 2025 12:22:53 -0600 Subject: [PATCH] =?UTF-8?q?fix(search):=20Fixed=20phrase=20search=20querie?= =?UTF-8?q?s=20with=20=C2=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: #4432 --- cl/search/tests/tests.py | 33 +++++++++++++++++++++++- cl/settings/third_party/elasticsearch.py | 1 + 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/cl/search/tests/tests.py b/cl/search/tests/tests.py index 82784ce5ea..2ca99e9b9a 100644 --- a/cl/search/tests/tests.py +++ b/cl/search/tests/tests.py @@ -422,13 +422,18 @@ def setUpTestData(cls): sub_opinions=RelatedFactory( OpinionWithChildrenFactory, factory_related_name="cluster", - html_columbia="

Code, § 1-815

", + html_columbia="

Code, § 1-815 Lorem §247

", ), precedential_status=PRECEDENTIAL_STATUS.PUBLISHED, ) OpinionClusterFactoryWithChildrenAndParents( case_name="Strickland v. Lorem.", docket=DocketFactory(court=cls.court, docket_number="123456"), + sub_opinions=RelatedFactory( + OpinionWithChildrenFactory, + factory_related_name="cluster", + plain_text="Random plain_text", + ), precedential_status=PRECEDENTIAL_STATUS.PUBLISHED, ) OpinionClusterFactoryWithChildrenAndParents( @@ -436,6 +441,11 @@ def setUpTestData(cls): docket=DocketFactory( court=cls.child_court_1, docket_number="34-2535" ), + sub_opinions=RelatedFactory( + OpinionWithChildrenFactory, + factory_related_name="cluster", + plain_text="Lorem 247", + ), precedential_status=PRECEDENTIAL_STATUS.PUBLISHED, ) OpinionClusterFactoryWithChildrenAndParents( @@ -443,6 +453,11 @@ def setUpTestData(cls): docket=DocketFactory( court=cls.child_court_2_2, docket_number="36-2000" ), + sub_opinions=RelatedFactory( + OpinionWithChildrenFactory, + factory_related_name="cluster", + plain_text="Random plain_text", + ), precedential_status=PRECEDENTIAL_STATUS.PUBLISHED, ) @@ -451,6 +466,11 @@ def setUpTestData(cls): docket=DocketFactory( court=cls.child_gand_2, docket_number="38-1000" ), + sub_opinions=RelatedFactory( + OpinionWithChildrenFactory, + factory_related_name="cluster", + plain_text="Random plain_text", + ), precedential_status=PRECEDENTIAL_STATUS.PUBLISHED, ) call_command( @@ -838,6 +858,17 @@ def test_raise_forbidden_error_on_depth_pagination(self) -> None: ) self.assertEqual(r.status_code, HTTPStatus.FORBIDDEN) + async def test_avoid_splitting_terms_on_phrase_queries(self) -> None: + """Can we avoid splitting words in phrase queries such as "§247"?""" + + # A search for "Lorem §247" shouldn't match "Lorem 247" + r = await self.async_client.get( + reverse("show_results"), {"q": '"Lorem §247"'} + ) + actual = self.get_article_count(r) + self.assertEqual(actual, 1) + self.assertIn("1:21-cv-1234", r.content.decode()) + class SearchAPIV4CommonTest(ESIndexTestCase, TestCase): """Common tests for the Search API V4 endpoints.""" diff --git a/cl/settings/third_party/elasticsearch.py b/cl/settings/third_party/elasticsearch.py index 7a1ec6b779..656ada84a1 100644 --- a/cl/settings/third_party/elasticsearch.py +++ b/cl/settings/third_party/elasticsearch.py @@ -125,6 +125,7 @@ "filter": { "custom_word_delimiter_filter": { "type": "word_delimiter", + "type_table": ["§ => ALPHA"], "split_on_numerics": False, "preserve_original": True, },