Skip to content

Commit

Permalink
Merge branch 'main' into update_ordering_command
Browse files Browse the repository at this point in the history
  • Loading branch information
quevon24 authored Aug 30, 2024
2 parents 9b4135c + 68cc6de commit 6680637
Show file tree
Hide file tree
Showing 6 changed files with 198 additions and 18 deletions.
8 changes: 8 additions & 0 deletions cl/recap/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,11 @@ class DocketDataFactory(DictFactory):
length=5, chars=string.ascii_lowercase
)
federal_defendant_number = Faker("pyint", min_value=1, max_value=999)


class DocketEntryWithAttachmentsDataFactory(MinuteDocketEntryDataFactory):
    """Docket entry test data that also carries an ``attachments`` list.

    Extends ``MinuteDocketEntryDataFactory`` with an ``attachments`` field
    that defaults to a one-element list built by
    ``AppellateAttachmentPageFactory``. Callers may pass their own
    ``attachments`` list to override the default.
    """

    # NOTE(review): the tests added alongside this factory populate
    # ``attachments`` with ``AppellateAttachmentFactory`` items; confirm that
    # the page-level factory is the intended default element here.
    attachments = List([SubFactory(AppellateAttachmentPageFactory)])


class DocketDataWithAttachmentsFactory(DocketDataFactory):
    """Docket test data whose default docket entry includes attachments.

    Extends ``DocketDataFactory`` by setting ``docket_entries`` to a
    one-element list built by ``DocketEntryWithAttachmentsDataFactory``.
    Callers may pass their own ``docket_entries`` list to override it.
    """

    docket_entries = List([SubFactory(DocketEntryWithAttachmentsDataFactory)])
32 changes: 26 additions & 6 deletions cl/recap/mergers.py
Original file line number Diff line number Diff line change
Expand Up @@ -921,6 +921,7 @@ async def add_docket_entries(
get_params = deepcopy(params)
if de_created is False and not appelate_court_id_exists:
del get_params["document_type"]
get_params["pacer_doc_id"] = docket_entry["pacer_doc_id"]
rd = await RECAPDocument.objects.aget(**get_params)
rds_updated.append(rd)
except RECAPDocument.DoesNotExist:
Expand Down Expand Up @@ -1715,7 +1716,7 @@ async def merge_attachment_page_data(
main_rd_to_att = False
for attachment in attachment_dicts:
sanity_checks = [
attachment["attachment_number"],
attachment.get("attachment_number") is not None,
# Missing on sealed items.
attachment.get("pacer_doc_id", False),
attachment["description"],
Expand All @@ -1742,24 +1743,27 @@ async def merge_attachment_page_data(
params = {
"docket_entry": de,
"document_number": document_number,
"attachment_number": attachment["attachment_number"],
"document_type": RECAPDocument.ATTACHMENT,
}
if attachment["attachment_number"] == 0:
params["document_type"] = RECAPDocument.PACER_DOCUMENT
else:
params["attachment_number"] = attachment["attachment_number"]
params["document_type"] = RECAPDocument.ATTACHMENT
if "acms_document_guid" in attachment:
params["acms_document_guid"] = attachment["acms_document_guid"]
try:
rd = await RECAPDocument.objects.aget(**params)
except RECAPDocument.DoesNotExist:
try:
doc_id_params = deepcopy(params)
del doc_id_params["attachment_number"]
doc_id_params.pop("attachment_number", None)
del doc_id_params["document_type"]
doc_id_params["pacer_doc_id"] = attachment["pacer_doc_id"]
rd = await RECAPDocument.objects.aget(**doc_id_params)
if attachment.get("attachment_number") == 0:
if attachment["attachment_number"] == 0:
try:
old_main_rd = await RECAPDocument.objects.aget(
de=de,
docket_entry=de,
document_type=RECAPDocument.PACER_DOCUMENT,
)
rd.description = old_main_rd.description
Expand All @@ -1779,6 +1783,22 @@ async def merge_attachment_page_data(
rd.document_type = RECAPDocument.ATTACHMENT
except RECAPDocument.DoesNotExist:
rd = RECAPDocument(**params)
if attachment["attachment_number"] == 0:
try:
old_main_rd = await RECAPDocument.objects.aget(
docket_entry=de,
document_type=RECAPDocument.PACER_DOCUMENT,
)
rd.description = old_main_rd.description
except RECAPDocument.DoesNotExist:
rd.description = ""
except RECAPDocument.MultipleObjectsReturned:
rd.description = ""
logger.info(
f"Failed to migrate description for "
f"{attachment["pacer_doc_id"]}, "
f"multiple source documents found."
)
rds_created.append(rd)

rds_affected.append(rd)
Expand Down
149 changes: 148 additions & 1 deletion cl/recap/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,10 @@
AppellateAttachmentFactory,
AppellateAttachmentPageFactory,
DocketDataFactory,
DocketDataWithAttachmentsFactory,
DocketEntriesDataFactory,
DocketEntryDataFactory,
DocketEntryWithAttachmentsDataFactory,
FjcIntegratedDatabaseFactory,
MinuteDocketEntryDataFactory,
PacerFetchQueueFactory,
Expand Down Expand Up @@ -2600,7 +2602,7 @@ def test_merge_docket_number_components(
class RecapDocketAttachmentTaskTest(TestCase):
@classmethod
def setUpTestData(cls):
CourtFactory(id="cand", jurisdiction="FD")
cls.court = CourtFactory(id="cand", jurisdiction="FD")

def setUp(self) -> None:
self.user = User.objects.get(username="recap")
Expand Down Expand Up @@ -2640,6 +2642,151 @@ def test_attachments_get_created(self, mock):
self.pq.refresh_from_db()
self.assertEqual(self.pq.status, PROCESSING_STATUS.SUCCESSFUL)

@mock.patch(
    "cl.api.webhooks.requests.post",
    side_effect=lambda *args, **kwargs: MockResponse(200, mock_raw=True),
)
def test_main_document_doesnt_match_attachment_zero_on_creation(
    self,
    mock_solr,
    mock_webhook_post,
):
    """Confirm that attachment 0 is properly set as the Main document if
    the docket entry's pacer_doc_id does not match the Main document's
    pacer_doc_id on creation.
    """
    # NOTE(review): only one patch is visible on this method, yet it takes
    # two mock arguments; the other presumably comes from a class-level
    # patch outside this view. Confirm the parameter names match the order
    # in which the mocks are injected (neither mock is used below).
    docket = DocketFactory(
        source=Docket.RECAP,
        court=self.court,
        pacer_case_id="238743",
    )
    # A single docket entry (pacer_doc_id 1234567) whose attachment 0 has a
    # different pacer_doc_id (1234566) than the entry itself, while
    # attachment 1 shares the entry's pacer_doc_id.
    docket_data = DocketDataWithAttachmentsFactory(
        docket_entries=[
            DocketEntryWithAttachmentsDataFactory(
                document_number=1,
                pacer_doc_id="1234567",
                short_description="Complaint",
                attachments=[
                    AppellateAttachmentFactory(
                        attachment_number=0,
                        pacer_doc_id="1234566",
                        description="Main Document",
                    ),
                    AppellateAttachmentFactory(
                        attachment_number=1,
                        pacer_doc_id="1234567",
                        description="Attachment 1",
                    ),
                ],
            ),
        ],
    )
    async_to_sync(add_docket_entries)(
        docket, docket_data["docket_entries"]
    )

    # Attachment 0 must end up as the main PACER document, with no
    # attachment number and the entry's short description.
    main_rd = RECAPDocument.objects.get(pacer_doc_id="1234566")
    self.assertEqual(
        main_rd.document_type,
        RECAPDocument.PACER_DOCUMENT,
        msg="PACER_DOCUMENT type didn't match.",
    )
    self.assertIsNone(main_rd.attachment_number)
    self.assertEqual(main_rd.description, "Complaint")

    # The document sharing the entry's pacer_doc_id must be attachment 1.
    attachment_1 = RECAPDocument.objects.get(pacer_doc_id="1234567")
    self.assertEqual(
        attachment_1.document_type,
        RECAPDocument.ATTACHMENT,
        msg="ATTACHMENT type didn't match.",
    )
    self.assertEqual(attachment_1.attachment_number, 1)
    self.assertEqual(attachment_1.description, "Attachment 1")

@mock.patch(
    "cl.api.webhooks.requests.post",
    side_effect=lambda *args, **kwargs: MockResponse(200, mock_raw=True),
)
def test_main_document_doesnt_match_attachment_zero_existing(
    self,
    mock_solr,
    mock_webhook_post,
):
    """Confirm that attachment 0 is properly set as the Main document if
    the docket entry's pacer_doc_id does not match the Main document's
    pacer_doc_id on an existing document.
    """
    # NOTE(review): only one patch is visible on this method, yet it takes
    # two mock arguments; the other presumably comes from a class-level
    # patch outside this view. Confirm the parameter names match the order
    # in which the mocks are injected (neither mock is used below).
    docket = DocketFactory(
        source=Docket.RECAP,
        court=self.court,
        pacer_case_id="238743",
    )

    # First pass: merge the docket entry without any attachment data.
    docket_data_no_att = DocketDataWithAttachmentsFactory(
        docket_entries=[
            DocketEntryWithAttachmentsDataFactory(
                document_number=1, pacer_doc_id="1234567", attachments=[]
            ),
        ],
    )
    async_to_sync(add_docket_entries)(
        docket, docket_data_no_att["docket_entries"]
    )

    # When attachment data is unknown, the main PACER_DOCUMENT should be
    # set to pacer_doc_id 1234567.
    main_rd = RECAPDocument.objects.get(pacer_doc_id="1234567")
    self.assertEqual(
        main_rd.document_type,
        RECAPDocument.PACER_DOCUMENT,
        msg="PACER_DOCUMENT type didn't match.",
    )
    self.assertIsNone(main_rd.attachment_number)

    # Second pass: merge the same entry again, now with attachment data in
    # which attachment 0 has a different pacer_doc_id than the entry.
    docket_data_att = DocketDataWithAttachmentsFactory(
        docket_entries=[
            DocketEntryWithAttachmentsDataFactory(
                document_number=1,
                pacer_doc_id="1234567",
                short_description="Complaint",
                attachments=[
                    AppellateAttachmentFactory(
                        attachment_number=0,
                        pacer_doc_id="1234566",
                        description="Main Document",
                    ),
                    AppellateAttachmentFactory(
                        attachment_number=1,
                        pacer_doc_id="1234567",
                        description="Attachment 1",
                    ),
                ],
            ),
        ],
    )
    async_to_sync(add_docket_entries)(
        docket, docket_data_att["docket_entries"]
    )

    # After merging attachments, the main PACER_DOCUMENT should now be set
    # to attachment 0 with pacer_doc_id 1234566.
    main_rd = RECAPDocument.objects.get(pacer_doc_id="1234566")
    self.assertEqual(
        main_rd.document_type,
        RECAPDocument.PACER_DOCUMENT,
        msg="PACER_DOCUMENT type didn't match.",
    )
    self.assertIsNone(main_rd.attachment_number)
    self.assertEqual(main_rd.description, "Complaint")

    # pacer_doc_id 1234567 should now be an attachment.
    attachment_1 = RECAPDocument.objects.get(pacer_doc_id="1234567")
    self.assertEqual(
        attachment_1.document_type,
        RECAPDocument.ATTACHMENT,
        msg="ATTACHMENT type didn't match.",
    )
    self.assertEqual(attachment_1.attachment_number, 1)
    self.assertEqual(attachment_1.description, "Attachment 1")


class ClaimsRegistryTaskTest(TestCase):
"""Can we handle claims registry uploads?"""
Expand Down
9 changes: 1 addition & 8 deletions cl/search/management/commands/sweep_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,7 @@
)
from cl.search.types import ESDocumentClassType

supported_models = [
"audio.Audio",
"people_db.Person",
"search.OpinionCluster",
"search.Opinion",
"search.Docket",
"search.RECAPDocument",
]
# Model labels this command supports, taken from the
# ELASTICSEARCH_SWEEP_INDEXER_MODELS setting rather than a list hard-coded
# in this module.
supported_models = settings.ELASTICSEARCH_SWEEP_INDEXER_MODELS # type: ignore
# Module-level Redis interface obtained for "CACHE".
r = get_redis_interface("CACHE")


Expand Down
12 changes: 12 additions & 0 deletions cl/settings/third_party/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,18 @@
ELASTICSEARCH_SWEEP_INDEXER_HEADS_RATE = env(
"ELASTICSEARCH_SWEEP_INDEXER_HEADS_RATE", default=60
)
# Model labels ("app_label.ModelName") that the sweep_indexer management
# command reads into its `supported_models` list. The value is looked up
# through `env` under the same name, falling back to the list below.
# NOTE(review): presumably `env` casts an environment override to a list
# based on the type of `default` -- confirm, since a plain string value
# would not behave like this list.
ELASTICSEARCH_SWEEP_INDEXER_MODELS = env(
"ELASTICSEARCH_SWEEP_INDEXER_MODELS",
default=[
"audio.Audio",
"people_db.Person",
"search.OpinionCluster",
"search.Opinion",
"search.Docket",
"search.RECAPDocument",
],
)


ELASTICSEARCH_MAX_RESULT_COUNT = 10_000
ELASTICSEARCH_CARDINALITY_PRECISION = 2000
Expand Down
6 changes: 3 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 6680637

Please sign in to comment.