diff --git a/cl/favorites/api_serializers.py b/cl/favorites/api_serializers.py
index a12bb4c101..60b9efedbd 100644
--- a/cl/favorites/api_serializers.py
+++ b/cl/favorites/api_serializers.py
@@ -67,7 +67,7 @@ def validate(self, data):
             )
 
         # Check if the user is eligible to create a new prayer
-        if not async_to_sync(prayer_eligible)(user):
+        if not async_to_sync(prayer_eligible)(user)[0]:
             raise ValidationError(
                 f"You have reached the maximum number of prayers ({settings.ALLOWED_PRAYER_COUNT}) allowed in the last 24 hours."
             )
diff --git a/cl/favorites/templates/user_prayers.html b/cl/favorites/templates/user_prayers.html
index 4ada025de6..f010632d16 100644
--- a/cl/favorites/templates/user_prayers.html
+++ b/cl/favorites/templates/user_prayers.html
@@ -14,7 +14,7 @@
 {% block content %}
   {% if is_page_owner %}Your PACER Document Prayers{% else %}PACER Document Requests for: {{ requested_user }}{% endif %}
-  {% if is_page_owner %}{{ count|intcomma }} prayers granted totaling ${{total_cost|floatformat:2 }}.{% endif %}
+  {% if is_page_owner %}{{ count|intcomma }} prayers granted totaling ${{total_cost|floatformat:2 }} ({{ num_remaining }} remaining today).{% endif %}
   {% if is_page_owner %}hx-trigger="prayersListChanged from:body" hx-swap="none" {%endif %}>
-  {% if is_page_owner %}
-    {% if is_eligible %}You are eligible to make document requests.{% else %}You have reached your daily limit; wait 24 hours to make new requests.{% endif %}
-  {% endif %}
diff --git a/cl/favorites/tests.py b/cl/favorites/tests.py index 35c32652a2..e5ad413153 100644 --- a/cl/favorites/tests.py +++ b/cl/favorites/tests.py @@ -699,7 +699,7 @@ async def test_prayer_eligible(self) -> None: current_time = now() with time_machine.travel(current_time, tick=False): # No user prayers in the last 24 hours yet for this user. - user_is_eligible = await prayer_eligible(self.user) + user_is_eligible, _ = await prayer_eligible(self.user) self.assertTrue(user_is_eligible) # Add prays for this user. @@ -709,7 +709,7 @@ async def test_prayer_eligible(self) -> None: user_prays = Prayer.objects.filter(user=self.user) self.assertEqual(await user_prays.acount(), 1) - user_is_eligible = await prayer_eligible(self.user) + user_is_eligible, _ = await prayer_eligible(self.user) self.assertTrue(user_is_eligible) await sync_to_async(PrayerFactory)( @@ -719,7 +719,7 @@ async def test_prayer_eligible(self) -> None: # After two prays (ALLOWED_PRAYER_COUNT) in the last 24 hours. # The user is no longer eligible to create more prays - user_is_eligible = await prayer_eligible(self.user) + user_is_eligible, _ = await prayer_eligible(self.user) self.assertFalse(user_is_eligible) with time_machine.travel( @@ -730,7 +730,7 @@ async def test_prayer_eligible(self) -> None: user=self.user, recap_document=self.rd_3 ) self.assertEqual(await user_prays.acount(), 3) - user_is_eligible = await prayer_eligible(self.user) + user_is_eligible, _ = await prayer_eligible(self.user) self.assertTrue(user_is_eligible) async def test_create_prayer(self) -> None: diff --git a/cl/favorites/utils.py b/cl/favorites/utils.py index 9e87389890..7de09fbb7e 100644 --- a/cl/favorites/utils.py +++ b/cl/favorites/utils.py @@ -28,7 +28,7 @@ from cl.search.models import RECAPDocument -async def prayer_eligible(user: User) -> bool: +async def prayer_eligible(user: User) -> tuple[bool, int]: allowed_prayer_count = settings.ALLOWED_PRAYER_COUNT now = timezone.now() @@ -39,13 +39,15 @@ async def prayer_eligible(user: User) -> bool: user=user, date_created__gte=last_24_hours ).acount() - return prayer_count < allowed_prayer_count + return prayer_count < allowed_prayer_count, ( + allowed_prayer_count - prayer_count + ) async def create_prayer( user: User, recap_document: RECAPDocument ) -> Prayer | None: - if await prayer_eligible(user) and not recap_document.is_available: + if (await prayer_eligible(user))[0] and not recap_document.is_available: new_prayer, created = await Prayer.objects.aget_or_create( user=user, recap_document=recap_document ) diff --git a/cl/favorites/views.py b/cl/favorites/views.py index bd2c8ea5b5..7cd880e1af 100644 --- a/cl/favorites/views.py +++ b/cl/favorites/views.py @@ -212,7 +212,7 @@ async def create_prayer_view( user = request.user is_htmx_request = request.META.get("HTTP_HX_REQUEST", False) regular_size = bool(request.POST.get("regular_size")) - if not await prayer_eligible(request.user): + if not (await prayer_eligible(request.user))[0]: if is_htmx_request: return TemplateResponse( request, @@ -291,7 +291,7 @@ async def user_prayers_view( count, total_cost = await get_user_prayer_history(requested_user) - is_eligible = await prayer_eligible(requested_user) + is_eligible, num_remaining = await prayer_eligible(requested_user) context = { "rd_with_prayers": rd_with_prayers, @@ -300,6 +300,7 @@ async def user_prayers_view( "count": count, "total_cost": total_cost, "is_eligible": is_eligible, + "num_remaining": num_remaining, "private": False, } diff --git a/cl/recap/api_serializers.py 
b/cl/recap/api_serializers.py index e20c5be0a8..48fc52ef66 100644 --- a/cl/recap/api_serializers.py +++ b/cl/recap/api_serializers.py @@ -95,10 +95,10 @@ def validate(self, attrs): UPLOAD_TYPE.CASE_QUERY_RESULT_PAGE, ]: # These are district or bankruptcy court dockets. Is the court valid? - court_ids = Court.federal_courts.district_or_bankruptcy_pacer_courts().values_list( - "pk", flat=True + court_ids = ( + Court.federal_courts.district_or_bankruptcy_pacer_courts() ) - if attrs["court"].pk not in court_ids: + if not court_ids.filter(pk=attrs["court"].pk).exists(): raise ValidationError( "%s is not a district or bankruptcy court ID. Did you " "mean to use the upload_type for appellate dockets?" @@ -108,11 +108,9 @@ def validate(self, attrs): if attrs["upload_type"] == UPLOAD_TYPE.CLAIMS_REGISTER: # Only allowed on bankruptcy courts bankruptcy_court_ids = ( - Court.federal_courts.bankruptcy_pacer_courts().values_list( - "pk", flat=True - ) + Court.federal_courts.bankruptcy_pacer_courts() ) - if attrs["court"].pk not in bankruptcy_court_ids: + if not bankruptcy_court_ids.filter(pk=attrs["court"].pk).exists(): raise ValidationError( "%s is not a bankruptcy court ID. Only bankruptcy cases " "should have claims registry pages." % attrs["court"] @@ -127,12 +125,8 @@ def validate(self, attrs): UPLOAD_TYPE.APPELLATE_CASE_QUERY_RESULT_PAGE, ]: # Appellate court dockets. Is the court valid? - appellate_court_ids = ( - Court.federal_courts.appellate_pacer_courts().values_list( - "pk", flat=True - ) - ) - if attrs["court"].pk not in appellate_court_ids: + appellate_court_ids = Court.federal_courts.appellate_pacer_courts() + if not appellate_court_ids.filter(pk=attrs["court"].pk).exists(): raise ValidationError( "%s is not an appellate court ID. Did you mean to use the " "upload_type for district dockets?" % attrs["court"] @@ -203,11 +197,8 @@ def validate(self, attrs): mail = attrs["mail"] receipt = attrs["receipt"] - all_court_ids = Court.federal_courts.all_pacer_courts().values_list( - "pk", flat=True - ) - - if court_id not in all_court_ids: + all_court_ids = Court.federal_courts.all_pacer_courts() + if not all_court_ids.filter(pk=court_id).exists(): raise ValidationError( f"{attrs['court'].pk} is not a PACER court ID." ) @@ -274,10 +265,9 @@ class Meta: def validate(self, attrs): # Is it a good court value? 
- valid_court_ids = Court.federal_courts.district_or_bankruptcy_pacer_courts().values_list( - "pk", flat=True + valid_court_ids = ( + Court.federal_courts.district_or_bankruptcy_pacer_courts() ) - if ( attrs.get("court") or attrs.get("docket") @@ -293,7 +283,7 @@ def validate(self, attrs): if attrs.get("court") else attrs["docket"].court_id ) - if court_id not in valid_court_ids: + if not valid_court_ids.filter(pk=court_id).exists(): raise ValidationError(f"Invalid court id: {court_id}") # Docket validations diff --git a/cl/recap/factories.py b/cl/recap/factories.py index 9b786ed4fd..64f3afb714 100644 --- a/cl/recap/factories.py +++ b/cl/recap/factories.py @@ -93,6 +93,7 @@ class RECAPEmailDocketEntryDataFactory(DictFactory): pacer_doc_id = Faker("random_id_string") pacer_magic_num = Faker("random_id_string") pacer_seq_no = Faker("random_id_string") + short_description = Faker("text", max_nb_chars=15) class RECAPEmailDocketDataFactory(DictFactory): diff --git a/cl/recap/mergers.py b/cl/recap/mergers.py index 0bbef5a5ec..95fd75cc98 100644 --- a/cl/recap/mergers.py +++ b/cl/recap/mergers.py @@ -822,6 +822,35 @@ async def get_or_make_docket_entry( return de, de_created +async def keep_latest_rd_document(queryset: QuerySet) -> RECAPDocument: + """Retains the most recent item with a PDF, if available otherwise, + retains the most recent item overall. + + :param queryset: RECAPDocument QuerySet to clean duplicates from. + :return: The matched RECAPDocument after cleaning. + """ + rd_with_pdf_queryset = queryset.filter(is_available=True).exclude( + filepath_local="" + ) + if await rd_with_pdf_queryset.aexists(): + rd = await rd_with_pdf_queryset.alatest("date_created") + else: + rd = await queryset.alatest("date_created") + await queryset.exclude(pk=rd.pk).adelete() + return rd + + +async def clean_duplicate_documents(params: dict[str, Any]) -> RECAPDocument: + """Removes duplicate RECAPDocuments, keeping the most recent with PDF if + available or otherwise the most recent overall. + + :param params: Query parameters to filter the RECAPDocuments. + :return: The matched RECAPDocument after cleaning. + """ + duplicate_rd_queryset = RECAPDocument.objects.filter(**params) + return await keep_latest_rd_document(duplicate_rd_queryset) + + async def add_docket_entries( d: Docket, docket_entries: list[dict[str, Any]], @@ -934,17 +963,28 @@ async def add_docket_entries( rd = await RECAPDocument.objects.aget(**get_params) rds_updated.append(rd) except RECAPDocument.DoesNotExist: - try: - params["pacer_doc_id"] = docket_entry["pacer_doc_id"] - rd = await RECAPDocument.objects.acreate( - document_number=docket_entry["document_number"] or "", - is_available=False, - **params, - ) - except ValidationError: - # Happens from race conditions. - continue - rds_created.append(rd) + rd = None + if de_created is False and not appelate_court_id_exists: + try: + # Check for documents with a bad pacer_doc_id + rd = await RECAPDocument.objects.aget(**params) + except RECAPDocument.DoesNotExist: + # Fallback to creating document + pass + except RECAPDocument.MultipleObjectsReturned: + rd = await clean_duplicate_documents(params) + if rd is None: + try: + params["pacer_doc_id"] = docket_entry["pacer_doc_id"] + rd = await RECAPDocument.objects.acreate( + document_number=docket_entry["document_number"] or "", + is_available=False, + **params, + ) + rds_created.append(rd) + except ValidationError: + # Happens from race conditions. 
+ continue except RECAPDocument.MultipleObjectsReturned: logger.info( "Multiple recap documents found for document entry number'%s' " @@ -952,17 +992,10 @@ async def add_docket_entries( ) if params["document_type"] == RECAPDocument.ATTACHMENT: continue - duplicate_rd_queryset = RECAPDocument.objects.filter(**params) - rd_with_pdf_queryset = duplicate_rd_queryset.filter( - is_available=True - ).exclude(filepath_local="") - if await rd_with_pdf_queryset.aexists(): - rd = await rd_with_pdf_queryset.alatest("date_created") - else: - rd = await duplicate_rd_queryset.alatest("date_created") - await duplicate_rd_queryset.exclude(pk=rd.pk).adelete() + rd = await clean_duplicate_documents(params) - rd.pacer_doc_id = rd.pacer_doc_id or docket_entry["pacer_doc_id"] + if docket_entry["pacer_doc_id"]: + rd.pacer_doc_id = docket_entry["pacer_doc_id"] description = docket_entry.get("short_description") if rd.document_type == RECAPDocument.PACER_DOCUMENT and description: rd.description = description @@ -1604,14 +1637,7 @@ async def clean_duplicate_attachment_entries( ) async for dupe in dupes.aiterator(): duplicate_rd_queryset = rds.filter(pacer_doc_id=dupe.pacer_doc_id) - rd_with_pdf_queryset = duplicate_rd_queryset.filter( - is_available=True - ).exclude(filepath_local="") - if await rd_with_pdf_queryset.aexists(): - keep_rd = await rd_with_pdf_queryset.alatest("date_created") - else: - keep_rd = await duplicate_rd_queryset.alatest("date_created") - await duplicate_rd_queryset.exclude(pk=keep_rd.pk).adelete() + await keep_latest_rd_document(duplicate_rd_queryset) async def merge_attachment_page_data( @@ -1673,15 +1699,7 @@ async def merge_attachment_page_data( except RECAPDocument.MultipleObjectsReturned as exc: if pacer_case_id: - duplicate_rd_queryset = RECAPDocument.objects.filter(**params) - rd_with_pdf_queryset = duplicate_rd_queryset.filter( - is_available=True - ).exclude(filepath_local="") - if await rd_with_pdf_queryset.aexists(): - keep_rd = await rd_with_pdf_queryset.alatest("date_created") - else: - keep_rd = await duplicate_rd_queryset.alatest("date_created") - await duplicate_rd_queryset.exclude(pk=keep_rd.pk).adelete() + await clean_duplicate_documents(params) main_rd = await RECAPDocument.objects.select_related( "docket_entry", "docket_entry__docket" ).aget(**params) @@ -1711,23 +1729,7 @@ async def merge_attachment_page_data( break except RECAPDocument.MultipleObjectsReturned as exc: if pacer_case_id: - duplicate_rd_queryset = RECAPDocument.objects.filter( - **params - ) - rd_with_pdf_queryset = duplicate_rd_queryset.filter( - is_available=True - ).exclude(filepath_local="") - if await rd_with_pdf_queryset.aexists(): - keep_rd = await rd_with_pdf_queryset.alatest( - "date_created" - ) - else: - keep_rd = await duplicate_rd_queryset.alatest( - "date_created" - ) - await duplicate_rd_queryset.exclude( - pk=keep_rd.pk - ).adelete() + await clean_duplicate_documents(params) main_rd = await RECAPDocument.objects.select_related( "docket_entry", "docket_entry__docket" ).aget(**params) diff --git a/cl/recap/tasks.py b/cl/recap/tasks.py index 026b1ca2ef..ee674a9f25 100644 --- a/cl/recap/tasks.py +++ b/cl/recap/tasks.py @@ -20,6 +20,7 @@ from django.core.files.base import ContentFile, File from django.core.files.uploadedfile import SimpleUploadedFile from django.db import IntegrityError, transaction +from django.db.models import QuerySet from django.utils.timezone import now from juriscraper.lib.exceptions import PacerLoginException, ParsingException from juriscraper.lib.string_utils import 
CaseNameTweaker, harmonize @@ -114,7 +115,9 @@ async def process_recap_upload(pq: ProcessingQueue) -> None: for pq_pk in sub_docket_att_page_pks: await process_recap_attachment(pq_pk) elif pq.upload_type == UPLOAD_TYPE.PDF: - await process_recap_pdf(pq.pk) + sub_docket_pdf_pks = await find_subdocket_pdf_rds(pq.pk) + for pq_pk in sub_docket_pdf_pks: + await process_recap_pdf(pq_pk) elif pq.upload_type == UPLOAD_TYPE.DOCKET_HISTORY_REPORT: docket = await process_recap_docket_history_report(pq.pk) elif pq.upload_type == UPLOAD_TYPE.APPELLATE_DOCKET: @@ -676,6 +679,30 @@ async def get_att_data_from_pq( return pq, att_data, text +def get_main_rds(court_id: str, pacer_doc_id: str) -> QuerySet: + """ + Return the main RECAPDocument queryset for a given court and pacer_doc_id. + :param court_id: The court ID to query. + :param pacer_doc_id: The pacer document ID. + :return: The main RECAPDocument queryset. + """ + main_rds_qs = ( + RECAPDocument.objects.select_related("docket_entry__docket") + .filter( + pacer_doc_id=pacer_doc_id, + docket_entry__docket__court_id=court_id, + ) + .order_by("docket_entry__docket__pacer_case_id") + .distinct("docket_entry__docket__pacer_case_id") + .only( + "pacer_doc_id", + "docket_entry__docket__pacer_case_id", + "docket_entry__docket__court_id", + ) + ) + return main_rds_qs + + async def find_subdocket_att_page_rds( pk: int, ) -> list[int]: @@ -687,43 +714,100 @@ async def find_subdocket_att_page_rds( """ pq = await ProcessingQueue.objects.aget(pk=pk) - court = await Court.objects.aget(id=pq.court_id) pq, att_data, text = await get_att_data_from_pq(pq) pacer_doc_id = att_data["pacer_doc_id"] - main_rds = ( - RECAPDocument.objects.select_related("docket_entry__docket") - .filter( - pacer_doc_id=pacer_doc_id, - docket_entry__docket__court=court, - ) - .order_by("docket_entry__docket__pacer_case_id") - .distinct("docket_entry__docket__pacer_case_id") - .only( - "pacer_doc_id", - "docket_entry__docket__pacer_case_id", - "docket_entry__docket__court_id", - ) - .exclude(docket_entry__docket__pacer_case_id=pq.pacer_case_id) + main_rds = get_main_rds(pq.court_id, pacer_doc_id).exclude( + docket_entry__docket__pacer_case_id=pq.pacer_case_id ) pqs_to_process_pks = [ pq.pk ] # Add the original pq to the list of pqs to process original_file_content = text.encode("utf-8") original_file_name = pq.filepath_local.name - async for main_rd in main_rds: - main_pacer_case_id = main_rd.docket_entry.docket.pacer_case_id - # Create additional pqs for each subdocket case found. - pq_created = await ProcessingQueue.objects.acreate( - uploader_id=pq.uploader_id, - pacer_doc_id=pacer_doc_id, - pacer_case_id=main_pacer_case_id, - court_id=court.pk, - upload_type=UPLOAD_TYPE.ATTACHMENT_PAGE, - filepath_local=ContentFile( - original_file_content, name=original_file_name - ), + + @sync_to_async + def save_pq_instances(): + with transaction.atomic(): + for main_rd in main_rds: + main_pacer_case_id = main_rd.docket_entry.docket.pacer_case_id + # Create additional pqs for each subdocket case found. 
+ pq_created = ProcessingQueue.objects.create( + uploader_id=pq.uploader_id, + pacer_doc_id=pacer_doc_id, + pacer_case_id=main_pacer_case_id, + court_id=pq.court_id, + upload_type=UPLOAD_TYPE.ATTACHMENT_PAGE, + filepath_local=ContentFile( + original_file_content, name=original_file_name + ), + ) + pqs_to_process_pks.append(pq_created.pk) + + await save_pq_instances() + return pqs_to_process_pks + + +async def find_subdocket_pdf_rds( + pk: int, +) -> list[int]: + """Look for RECAP Documents that belong to subdockets, and create a PQ + object for each additional PDF upload that requires processing. + + :param pk: Primary key of the processing queue item. + :return: A list of ProcessingQueue pks to process. + """ + + pq = await ProcessingQueue.objects.aget(pk=pk) + main_rds = get_main_rds(pq.court_id, pq.pacer_doc_id) + pqs_to_process_pks = [ + pq.pk + ] # Add the original pq to the list of pqs to process + + appellate_court_ids = Court.federal_courts.appellate_pacer_courts() + if await appellate_court_ids.filter(pk=pq.court_id).aexists(): + # Abort the process for appellate documents. Subdockets cannot be found + # in appellate cases. + return pqs_to_process_pks + + if pq.pacer_case_id: + # If pq already has a pacer_case_id, exclude it from the queryset. + main_rds = main_rds.exclude( + docket_entry__docket__pacer_case_id=pq.pacer_case_id ) - pqs_to_process_pks.append(pq_created.pk) + + pdf_binary_content = pq.filepath_local.read() + + @sync_to_async + def save_pq_instances(): + with transaction.atomic(): + for i, main_rd in enumerate(main_rds): + if i == 0 and not pq.pacer_case_id: + # If the original PQ does not have a pacer_case_id, + # assign it a pacer_case_id from one of the matched RDs + # to ensure the RD lookup in process_recap_pdf succeeds. + pq.pacer_case_id = ( + main_rd.docket_entry.docket.pacer_case_id + ) + pq.save() + continue + + main_pacer_case_id = main_rd.docket_entry.docket.pacer_case_id + # Create additional pqs for each subdocket case found. 
+ pq_created = ProcessingQueue.objects.create( + uploader_id=pq.uploader_id, + pacer_doc_id=pq.pacer_doc_id, + pacer_case_id=main_pacer_case_id, + document_number=pq.document_number, + attachment_number=pq.attachment_number, + court_id=pq.court_id, + upload_type=UPLOAD_TYPE.PDF, + filepath_local=ContentFile( + pdf_binary_content, name=pq.filepath_local.name + ), + ) + pqs_to_process_pks.append(pq_created.pk) + + await save_pq_instances() return pqs_to_process_pks @@ -747,10 +831,6 @@ async def process_recap_attachment( await mark_pq_status(pq, "", PROCESSING_STATUS.IN_PROGRESS) logger.info(f"Processing RECAP item (debug is: {pq.debug}): {pq}") - pq = await ProcessingQueue.objects.aget(pk=pk) - await mark_pq_status(pq, "", PROCESSING_STATUS.IN_PROGRESS) - logger.info(f"Processing RECAP item (debug is: {pq.debug}): {pq}") - pq, att_data, text = await get_att_data_from_pq(pq) if document_number is None: diff --git a/cl/recap/tests.py b/cl/recap/tests.py index bb249b6246..5013ff062c 100644 --- a/cl/recap/tests.py +++ b/cl/recap/tests.py @@ -17,6 +17,7 @@ from django.core.files.base import ContentFile from django.core.files.uploadedfile import SimpleUploadedFile from django.core.management import call_command +from django.db import transaction from django.test import RequestFactory, override_settings from django.urls import reverse from django.utils.timezone import now @@ -182,28 +183,6 @@ def setUpTestData(cls): ], ) - cls.att_data_2 = AppellateAttachmentPageFactory( - attachments=[ - AppellateAttachmentFactory( - pacer_doc_id="04505578698", attachment_number=1 - ), - AppellateAttachmentFactory( - pacer_doc_id="04505578699", attachment_number=2 - ), - ], - pacer_doc_id="04505578697", - pacer_case_id="104491", - document_number="1", - ) - cls.de_data_2 = DocketEntriesDataFactory( - docket_entries=[ - DocketEntryDataFactory( - pacer_doc_id="04505578697", - document_number=1, - ) - ], - ) - def setUp(self) -> None: self.async_client = AsyncAPIClient() self.user = User.objects.get(username="recap") @@ -793,39 +772,166 @@ def test_processing_an_acms_attachment_page(self, mock_upload): main_attachment[0].document_type, RECAPDocument.ATTACHMENT ) - def test_processing_subdocket_case_attachment_page(self, mock_upload): - """Can we replicate an attachment page upload from a subdocket case - to its corresponding RD across all related dockets? + def test_match_recap_document_with_wrong_pacer_doc_id(self, mock_upload): + """Confirm that when an existing RECAPDocument has an invalid + pacer_doc_id, we can still match it after excluding the pacer_doc_id + from the lookup. 
""" - d_1 = DocketFactory( + de_data = DocketEntriesDataFactory( + docket_entries=[ + RECAPEmailDocketEntryDataFactory( + pacer_doc_id="04505578690", + document_number=5, + ) + ], + ) + de = DocketEntryWithParentsFactory( + docket__court=self.court, entry_number=5 + ) + rd = RECAPDocumentFactory( + docket_entry=de, + document_type=RECAPDocument.PACER_DOCUMENT, + pacer_doc_id="04505578691", + document_number="5", + description="", + ) + # Add the docket entry with the updated pacer_doc_id + async_to_sync(add_docket_entries)(de.docket, de_data["docket_entries"]) + recap_documents = RECAPDocument.objects.all() + self.assertEqual( + recap_documents.count(), 1, msg="Wrong number of RECAPDocuments" + ) + rd.refresh_from_db() + self.assertEqual( + rd.description, + de_data["docket_entries"][0]["short_description"], + msg="The short description doesn't match.", + ) + self.assertEqual( + rd.pacer_doc_id, + de_data["docket_entries"][0]["pacer_doc_id"], + msg="The pacer_doc_id doesn't match.", + ) + + def test_match_recap_document_with_wrong_pacer_doc_id_duplicated( + self, mock_upload + ): + """Confirm that when an existing RECAPDocument has an invalid + pacer_doc_id, we can still match it after excluding the pacer_doc_id + from the lookup, even if there is more than one PACER_DOCUMENT that + belongs to the docket entry. + """ + + de_data = DocketEntriesDataFactory( + docket_entries=[ + RECAPEmailDocketEntryDataFactory( + pacer_doc_id="04505578690", + document_number=5, + ) + ], + ) + de = DocketEntryWithParentsFactory( + docket__court=self.court, entry_number=5 + ) + RECAPDocumentFactory( + document_type=RECAPDocument.PACER_DOCUMENT, + docket_entry=de, + pacer_doc_id="04505578691", + document_number="5", + description="", + ) + rd_2 = RECAPDocumentFactory( + document_type=RECAPDocument.PACER_DOCUMENT, + docket_entry=de, + pacer_doc_id="04505578691", + document_number="6", + description="", + is_available=True, + ) + # Add the docket entry with the updated pacer_doc_id, remove the + # duplicated RD, and keep the one that is available. 
+ async_to_sync(add_docket_entries)(de.docket, de_data["docket_entries"]) + recap_documents = RECAPDocument.objects.all() + self.assertEqual( + recap_documents.count(), 1, msg="Wrong number of RECAPDocuments" + ) + rd_2.refresh_from_db() + self.assertEqual( + rd_2.description, + de_data["docket_entries"][0]["short_description"], + msg="The short description doesn't match.", + ) + self.assertEqual( + rd_2.pacer_doc_id, + de_data["docket_entries"][0]["pacer_doc_id"], + msg="The pacer_doc_id doesn't match.", + ) + + +class ReplicateRecapUploadsTest(TestCase): + """Test RECAP uploads are properly replicated to subdockets.""" + + @classmethod + def setUpTestData(cls): + cls.user = User.objects.get(username="recap") + cls.f = SimpleUploadedFile("file.txt", b"file content more content") + cls.court = CourtFactory.create(jurisdiction="FD", in_use=True) + cls.att_data_2 = AppellateAttachmentPageFactory( + attachments=[ + AppellateAttachmentFactory( + pacer_doc_id="04505578698", attachment_number=1 + ), + AppellateAttachmentFactory( + pacer_doc_id="04505578699", attachment_number=2 + ), + ], + pacer_doc_id="04505578697", + pacer_case_id="104491", + document_number="1", + ) + cls.de_data_2 = DocketEntriesDataFactory( + docket_entries=[ + DocketEntryDataFactory( + pacer_doc_id="04505578697", + document_number=1, + ) + ], + ) + + cls.d_1 = DocketFactory( source=Docket.RECAP, docket_number="23-4567", - court=self.court, + court=cls.court, pacer_case_id="104490", ) - d_2 = DocketFactory( + cls.d_2 = DocketFactory( source=Docket.RECAP, docket_number="23-4567", - court=self.court, + court=cls.court, pacer_case_id="104491", ) - d_3 = DocketFactory( + cls.d_3 = DocketFactory( source=Docket.RECAP, docket_number="23-4567", - court=self.court, + court=cls.court, pacer_case_id="104492", ) + def test_processing_subdocket_case_attachment_page(self): + """Can we replicate an attachment page upload from a subdocket case + to its corresponding RD across all related dockets? + """ + # Add the docket entry to every case. async_to_sync(add_docket_entries)( - d_1, self.de_data_2["docket_entries"] + self.d_1, self.de_data_2["docket_entries"] ) async_to_sync(add_docket_entries)( - d_2, self.de_data_2["docket_entries"] + self.d_2, self.de_data_2["docket_entries"] ) async_to_sync(add_docket_entries)( - d_3, self.de_data_2["docket_entries"] + self.d_3, self.de_data_2["docket_entries"] ) # Create an initial PQ. 
@@ -837,18 +943,18 @@ def test_processing_subdocket_case_attachment_page(self, mock_upload): filepath_local=self.f, ) d_1_recap_document = RECAPDocument.objects.filter( - docket_entry__docket=d_1 + docket_entry__docket=self.d_1 ) d_2_recap_document = RECAPDocument.objects.filter( - docket_entry__docket=d_2 + docket_entry__docket=self.d_2 ) d_3_recap_document = RECAPDocument.objects.filter( - docket_entry__docket=d_3 + docket_entry__docket=self.d_3 ) main_d_1_rd = d_1_recap_document[0] main_d_2_rd = d_2_recap_document[0] - main_d_3_rd = d_2_recap_document[0] + main_d_3_rd = d_3_recap_document[0] # After adding 1 docket entry, it should only exist its main RD on # every docket @@ -877,22 +983,22 @@ def test_processing_subdocket_case_attachment_page(self, mock_upload): self.assertEqual( d_1_recap_document.count(), 3, - msg=f"Didn't get the expected number of RDs for the docket with PACER case ID {d_2.pacer_case_id}.", + msg=f"Didn't get the expected number of RDs for the docket with PACER case ID {self.d_2.pacer_case_id}.", ) self.assertEqual( d_2_recap_document.count(), 3, - msg=f"Didn't get the expected number of RDs for the docket with PACER case ID {d_1.pacer_case_id}.", + msg=f"Didn't get the expected number of RDs for the docket with PACER case ID {self.d_1.pacer_case_id}.", ) self.assertEqual( d_3_recap_document.count(), 3, - msg=f"Didn't get the expected number of RDs for the docket with PACER case ID {d_3.pacer_case_id}.", + msg=f"Didn't get the expected number of RDs for the docket with PACER case ID {self.d_3.pacer_case_id}.", ) main_d_1_rd.refresh_from_db() main_d_2_rd.refresh_from_db() - main_d_2_rd.refresh_from_db() + main_d_3_rd.refresh_from_db() self.assertEqual( main_d_1_rd.pacer_doc_id, self.de_data_2["docket_entries"][0]["pacer_doc_id"], @@ -908,29 +1014,32 @@ def test_processing_subdocket_case_attachment_page(self, mock_upload): # Two of them should be attachments. 
d_1_attachments = RECAPDocument.objects.filter( - docket_entry__docket=d_1, document_type=RECAPDocument.ATTACHMENT + docket_entry__docket=self.d_1, + document_type=RECAPDocument.ATTACHMENT, ) d_2_attachments = RECAPDocument.objects.filter( - docket_entry__docket=d_2, document_type=RECAPDocument.ATTACHMENT + docket_entry__docket=self.d_2, + document_type=RECAPDocument.ATTACHMENT, ) d_3_attachments = RECAPDocument.objects.filter( - docket_entry__docket=d_3, document_type=RECAPDocument.ATTACHMENT + docket_entry__docket=self.d_3, + document_type=RECAPDocument.ATTACHMENT, ) self.assertEqual( d_1_attachments.count(), 2, - msg=f"Didn't get the expected number of RDs Attachments for the docket with PACER case ID {d_1.pacer_case_id}.", + msg=f"Didn't get the expected number of RDs Attachments for the docket with PACER case ID {self.d_1.pacer_case_id}.", ) self.assertEqual( d_2_attachments.count(), 2, - msg=f"Didn't get the expected number of RDs Attachments for the docket with PACER case ID {d_2.pacer_case_id}.", + msg=f"Didn't get the expected number of RDs Attachments for the docket with PACER case ID {self.d_2.pacer_case_id}.", ) self.assertEqual( d_3_attachments.count(), 2, - msg=f"Didn't get the expected number of RDs Attachments for the docket with PACER case ID {d_3.pacer_case_id}.", + msg=f"Didn't get the expected number of RDs Attachments for the docket with PACER case ID {self.d_3.pacer_case_id}.", ) att_1_data = self.att_data_2["attachments"][0] @@ -969,7 +1078,9 @@ def test_processing_subdocket_case_attachment_page(self, mock_upload): self.assertEqual(pqs_status, {PROCESSING_STATUS.SUCCESSFUL}) pqs_related_dockets = {pq.docket_id for pq in pqs_created} - self.assertEqual(pqs_related_dockets, {d_1.pk, d_2.pk, d_3.pk}) + self.assertEqual( + pqs_related_dockets, {self.d_1.pk, self.d_2.pk, self.d_3.pk} + ) # 3 PacerHtmlFiles should have been created, one for each case. att_html_created = PacerHtmlFiles.objects.all() @@ -981,29 +1092,15 @@ def test_processing_subdocket_case_attachment_page(self, mock_upload): {de.pk for de in DocketEntry.objects.all()}, related_htmls_de ) - def test_process_attachments_for_subdocket_pq_with_missing_main_rd( - self, mock_upload - ): + def test_process_attachments_for_subdocket_pq_with_missing_main_rd(self): """Confirm that if the RD related to the initial PQ is missing, we can still process attachments for subdocket cases where the main RD matches. """ - d_1 = DocketFactory( - source=Docket.RECAP, - docket_number="23-4567", - court=self.court, - pacer_case_id="104490", - ) - d_2 = DocketFactory( - source=Docket.RECAP, - docket_number="23-4567", - court=self.court, - pacer_case_id="104491", - ) # Add the docket entry only to d_1. 
async_to_sync(add_docket_entries)( - d_1, self.de_data_2["docket_entries"] + self.d_1, self.de_data_2["docket_entries"] ) # Create an initial PQ related to d_1 @@ -1015,22 +1112,22 @@ def test_process_attachments_for_subdocket_pq_with_missing_main_rd( filepath_local=self.f, ) d_1_recap_document = RECAPDocument.objects.filter( - docket_entry__docket=d_1 + docket_entry__docket=self.d_1 ) d_2_recap_document = RECAPDocument.objects.filter( - docket_entry__docket=d_2 + docket_entry__docket=self.d_2 ) # After adding 1 docket entry d_1 self.assertEqual( d_1_recap_document.count(), 1, - msg=f"Didn't get the initial number of RDs for the docket with PACER case ID {d_1.pacer_case_id}", + msg=f"Didn't get the initial number of RDs for the docket with PACER case ID {self.d_1.pacer_case_id}", ) self.assertEqual( d_2_recap_document.count(), 0, - msg=f"Didn't get the initial number of RDs for the docket with PACER case ID {d_2.pacer_case_id}", + msg=f"Didn't get the initial number of RDs for the docket with PACER case ID {self.d_2.pacer_case_id}", ) with mock.patch( @@ -1044,12 +1141,12 @@ def test_process_attachments_for_subdocket_pq_with_missing_main_rd( self.assertEqual( d_1_recap_document.count(), 3, - msg=f"Didn't get the expected number of RDs for the docket with PACER case ID {d_2.pacer_case_id}.", + msg=f"Didn't get the expected number of RDs for the docket with PACER case ID {self.d_2.pacer_case_id}.", ) self.assertEqual( d_2_recap_document.count(), 0, - msg=f"Didn't get the expected number of RDs for the docket with PACER case ID {d_1.pacer_case_id}.", + msg=f"Didn't get the expected number of RDs for the docket with PACER case ID {self.d_1.pacer_case_id}.", ) pq.refresh_from_db() @@ -1068,10 +1165,241 @@ def test_process_attachments_for_subdocket_pq_with_missing_main_rd( self.assertEqual(successful_pq[0].status, PROCESSING_STATUS.SUCCESSFUL) self.assertEqual( successful_pq[0].docket_id, - d_1.pk, + self.d_1.pk, msg="Didn't get the expected docket ID.", ) + @mock.patch("cl.recap.tasks.extract_recap_pdf_base") + def test_processing_subdocket_case_pdf_upload(self, mock_extract): + """Can we duplicate a PDF document upload from a subdocket case to the + corresponding RD across all related dockets? + """ + + # Add the docket entry to every case. + async_to_sync(add_docket_entries)( + self.d_1, self.de_data_2["docket_entries"] + ) + async_to_sync(add_docket_entries)( + self.d_2, self.de_data_2["docket_entries"] + ) + async_to_sync(add_docket_entries)( + self.d_3, self.de_data_2["docket_entries"] + ) + + d_1_recap_document = RECAPDocument.objects.filter( + docket_entry__docket=self.d_1 + ) + d_2_recap_document = RECAPDocument.objects.filter( + docket_entry__docket=self.d_2 + ) + d_3_recap_document = RECAPDocument.objects.filter( + docket_entry__docket=self.d_3 + ) + + main_d_1_rd = d_1_recap_document[0] + main_d_2_rd = d_2_recap_document[0] + main_d_3_rd = d_3_recap_document[0] + + self.assertFalse(main_d_1_rd.is_available) + self.assertFalse(main_d_2_rd.is_available) + self.assertFalse(main_d_3_rd.is_available) + + # Two test cases: pacer_case_id and blank pacer_case_id + pacer_case_ids = ["104491", ""] + for pacer_case_id in pacer_case_ids: + with ( + self.subTest(pacer_case_id=pacer_case_id), + transaction.atomic(), + ): + # Create an initial PQ. + pq = ProcessingQueue.objects.create( + court=self.court, + uploader=self.user, + pacer_case_id=pacer_case_id, + pacer_doc_id="04505578697", + document_number=1, + upload_type=UPLOAD_TYPE.PDF, + filepath_local=self.f, + ) + + # Process the PDF upload. 
+ async_to_sync(process_recap_upload)(pq) + + main_d_1_rd.refresh_from_db() + main_d_2_rd.refresh_from_db() + main_d_3_rd.refresh_from_db() + + self.assertTrue( + main_d_1_rd.is_available, + msg="is_available value doesn't match", + ) + self.assertTrue( + main_d_2_rd.is_available, + msg="is_available value doesn't match", + ) + self.assertTrue( + main_d_3_rd.is_available, + msg="is_available value doesn't match", + ) + + self.assertTrue(main_d_1_rd.filepath_local) + self.assertTrue(main_d_2_rd.filepath_local) + self.assertTrue(main_d_3_rd.filepath_local) + + # Assert the number of PQs created to process the additional subdocket RDs. + pqs_created = ProcessingQueue.objects.all() + self.assertEqual( + pqs_created.count(), + 3, + msg="The number of PQs doesn't match.", + ) + + pqs_status = {pq.status for pq in pqs_created} + self.assertEqual(pqs_status, {PROCESSING_STATUS.SUCCESSFUL}) + + pqs_related_dockets = {pq.docket_id for pq in pqs_created} + self.assertEqual( + pqs_related_dockets, + {self.d_1.pk, self.d_2.pk, self.d_3.pk}, + ) + pqs_related_docket_entries = { + pq.docket_entry_id for pq in pqs_created + } + self.assertEqual( + pqs_related_docket_entries, + { + main_d_1_rd.docket_entry.pk, + main_d_2_rd.docket_entry.pk, + main_d_3_rd.docket_entry.pk, + }, + ) + pqs_related_rds = {pq.recap_document_id for pq in pqs_created} + self.assertEqual( + pqs_related_rds, + {main_d_1_rd.pk, main_d_2_rd.pk, main_d_3_rd.pk}, + ) + + transaction.set_rollback(True) + + @mock.patch("cl.recap.tasks.extract_recap_pdf_base") + def test_processing_subdocket_case_pdf_attachment_upload( + self, mock_extract + ): + """Can we duplicate a PDF attachment document upload from a subdocket + case to the corresponding RD across all related dockets? + """ + + # Add the docket entry to every case. + async_to_sync(add_docket_entries)( + self.d_1, self.de_data_2["docket_entries"] + ) + async_to_sync(add_docket_entries)( + self.d_2, self.de_data_2["docket_entries"] + ) + + pq_att = ProcessingQueue.objects.create( + court=self.court, + uploader=self.user, + pacer_case_id="104491", + upload_type=UPLOAD_TYPE.ATTACHMENT_PAGE, + filepath_local=self.f, + ) + + with mock.patch( + "cl.recap.tasks.get_data_from_att_report", + side_effect=lambda x, y: self.att_data_2, + ): + # Process the attachment page containing 2 attachments. + async_to_sync(process_recap_upload)(pq_att) + + d_1_recap_document = RECAPDocument.objects.filter( + docket_entry__docket=self.d_1 + ) + d_2_recap_document = RECAPDocument.objects.filter( + docket_entry__docket=self.d_2 + ) + self.assertEqual(d_1_recap_document.count(), 3) + self.assertEqual(d_2_recap_document.count(), 3) + + att_d_1_rd = d_1_recap_document.filter(attachment_number=2).first() + att_d_2_rd = d_2_recap_document.filter(attachment_number=2).first() + + self.assertFalse(att_d_1_rd.is_available) + self.assertFalse(att_d_2_rd.is_available) + + # Two test cases: pacer_case_id and blank pacer_case_id + pacer_case_ids = ["104491", ""] + for pacer_case_id in pacer_case_ids: + with ( + self.subTest(pacer_case_id=pacer_case_id), + transaction.atomic(), + ): + # Create an initial PQ. + pq = ProcessingQueue.objects.create( + court=self.court, + uploader=self.user, + pacer_case_id=pacer_case_id, + pacer_doc_id="04505578699", + document_number=1, + attachment_number=2, + upload_type=UPLOAD_TYPE.PDF, + filepath_local=self.f, + ) + + # Process the PDF upload. 
+ async_to_sync(process_recap_upload)(pq) + + att_d_1_rd.refresh_from_db() + att_d_2_rd.refresh_from_db() + + self.assertTrue( + att_d_1_rd.is_available, + msg="is_available value doesn't match", + ) + self.assertTrue( + att_d_2_rd.is_available, + msg="is_available value doesn't match", + ) + + self.assertTrue(att_d_1_rd.filepath_local) + self.assertTrue(att_d_2_rd.filepath_local) + + # Assert the number of PQs created to process the additional subdocket RDs. + pqs_created = ProcessingQueue.objects.filter( + upload_type=UPLOAD_TYPE.PDF + ) + self.assertEqual( + pqs_created.count(), + 2, + msg="The number of PQs doesn't match.", + ) + + pqs_status = {pq.status for pq in pqs_created} + self.assertEqual(pqs_status, {PROCESSING_STATUS.SUCCESSFUL}) + + pqs_related_dockets = {pq.docket_id for pq in pqs_created} + self.assertEqual( + pqs_related_dockets, + {self.d_1.pk, self.d_2.pk}, + ) + pqs_related_docket_entries = { + pq.docket_entry_id for pq in pqs_created + } + self.assertEqual( + pqs_related_docket_entries, + { + att_d_1_rd.docket_entry.pk, + att_d_2_rd.docket_entry.pk, + }, + ) + pqs_related_rds = {pq.recap_document_id for pq in pqs_created} + self.assertEqual( + pqs_related_rds, + {att_d_1_rd.pk, att_d_2_rd.pk}, + ) + + transaction.set_rollback(True) + @mock.patch("cl.recap.tasks.DocketReport", new=fakes.FakeDocketReport) @mock.patch( diff --git a/poetry.lock b/poetry.lock index 3703770058..be4b8ae1a8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -945,13 +945,13 @@ files = [ [[package]] name = "django" -version = "5.1.2" +version = "5.1.4" description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design." optional = false python-versions = ">=3.10" files = [ - {file = "Django-5.1.2-py3-none-any.whl", hash = "sha256:f11aa87ad8d5617171e3f77e1d5d16f004b79a2cf5d2e1d2b97a6a1f8e9ba5ed"}, - {file = "Django-5.1.2.tar.gz", hash = "sha256:bd7376f90c99f96b643722eee676498706c9fd7dc759f55ebfaf2c08ebcdf4f0"}, + {file = "Django-5.1.4-py3-none-any.whl", hash = "sha256:236e023f021f5ce7dee5779de7b286565fdea5f4ab86bae5338e3f7b69896cf0"}, + {file = "Django-5.1.4.tar.gz", hash = "sha256:de450c09e91879fa5a307f696e57c851955c910a438a35e6b4c895e86bedc82a"}, ] [package.dependencies] @@ -2319,13 +2319,13 @@ setuptools = "*" [[package]] name = "juriscraper" -version = "2.6.48" +version = "2.6.49" description = "An API to scrape American court websites for metadata." 
optional = false python-versions = "*" files = [ - {file = "juriscraper-2.6.48-py27-none-any.whl", hash = "sha256:f2e198cb66a5d3f1423ec4928fc76e1f25c13d0caafc2a6262a7d158c39eab8e"}, - {file = "juriscraper-2.6.48.tar.gz", hash = "sha256:bc138e2c5776f55ef96c10f4a4185d0fec80d83e555e25d1f3fb4b384d399c53"}, + {file = "juriscraper-2.6.49-py27-none-any.whl", hash = "sha256:5954c15747ee5a922d8388db9bb1649100bf8376c39122dc2f4ede2b437b8d0b"}, + {file = "juriscraper-2.6.49.tar.gz", hash = "sha256:28254a22584cfd92a47bb91f9f3bc9139514ffdddfdad1085eacdc70f79fa264"}, ] [package.dependencies] @@ -5692,4 +5692,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.13, <3.14" -content-hash = "8703160c5832be62299f5a926fa8670aed8715cb7c03dc7dd7be2d1a5c84fb2a" +content-hash = "49aab347be47355db92d0faabdfdb28120a588fb5694887e3e730cca312a0945" diff --git a/pyproject.toml b/pyproject.toml index ecff70afd5..c24ceec367 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ celery = "^5.4.0" certifi = "^2024.12.14" courts-db = "*" disposable-email-domains = "*" -Django = "^5.1.2" +Django = "^5.1.4" django-cache-memoize = "==0.*" django-cors-headers = "^4.6.0" django-csp = "^3.8"