In the “As cURL” tab, you can copy/paste a curl command that can be used to send a test event to your local dev environment.
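The exact URL, headers, and body for that curl command come from the "As cURL" tab itself. As a rough Python equivalent (a sketch only; the endpoint and payload below are placeholders for whatever the tab gives you):

import requests  # assumes the requests package is available

# Placeholder values: substitute the URL, headers, and JSON body copied from
# the "As cURL" tab for your own local webhook endpoint.
resp = requests.post(
    "http://localhost:8000/webhooks/test/",
    json={"payload": {"example": True}},
    headers={"Content-Type": "application/json"},
    timeout=10,
)
print(resp.status_code, resp.text)
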
- Dict[str, Dict[str, int]]:
- """Invert the user logs for a period of time
+ """Aggregate API usage statistics per user over a date range.
- The user logs have the date in the key and the user as part of the set:
+ - Anonymous users are aggregated under the key 'AnonymousUser'.
+ - Both v3 and v4 API counts are combined in the results.
- 'api:v3.user.d:2016-10-01.counts': {
- mlissner: 22,
- joe_hazard: 33,
- }
-
- This inverts these entries to:
+ :param start: Beginning date (inclusive) for the query range
+ :param end: End date (inclusive) for the query range
+ :param add_usernames: If True, replaces user IDs with usernames as keys.
+ When False, uses only user IDs as keys.
- users: {
- mlissner: {
- 2016-10-01: 22,
- total: 22,
- },
- joe_hazard: {
- 2016-10-01: 33,
- total: 33,
- }
- }
- :param start: The beginning date (inclusive) you want the results for. A
- :param end: The end date (inclusive) you want the results for.
- :param add_usernames: Stats are stored with the user ID. If this is True,
- add an alias in the returned dictionary that contains the username as well.
- :return The inverted dictionary
+ :return: Dictionary mapping user identifiers (usernames if `add_usernames=True`,
+ otherwise user IDs) to their daily API usage counts and totals.
+ Inner dictionaries are ordered by date. Only dates with usage are included.
"""
r = get_redis_interface("STATS")
pipe = r.pipeline()
dates = make_date_str_list(start, end)
+ versions = ["v3", "v4"]
for d in dates:
- pipe.zrange(f"api:v3.user.d:{d}.counts", 0, -1, withscores=True)
+ for version in versions:
+ pipe.zrange(
+ f"api:{version}.user.d:{d}.counts",
+ 0,
+ -1,
+ withscores=True,
+ )
+
+ # results contains alternating v3/v4 API usage data for each date queried.
+ # For example, if querying 2023-01-01 to 2023-01-02, results might look like:
+ # [
+ # # 2023-01-01 v3 data: [(user_id, count), ...]
+ # [("1", 100.0), ("2", 50.0)],
+ # # 2023-01-01 v4 data
+ # [("1", 50.0), ("2", 25.0)],
+ # # 2023-01-02 v3 data
+ # [("1", 200.0), ("2", 100.0)],
+ # # 2023-01-02 v4 data
+ # [("1", 100.0), ("2", 50.0)]
+ # ]
+ # We zip this with dates to combine v3/v4 counts per user per day
results = pipe.execute()
- # results is a list of results for each of the zrange queries above. Zip
- # those results with the date that created it, and invert the whole thing.
out: defaultdict = defaultdict(dict)
- for d, result in zip(dates, results):
- for user_id, count in result:
- if user_id == "None" or user_id == "AnonymousUser":
- user_id = "AnonymousUser"
- else:
- user_id = int(user_id)
- count = int(count)
- if out.get(user_id):
- out[user_id][d] = count
- out[user_id]["total"] += count
- else:
- out[user_id] = {d: count, "total": count}
+
+ def update_user_counts(_user_id, _count, _date):
+ user_is_anonymous = _user_id == "None" or _user_id == "AnonymousUser"
+ _user_id = "AnonymousUser" if user_is_anonymous else int(_user_id)
+ _count = int(_count)
+ out.setdefault(_user_id, OrderedDict())
+ out[_user_id].setdefault(_date, 0)
+ out[_user_id][_date] += _count
+ out[_user_id].setdefault("total", 0)
+ out[_user_id]["total"] += _count
+
+ for d, api_usage in zip(dates, batched(results, len(versions))):
+ for user_id, count in chain(*api_usage):
+ update_user_counts(user_id, count, d)
# Sort the values
for k, v in out.items():
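
To make the new zip/batched aggregation above easier to follow, here is a standalone sketch that runs it over the sample values from the comment. The diff's `batched` import isn't shown; `itertools.batched` (Python 3.12+) is assumed here as a stand-in with the same semantics, and the numbers are the illustrative counts from the comment, not real data.

from collections import OrderedDict, defaultdict
from itertools import chain

# itertools.batched requires Python 3.12+; assumed here as a stand-in for the
# `batched` helper used in the diff, whose import is not shown.
from itertools import batched

dates = ["2023-01-01", "2023-01-02"]
versions = ["v3", "v4"]
# One zrange result list per (date, version) query, as in the comment above.
results = [
    [("1", 100.0), ("2", 50.0)],   # 2023-01-01 v3
    [("1", 50.0), ("2", 25.0)],    # 2023-01-01 v4
    [("1", 200.0), ("2", 100.0)],  # 2023-01-02 v3
    [("1", 100.0), ("2", 50.0)],   # 2023-01-02 v4
]

out: defaultdict = defaultdict(OrderedDict)
for d, api_usage in zip(dates, batched(results, len(versions))):
    # Each `api_usage` is the pair of v3/v4 result lists for date `d`.
    for user_id, count in chain(*api_usage):
        user = int(user_id)
        out[user][d] = out[user].get(d, 0) + int(count)
        out[user]["total"] = out[user].get("total", 0) + int(count)

# user 1: 150 on 2023-01-01, 300 on 2023-01-02, total 450
print(dict(out))
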
diff --git a/cl/assets/static-global/css/override.css b/cl/assets/static-global/css/override.css
index 1cdfdbb510..f7862d3be7 100644
--- a/cl/assets/static-global/css/override.css
+++ b/cl/assets/static-global/css/override.css
@@ -1829,3 +1829,11 @@ rect.series-segment {
.prayer-button[data-gap-size="large"]{
margin-left: 44px;
}
+
+.gap-1 {
+ gap: 0.25rem
+}
+
+.gap-2 {
+ gap: 0.5rem
+}
diff --git a/cl/assets/templates/admin/docket_change_form.html b/cl/assets/templates/admin/docket_change_form.html
new file mode 100644
index 0000000000..c3a4b000fc
--- /dev/null
+++ b/cl/assets/templates/admin/docket_change_form.html
@@ -0,0 +1,19 @@
+{% extends "admin/change_form.html" %}
+
+{% block object-tools-items %}
+ {% if docket_entries_url %}
+
diff --git a/cl/corpus_importer/factories.py b/cl/corpus_importer/factories.py
index c9b7bdc86d..3cbe3fbcc3 100644
--- a/cl/corpus_importer/factories.py
+++ b/cl/corpus_importer/factories.py
@@ -36,29 +36,6 @@ class CaseLawFactory(factory.DictFactory):
docket_number = Faker("federal_district_docket_number")
-class RssDocketEntryDataFactory(factory.DictFactory):
- date_filed = Faker("date_object")
- description = ""
- document_number = Faker("pyint", min_value=1, max_value=100)
- pacer_doc_id = Faker("random_id_string")
- pacer_seq_no = Faker("random_id_string")
- short_description = Faker("text", max_nb_chars=40)
-
-
-class RssDocketDataFactory(factory.DictFactory):
- court_id = FuzzyText(length=4, chars=string.ascii_lowercase, suffix="d")
- case_name = Faker("case_name")
- docket_entries = factory.List(
- [factory.SubFactory(RssDocketEntryDataFactory)]
- )
- docket_number = Faker("federal_district_docket_number")
- office = Faker("pyint", min_value=1, max_value=100)
- chapter = Faker("pyint", min_value=1, max_value=100)
- trustee_str = Faker("text", max_nb_chars=15)
- type = Faker("text", max_nb_chars=8)
- pacer_case_id = Faker("random_id_string")
-
-
class FreeOpinionRowDataFactory(factory.DictFactory):
case_name = Faker("case_name")
cause = Faker("text", max_nb_chars=8)
diff --git a/cl/corpus_importer/management/commands/troller_bk.py b/cl/corpus_importer/management/commands/troller_bk.py
deleted file mode 100644
index 054d9e4682..0000000000
--- a/cl/corpus_importer/management/commands/troller_bk.py
+++ /dev/null
@@ -1,864 +0,0 @@
-# Import the troller BK RSS feeds
-import argparse
-import concurrent.futures
-import gc
-import linecache
-import re
-import sys
-import threading
-from collections import defaultdict
-from datetime import datetime, timezone
-from queue import Queue
-from typing import Any, DefaultDict, Mapping, TypedDict
-from urllib.parse import unquote
-
-from asgiref.sync import async_to_sync, sync_to_async
-from dateutil.parser import ParserError
-from django.db import DataError, IntegrityError, transaction
-from django.db.models import Q
-from django.utils.text import slugify
-from django.utils.timezone import make_aware
-from juriscraper.pacer import PacerRssFeed
-
-from cl.custom_filters.templatetags.text_filters import best_case_name
-from cl.lib.command_utils import VerboseCommand, logger
-from cl.lib.model_helpers import make_docket_number_core
-from cl.lib.pacer import map_pacer_to_cl_id
-from cl.lib.redis_utils import get_redis_interface
-from cl.lib.storage import S3PrivateUUIDStorage
-from cl.lib.string_utils import trunc
-from cl.lib.timezone_helpers import localize_date_and_time
-from cl.recap.mergers import (
- add_bankruptcy_data_to_docket,
- calculate_recap_sequence_numbers,
- find_docket_object,
- update_docket_metadata,
-)
-from cl.recap_rss.tasks import (
- cache_hash,
- get_last_build_date,
- hash_item,
- is_cached,
-)
-from cl.search.models import Court, Docket, DocketEntry, RECAPDocument
-
-FILES_BUFFER_THRESHOLD = 3
-
-
-async def check_for_early_termination(
- court_id: str, docket: dict[str, Any]
-) -> str | None:
- """Check for early termination, skip the rest of the file in case a cached
- item is reached or skip a single item if it doesn't contain required data.
- Cache the current item.
-
- :param court_id: The court the docket entries belong to.
- :param docket: A dict containing the item data.
- :return: A "break" string indicating if the rest of the file should be
- omitted, "continue" if only the current item should be omitted or None.
- """
- item_hash = hash_item(docket)
- if await is_cached(item_hash):
- logger.info(
- f"Hit a cached item, finishing adding bulk entries for {court_id} feed. "
- )
- return "break"
-
- await cache_hash(item_hash)
- if (
- not docket["pacer_case_id"]
- and not docket["docket_number"]
- or not len(docket["docket_entries"])
- ):
- return "continue"
- return None
-
-
-def add_new_docket_from_rss(
- court_id: str,
- d: Docket,
- docket: dict[str, Any],
- unique_dockets: dict[str, Any],
- dockets_to_create: list[Docket],
-) -> None:
- """Set metadata and extra values to the Docket object and append it to
- the list of dockets to be added in bulk.
-
- :param court_id: The court the docket entries belong to.
- :param d: The Docket object to modify and add.
- :param docket: The dict containing the item data.
- :param unique_dockets: The dict to keep track of unique dockets to add.
- :param dockets_to_create: The list of dockets to add in bulk.
- :return: None
- """
-
- date_filed, time_filed = localize_date_and_time(
- court_id, docket["docket_entries"][0]["date_filed"]
- )
- async_to_sync(update_docket_metadata)(d, docket)
- d.pacer_case_id = docket["pacer_case_id"]
- d.slug = slugify(trunc(best_case_name(d), 75))
- d.date_last_filing = date_filed
- if d.docket_number:
- d.docket_number_core = make_docket_number_core(d.docket_number)
-
- docket_in_list = unique_dockets.get(docket["docket_number"], None)
- if not docket_in_list:
- unique_dockets[docket["docket_number"]] = docket
- dockets_to_create.append(d)
-
-
-def do_bulk_additions(
- court_id: str,
- unique_dockets: dict[str, Any],
- dockets_to_create: list[Docket],
- des_to_add_no_existing_docket: DefaultDict[str, list[dict[str, Any]]],
- des_to_add_existing_docket: list[tuple[int, dict[str, Any]]],
-) -> tuple[list[int], int]:
- """Create dockets, docket entries and recap documents in bulk.
-
- :param court_id: The court the docket entries belong to.
- :param unique_dockets: The dict to keep track of unique dockets to add.
- :param dockets_to_create: The list of dockets to add in bulk.
- :param des_to_add_no_existing_docket: A DefaultDict of entries to add
- whose parent docket doesn't exist yet, keyed as docket_number: [entries]
- :param des_to_add_existing_docket: A list of tuples of entries to add
- whose parent docket exists, as (docket.pk, docket_entry)
- :return: A tuple containing a list of created recap documents pks, the
- number of dockets created.
- """
-
- with transaction.atomic():
- # Create dockets in bulk.
- d_bulk_created = Docket.objects.bulk_create(dockets_to_create)
-
- # Add bankruptcy data to dockets.
- for d in d_bulk_created:
- docket_data = unique_dockets.get(d.docket_number)
- if docket_data:
- add_bankruptcy_data_to_docket(d, docket_data)
-
- # Find and assign the created docket pk to the list of docket entries
- # to add.
- for d_created in d_bulk_created:
- docket_number = d_created.docket_number
- des_to_create = des_to_add_no_existing_docket[docket_number]
- for de_entry in des_to_create:
- des_to_add_existing_docket.append((d_created.pk, de_entry))
-
- # Create docket entries in bulk.
- docket_entries_to_add_bulk = get_docket_entries_to_add(
- court_id, des_to_add_existing_docket
- )
- des_bulk_created = DocketEntry.objects.bulk_create(
- docket_entries_to_add_bulk
- )
-
- # Create RECAP documents in bulk.
- rds_to_create_bulk = get_rds_to_add(
- des_bulk_created, des_to_add_existing_docket
- )
- rd_bulk_created = RECAPDocument.objects.bulk_create(rds_to_create_bulk)
-
- return [rd.pk for rd in rd_bulk_created], len(d_bulk_created)
-
-
-def get_docket_entries_to_add(
- court_id: str, des_to_add_existing_docket: list[tuple[int, dict[str, Any]]]
-) -> list[DocketEntry]:
- """Make and return a list of the DocketEntry objects to save in bulk.
-
- :param court_id: The court the docket entries belong to.
- :param des_to_add_existing_docket: A list of tuples containing the docket
- pk the entry belongs to, the docket entry dict.
- :return: A list of DocketEntry objects.
- """
-
- docket_entries_to_add_bulk = []
- for de_add in des_to_add_existing_docket:
- d_pk = de_add[0]
- docket_entry = de_add[1]
- calculate_recap_sequence_numbers([docket_entry], court_id)
- date_filed, time_filed = localize_date_and_time(
- court_id, docket_entry["date_filed"]
- )
- de_to_add = DocketEntry(
- docket_id=d_pk,
- entry_number=docket_entry["document_number"],
- description=docket_entry["description"],
- pacer_sequence_number=docket_entry["pacer_seq_no"],
- recap_sequence_number=docket_entry["recap_sequence_number"],
- time_filed=time_filed,
- date_filed=date_filed,
- )
- docket_entries_to_add_bulk.append(de_to_add)
- return docket_entries_to_add_bulk
-
-
-def get_rds_to_add(
- des_bulk_created: list[DocketEntry],
- des_to_add_existing_docket: list[tuple[int, dict[str, Any]]],
-) -> list[RECAPDocument]:
- """Make and return a list of the RECAPDocument objects to save in bulk.
-
- :param des_bulk_created: The list of DocketEntry objects saved in a
- previous step.
- :param des_to_add_existing_docket: A list of tuples containing the docket
- pk the entry belongs to, the docket entry dict.
- :return: A list of RECAPDocument objects.
- """
-
- rds_to_create_bulk = []
- for d_entry, bulk_created in zip(
- des_to_add_existing_docket, des_bulk_created
- ):
- de_pk = bulk_created.pk
- docket_entry = d_entry[1]
- document_number = docket_entry["document_number"] or ""
- rd = RECAPDocument(
- docket_entry_id=de_pk,
- document_number=document_number,
- description=docket_entry["short_description"],
- document_type=RECAPDocument.PACER_DOCUMENT,
- pacer_doc_id=docket_entry["pacer_doc_id"],
- is_available=False,
- )
- rds_to_create_bulk.append(rd)
-
- return rds_to_create_bulk
-
-
-async def merge_rss_data(
- feed_data: list[dict[str, Any]],
- court_id: str,
- build_date: datetime | None,
-) -> tuple[list[int], int]:
- """Merge the RSS data into the database
-
- :param feed_data: Data from an RSS feed file
- :param court_id: The PACER court ID for the item
- :param build_date: The RSS date build.
- :return: A list of RECAPDocument PKs that can be passed to Solr
- """
-
- court_id = map_pacer_to_cl_id(court_id)
- court = await Court.objects.aget(pk=court_id)
- dockets_created = 0
- all_rds_created: list[int] = []
- court_ids = (
- Court.federal_courts.district_or_bankruptcy_pacer_courts().values_list(
- "pk", flat=True
- )
- )
- courts_exceptions_no_rss = ["miwb", "nceb", "pamd", "cit"]
- if (
- build_date
- and build_date
- > make_aware(datetime(year=2018, month=4, day=20), timezone.utc)
- and await court_ids.filter(id=court_id).aexists()
- and court_id not in courts_exceptions_no_rss
- ):
- # Avoid parsing/adding feeds after we start scraping RSS Feeds for
- # district and bankruptcy courts.
- return all_rds_created, dockets_created
-
- dockets_to_create: list[Docket] = []
- unique_dockets: dict[str, Any] = {}
- des_to_add_existing_docket: list[tuple[int, dict[str, Any]]] = []
- des_to_add_no_existing_docket: DefaultDict[str, list[dict[str, Any]]] = (
- defaultdict(list)
- )
- for docket in feed_data:
- skip_or_break = await check_for_early_termination(court_id, docket)
- if skip_or_break == "continue":
- continue
- elif skip_or_break == "break":
- break
-
- d = await find_docket_object(
- court_id,
- docket["pacer_case_id"],
- docket["docket_number"],
- docket.get("federal_defendant_number"),
- docket.get("federal_dn_judge_initials_assigned"),
- docket.get("federal_dn_judge_initials_referred"),
- )
- docket_entry = docket["docket_entries"][0]
- document_number = docket["docket_entries"][0]["document_number"]
- if (
- document_number
- and d.pk
- and await d.docket_entries.filter(
- entry_number=document_number
- ).aexists()
- ):
- # It's an existing docket entry; let's not add it.
- continue
- else:
- # Try finding the docket entry by short_description.
- short_description = docket_entry["short_description"]
- query = Q()
- if short_description:
- query |= Q(
- recap_documents__description=docket_entry[
- "short_description"
- ]
- )
- if (
- d.pk
- and await d.docket_entries.filter(
- query,
- date_filed=docket_entry["date_filed"],
- entry_number=docket_entry["document_number"],
- ).aexists()
- ):
- # It's an existing docket entry; let's not add it.
- continue
-
- d.add_recap_source()
- if not d.pk:
- # Set metadata for the new docket and append the docket and entry
- # to the list to add in bulk.
- if (
- not docket["pacer_case_id"]
- and court.jurisdiction != Court.FEDERAL_APPELLATE
- ):
- # Avoid adding the docket if it belongs to a district/bankr
- # court and doesn't have a pacer_case_id
- continue
-
- await sync_to_async(add_new_docket_from_rss)(
- court_id,
- d,
- docket,
- unique_dockets,
- dockets_to_create,
- )
- # Append docket entries to add in bulk.
- des_to_add_no_existing_docket[docket["docket_number"]].append(
- docket_entry
- )
- else:
- # Existing docket, update source, add bankr data and append the
- # docket entry to add in bulk.
- des_to_add_existing_docket.append((d.pk, docket_entry))
- try:
- await d.asave(update_fields=["source"])
- await sync_to_async(add_bankruptcy_data_to_docket)(d, docket)
- except (DataError, IntegrityError):
- # Trouble. Log and move on
- logger.warn(
- "Got DataError or IntegrityError while saving docket."
- )
-
- rds_created_pks, dockets_created = await sync_to_async(do_bulk_additions)(
- court_id,
- unique_dockets,
- dockets_to_create,
- des_to_add_no_existing_docket,
- des_to_add_existing_docket,
- )
- all_rds_created.extend(rds_created_pks)
- logger.info(
- f"Finished adding {court_id} feed. Added {len(all_rds_created)} RDs."
- )
- return all_rds_created, dockets_created
-
-
-def parse_file(
- binary_content: bytes,
- court_id: str,
-) -> tuple[Any, datetime | None]:
- """Parse a RSS file and return the data.
-
- :param binary_content: The binary content of the file to parse.
- :param court_id: The PACER court ID for the item
- :return: The parsed data from the retrieved XML feed.
- """
-
- feed = PacerRssFeed(court_id)
- content = binary_content.decode("utf-8")
- feed._parse_text(content)
- build_date = get_last_build_date(binary_content)
- return feed.data, build_date
-
-
-def get_court_from_line(line: str):
- """Get the court_id from the line.
-
- This is a bit annoying. Each file name looks something like:
-
- sources/troller-files/o-894|1599853056
- sources/troller-files/w-w-894|1599853056
- sources/troller-files/o-DCCF0395-BDBA-C444-149D8D8EFA2EC03D|1576082101
- sources/troller-files/w-88AC552F-BDBA-C444-1BD52598BA252265|1435103773
- sources/troller-files/w-w-DCCF049E-BDBA-C444-107C577164350B1E|1638858935
- sources/troller-files/w-88AC552F-BDBA-C444-1BD52598BA252265-1399913581
- sources/troller-files/w-w-Mariana|1638779760
-
- The court_id is based on the part between the "/o-" and the "|" or "-".
- Match it, look it up in our table of court IDs, and return the correct PACER ID.
-
- :param line: A line to a file in S3
- :return: The PACER court ID for the feed
- """
-
- court = None
- regex = re.compile(
- r"([A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{16})|-([0-9]{3})\||-([0-9]{3})-|(Mariana)"
- )
- match = re.search(regex, line)
- if match is None:
- return None
- if match.group(1):
- court = match.group(1)
- if match.group(2):
- court = match.group(2)
- if match.group(3):
- court = match.group(3)
- if match.group(4):
- court = match.group(4)
-
- if not court:
- return None
- return troller_ids.get(court, None)
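
For reference, the extraction this removed helper performed can be exercised in isolation. A minimal sketch using the same regex and the sample S3 keys from the docstring above (expected mappings such as "894" -> "wyd" come from the troller_ids table at the bottom of this file):

import re

# Same pattern as the removed get_court_from_line: capture a GUID, a 3-digit
# numeric id, or the literal "Mariana" from the S3 key.
regex = re.compile(
    r"([A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{16})|-([0-9]{3})\||-([0-9]{3})-|(Mariana)"
)
samples = [
    "sources/troller-files/o-894|1599853056",  # -> "894" (wyd)
    "sources/troller-files/o-DCCF0395-BDBA-C444-149D8D8EFA2EC03D|1576082101",  # -> almb
    "sources/troller-files/w-w-Mariana|1638779760",  # -> "Mariana" (nmib)
]
for line in samples:
    match = regex.search(line)
    if match is None:
        continue
    # The first non-empty group is the key to look up in troller_ids below.
    key = next(group for group in match.groups() if group)
    print(line, "->", key)
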
-
-
-class OptionsType(TypedDict):
- offset: int
- limit: int
- file: str
-
-
-def log_added_items_to_redis(
- dockets_created: int, rds_created: int, line: int
-) -> Mapping[str | bytes, int | str]:
- """Log the number of dockets and recap documents created to redis.
- Get the previous stored values and add the new ones.
-
- :param dockets_created: The dockets created.
- :param rds_created: The recap documents created.
- :param line: The last line imported.
- :return: The data logged to redis.
- """
-
- r = get_redis_interface("STATS")
- pipe = r.pipeline()
- log_key = "troller_bk:log"
- pipe.hgetall(log_key)
- stored_values = pipe.execute()
- current_total_dockets = int(stored_values[0].get("total_dockets", 0))
- current_total_rds = int(stored_values[0].get("total_rds", 0))
-
- total_dockets_created = dockets_created + current_total_dockets
- total_rds_created = rds_created + current_total_rds
- log_info: Mapping[str | bytes, int | str] = {
- "total_dockets": total_dockets_created,
- "total_rds": total_rds_created,
- "last_line": line,
- "date_time": datetime.now().isoformat(),
- }
- pipe.hset(log_key, mapping=log_info)
- pipe.expire(log_key, 60 * 60 * 24 * 28) # 4 weeks
- pipe.execute()
- return log_info
-
-
-def download_file(item_path: str, order: int) -> tuple[bytes, str, int]:
- """Small wrapper to download and read a file from S3.
- :param item_path: The file path to download.
- :param order: The original order of the file to keep in the queue.
- :return: A tuple of the binary content of the file, the file path and the
- file order.
- """
- bucket = S3PrivateUUIDStorage()
- with bucket.open(item_path, mode="rb") as f:
- binary_content = f.read()
- return binary_content, item_path, order
-
-
-def download_files_from_paths(
- item_paths: list[str],
- files_queue: Queue,
- last_thread: threading.Thread | None,
-) -> None:
- """Download multiple files concurrently and store them to a Queue.
- :param item_paths: The list of file paths to download.
- :param files_queue: The Queue where the downloaded files are stored.
- :param last_thread: The previous thread launched.
- :return: None
- """
-
- order = 0
- with concurrent.futures.ThreadPoolExecutor() as executor:
- concurrent_downloads = []
- for item_path in item_paths:
- concurrent_downloads.append(
- executor.submit(download_file, item_path, order)
- )
- order += 1
-
- # Wait for all the downloads to complete.
- completed_downloads = list(
- concurrent.futures.as_completed(concurrent_downloads)
- )
- # Order the downloads to preserve their original chronological order.
- completed_downloads.sort(key=lambda a: a.result()[2])
- # Add files to the Queue
- for download in completed_downloads:
- if last_thread:
- # Wait until the last thread completes, so we don't mess up
- # the chronological order.
- last_thread.join()
- files_queue.put(download.result())
-
-
-def download_files_concurrently(
- files_queue: Queue,
- file_path: str,
- files_downloaded_offset: int,
- threads: list[threading.Thread],
-) -> int:
- """Get the next files to download and start a thread to download them.
- :param files_queue: The Queue where the downloaded files are stored.
- :param file_path: The file containing the list of paths to download.
- :param files_downloaded_offset: The number of files that have already been downloaded
- :param threads: The list of threads.
- :return: The updated files_downloaded_offset
- """
-
- files_to_download = []
- linecache.clearcache()
- linecache.checkcache(file_path)
- if files_queue.qsize() < FILES_BUFFER_THRESHOLD - 1:
- for j in range(FILES_BUFFER_THRESHOLD):
- # Get the next paths to download.
- next_line = linecache.getline(
- file_path, files_downloaded_offset + 1
- )
- if next_line:
- files_to_download.append(unquote(next_line).replace("\n", ""))
- files_downloaded_offset += 1
-
- # Download the files concurrently.
- if files_to_download:
- last_thread = None
- if threads:
- last_thread = threads[-1]
- download_thread = threading.Thread(
- target=download_files_from_paths,
- args=(files_to_download, files_queue, last_thread),
- )
- download_thread.start()
- threads.append(download_thread)
-
- return files_downloaded_offset
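
The ordering logic in the removed download pipeline (run downloads concurrently, then restore their original order before queueing) can be shown on its own. A minimal sketch with a fake `download` standing in for the S3 read:

import concurrent.futures
import random
import time

def download(item_path: str, order: int) -> tuple[bytes, str, int]:
    # Stand-in for the S3 read; the random sleep scrambles completion order.
    time.sleep(random.randint(1, 10) / 100)
    return b"", item_path, order

item_paths = [f"path-{i}" for i in range(5)]
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = [
        executor.submit(download, path, order)
        for order, path in enumerate(item_paths)
    ]
    completed = list(concurrent.futures.as_completed(futures))

# Futures complete in arbitrary order; sorting by the stored index restores
# the original (chronological) order before the results are consumed.
completed.sort(key=lambda future: future.result()[2])
print([future.result()[1] for future in completed])  # path-0 ... path-4
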
-
-
-def iterate_and_import_files(
- options: OptionsType, threads: list[threading.Thread]
-) -> None:
- """Iterate over the inventory file and import all new items.
-
- - Merge into the DB
- - Add to solr
- - Do not send alerts or webhooks
- - Do not touch dockets with entries (troller data is old)
- - Do not parse (add) district/bankruptcy court feeds after 2018-4-20,
- which is when RECAP started scraping the RSS feeds.
-
- :param options: The command line options
- :param threads: A list of Threads.
- :return: None
- """
-
- # Enable automatic garbage collection.
- gc.enable()
- f = open(options["file"], "r", encoding="utf-8")
- total_dockets_created = 0
- total_rds_created = 0
-
- files_queue: Queue = Queue(maxsize=FILES_BUFFER_THRESHOLD)
- files_downloaded_offset = options["offset"]
- for i, line in enumerate(f):
- if i < options["offset"]:
- continue
- if i >= options["limit"] > 0:
- break
-
- # If the files_queue has less than FILES_BUFFER_THRESHOLD files, then
- # download more files ahead and store them to the queue.
- files_downloaded_offset = download_files_concurrently(
- files_queue, f.name, files_downloaded_offset, threads
- )
-
- # Process a file from the queue.
- binary, item_path, order = files_queue.get()
- court_id = get_court_from_line(item_path)
- logger.info(f"Attempting: {item_path=} with {court_id=} \n")
- if not court_id:
- # Probably a court we don't know
- continue
- try:
- feed_data, build_date = parse_file(binary, court_id)
- except ParserError:
- logger.info(
- f"Skipping: {item_path=} with {court_id=} due to incorrect date format. \n"
- )
- continue
- rds_for_solr, dockets_created = async_to_sync(merge_rss_data)(
- feed_data, court_id, build_date
- )
-
- total_dockets_created += dockets_created
- total_rds_created += len(rds_for_solr)
-
- # Mark the file as completed and remove it from the queue.
- files_queue.task_done()
-
- # Remove completed download threads from the list of threads.
- for thread in threads:
- if not thread.is_alive():
- threads.remove(thread)
- logger.info(f"Last line imported: {i} \n")
-
- if not i % 25:
- # Log every 25 lines.
- log_added_items_to_redis(
- total_dockets_created, total_rds_created, i
- )
- # Restart counters after logging into redis.
- total_dockets_created = 0
- total_rds_created = 0
-
- # Ensure garbage collector is called at the end of each iteration.
- gc.collect()
- f.close()
-
-
-class Command(VerboseCommand):
- help = "Import the troller BK RSS files from S3 to the DB"
-
- def add_arguments(self, parser):
- parser.add_argument(
- "--offset",
- type=int,
- default=0,
- help="The number of items to skip before beginning. Default is to "
- "skip none.",
- )
- parser.add_argument(
- "--limit",
- type=int,
- default=0,
- help="After doing this number, stop. This number is not additive "
- "with the offset parameter. Default is to do all of them.",
- )
- parser.add_argument(
- "--file",
- type=str,
- help="Where is the text file that has the list of paths from the "
- "bucket? Create this from an S3 inventory file, by removing "
- "all but the path column",
- )
-
- def handle(self, *args, **options):
- super().handle(*args, **options)
- if not options["file"]:
- raise argparse.ArgumentError(
- "The 'file' argument is required for that action."
- )
-
- threads = []
- try:
- iterate_and_import_files(options, threads)
- except KeyboardInterrupt:
- logger.info("The importer has stopped, waiting threads to exit.")
- for thread in threads:
- thread.join()
- sys.exit(1)
-
-
-troller_ids = {
- "88AC552F-BDBA-C444-1BD52598BA252265": "nmb",
- "DCCF0395-BDBA-C444-149D8D8EFA2EC03D": "almb",
- "DCCF03A4-BDBA-C444-13AFEC481CF81C91": "alnb",
- "DCCF03B4-BDBA-C444-180877EB555CF90A": "alsb",
- "DCCF03C3-BDBA-C444-10B70B118120A4F8": "akb",
- "DCCF03D3-BDBA-C444-1EA2D2D99D26D437": "azb",
- "DCCF03E3-BDBA-C444-11C3D8B9C688D49E": "areb",
- "DCCF03F2-BDBA-C444-14974FDC2C6DD113": "arwb",
- "DCCF0412-BDBA-C444-1C60416590832545": "cacb",
- "DCCF0421-BDBA-C444-12F451A14D4239AC": "caeb",
- "DCCF0431-BDBA-C444-1CE9AB1898357D63": "canb",
- "DCCF0440-BDBA-C444-1C8FEECE5B5AD482": "casb",
- "DCCF0460-BDBA-C444-1282B46DCB6DF058": "cob",
- "DCCF046F-BDBA-C444-126D999DD997D9A5": "ctb",
- "DCCF047F-BDBA-C444-16EA4D3A7417C840": "deb",
- "DCCF048F-BDBA-C444-12505144CA111B75": "dcb",
- "DCCF049E-BDBA-C444-107C577164350B1E": "flmb",
- "DCCF04BD-BDBA-C444-17B566BCA4E30864": "flnb",
- "DCCF04CD-BDBA-C444-13315D191ADF5852": "flsb",
- "DCCF04DD-BDBA-C444-11B09E58A8308286": "gamb",
- "DCCF04EC-BDBA-C444-113648D978F0FF3B": "ganb",
- "DCCF04FC-BDBA-C444-167F8376D8DF181B": "gasb",
- "DCCF050C-BDBA-C444-1191B98D5C279255": "gub",
- "DCCF051B-BDBA-C444-10E608B4E279AE73": "hib",
- "DCCF052B-BDBA-C444-1128ADF2BE776FF5": "idb",
- "DCCF053A-BDBA-C444-1E17C5EDDAAA98B3": "ilcb",
- "DCCF055A-BDBA-C444-1B33BEAA267C9EF3": "ilnb",
- "DCCF0569-BDBA-C444-10AAC89D6254827B": "ilsb",
- "DCCF0579-BDBA-C444-13FDD2CBFCA0428E": "innb",
- "DCCF0589-BDBA-C444-1403298F660F3248": "insb",
- "DCCF0598-BDBA-C444-1D4AA3760C808AC6": "ianb",
- "DCCF05A8-BDBA-C444-147676B19FFD9A64": "iasb",
- "DCCF05B7-BDBA-C444-1159BABEABFF7AD8": "ksb",
- "DCCF05C7-BDBA-C444-181132DD188F5B98": "kyeb",
- "DCCF05D7-BDBA-C444-173EA852DA3C02F3": "kywb",
- "DCCF05E6-BDBA-C444-1BBCF61EC04D7339": "laeb",
- "DCCF05F6-BDBA-C444-1CC8B0B3A0BA9BBE": "lamb",
- "DCCF0606-BDBA-C444-156EC6BFC06D300C": "lawb",
- "DCCF0615-BDBA-C444-12DA3916397575D1": "meb",
- "DCCF0625-BDBA-C444-16B46E54DD6D2B3F": "mdb",
- "DCCF0634-BDBA-C444-172D1B61491F44EB": "mab",
- "DCCF0644-BDBA-C444-16D30512F57AD7E7": "mieb",
- "DCCF0654-BDBA-C444-1B26AFB780F7E57D": "miwb",
- "DCCF0663-BDBA-C444-1E2D50E14B7E69B6": "mnb",
- "DCCF0673-BDBA-C444-162C60670DF8F3CC": "msnb",
- "DCCF0683-BDBA-C444-16D08467B7FFD39C": "mssb",
- "DCCF0692-BDBA-C444-105A607741D9B25E": "moeb",
- "DCCF06B1-BDBA-C444-1D0081621397B587": "mowb",
- "DCCF06C1-BDBA-C444-116BC0B37A3105FA": "mtb",
- "DCCF06D1-BDBA-C444-16605BEF7E402AFF": "neb",
- "DCCF06E0-BDBA-C444-142566FBDE706DF9": "nvb",
- "DCCF06F0-BDBA-C444-15CEC5BC7E8811B0": "nhb",
- "DCCF0700-BDBA-C444-1833C704F349B4C5": "njb",
- "DCCF071F-BDBA-C444-12E80A7584DAB242": "nyeb",
- "DCCF072E-BDBA-C444-161CCB961DC28EAA": "nynb",
- "DCCF073E-BDBA-C444-195A319E0477A40F": "nysb",
- "DCCF075D-BDBA-C444-1A4574BEA4332780": "nywb",
- "DCCF076D-BDBA-C444-1D86BA6110EAC8EB": "nceb",
- "DCCF077D-BDBA-C444-19E00357E47293C6": "ncmb",
- "DCCF078C-BDBA-C444-13A763C27712238D": "ncwb",
- "DCCF079C-BDBA-C444-152775C142804DBF": "ndb",
- "DCCF07AB-BDBA-C444-1909DD6A1D03789A": "ohnb",
- "DCCF07BB-BDBA-C444-15CC4C79DA8F0883": "ohsb",
- "DCCF07CB-BDBA-C444-16A03EA3C59A0E65": "okeb",
- "DCCF07DA-BDBA-C444-19C1613A6E47E8CC": "oknb",
- "DCCF07EA-BDBA-C444-11A55B458254CDA2": "okwb",
- "DCCF07FA-BDBA-C444-1931F6C553EEC927": "orb",
- "DCCF0819-BDBA-C444-121A57E62D0F901B": "paeb",
- "DCCF0838-BDBA-C444-11578199813DA094": "pamb",
- "DCCF0848-BDBA-C444-1FDC44C3E5C7F028": "pawb",
- "DCCF0857-BDBA-C444-1249D33530373C4A": "prb",
- "DCCF0867-BDBA-C444-11F248F5A172BED7": "rib",
- "DCCF0877-BDBA-C444-140D6F0E2517D28A": "scb",
- "DCCF0886-BDBA-C444-1FA114144D695156": "sdb",
- "DCCF0896-BDBA-C444-19AE23DDBC293010": "tneb",
- "DCCF08A5-BDBA-C444-16F88B92DFEFF2D7": "tnmb",
- "DCCF08B5-BDBA-C444-1015B0D4FD4EA2BB": "tnwb",
- "DCCF08D4-BDBA-C444-17A1F7F9130C2B5A": "txeb",
- "DCCF08E4-BDBA-C444-1FF320EDE23FE1C4": "txnb",
- "DCCF08F4-BDBA-C444-137D9095312F2A26": "txsb",
- "DCCF0903-BDBA-C444-1F1B7B299E8BEDEC": "txwb",
- "DCCF0913-BDBA-C444-1426E01E34A098A8": "utb",
- "DCCF0922-BDBA-C444-1E7C4839C9DDE0DD": "vtb",
- "DCCF0932-BDBA-C444-1E3B6019198C4AF3": "vib",
- "DCCF0942-BDBA-C444-15DE36A8BF619EE3": "vaeb",
- "DCCF0951-BDBA-C444-156287CAA9B5EA92": "vawb",
- "DCCF0961-BDBA-C444-113035CFC50A69B8": "waeb",
- "DCCF0971-BDBA-C444-1AE1249D4E72B62E": "wawb",
- "DCCF0980-BDBA-C444-12EE39B96F6E2CAD": "wvnb",
- "DCCF0990-BDBA-C444-16831E0CC62633BB": "wvsb",
- "DCCF099F-BDBA-C444-163A7EEE0EB991F6": "wieb",
- "DCCF09BF-BDBA-C444-1D3842A8131499EF": "wiwb",
- "DCCF09CE-BDBA-C444-1B4915E476D3A9D2": "wyb",
- "Mariana": "nmib",
- "640": "almd",
- "645": "alsd",
- "648": "akd",
- "651": "azd",
- "653": "ared",
- "656": "arwd",
- "659": "cacd",
- "662": "caed",
- "664": "cand",
- "667": "casd",
- "670": "cod",
- "672": "ctd",
- "675": "ded",
- "678": "dcd",
- "681": "flmd",
- "686": "flsd",
- "689": "gamd",
- "696": "gud",
- "699": "hid",
- "701": "idd",
- "704": "ilcd",
- "707": "ilnd",
- "712": "innd",
- "715": "insd",
- "717": "iand",
- "720": "iasd",
- "723": "ksd",
- "728": "kywd",
- "731": "laed",
- "734": "lamd",
- "737": "lawd",
- "740": "med",
- "744": "mad",
- "747": "mied",
- "750": "miwd",
- "757": "mssd",
- "759": "moed",
- "762": "mowd",
- "765": "mtd",
- "768": "ned",
- "771": "nvd",
- "773": "nhd",
- "776": "njd",
- "779": "nmd",
- "781": "nyed",
- "784": "nynd",
- "787": "nysd",
- "792": "nced",
- "795": "ncmd",
- "798": "ncwd",
- "803": "nmid",
- "806": "ohnd",
- "811": "ohsd",
- "818": "okwd",
- "821": "ord",
- "823": "paed",
- "826": "pamd",
- "829": "pawd",
- "832": "prd",
- "835": "rid",
- "840": "sdd",
- "843": "tned",
- "846": "tnmd",
- "849": "tnwd",
- "851": "txed",
- "854": "txnd",
- "856": "txsd",
- "859": "txwd",
- "862": "utd",
- "865": "vtd",
- "868": "vid",
- "873": "vawd",
- "876": "waed",
- "879": "wawd",
- "882": "wvnd",
- "885": "wvsd",
- "888": "wied",
- "891": "wiwd",
- "894": "wyd",
- # Appellate
- "609": "ca6",
- "619": "ca10",
- "625": "cadc",
- "628": "cafc",
- # I don't think we currently crawl these. Worth checking.
- "633": "uscfc",
- "636": "cit",
-}
diff --git a/cl/corpus_importer/tasks.py b/cl/corpus_importer/tasks.py
index a00a5e4448..cf242656fa 100644
--- a/cl/corpus_importer/tasks.py
+++ b/cl/corpus_importer/tasks.py
@@ -1614,7 +1614,7 @@ def get_docket_by_pacer_case_id(
:param tag_names: A list of tag names that should be stored with the item
in the DB.
:param kwargs: A variety of keyword args to pass to DocketReport.query().
- :return: A dict indicating if we need to update Solr.
+ :return: A dict indicating if we need to update the search engine.
"""
if data is None:
logger.info("Empty data argument. Terminating chains and exiting.")
diff --git a/cl/corpus_importer/tests.py b/cl/corpus_importer/tests.py
index c26207e871..b631fc3829 100644
--- a/cl/corpus_importer/tests.py
+++ b/cl/corpus_importer/tests.py
@@ -27,8 +27,6 @@
CaseLawCourtFactory,
CaseLawFactory,
CitationFactory,
- RssDocketDataFactory,
- RssDocketEntryDataFactory,
)
from cl.corpus_importer.import_columbia.columbia_utils import fix_xml_tags
from cl.corpus_importer.import_columbia.parse_opinions import (
@@ -57,11 +55,6 @@
normalize_authors_in_opinions,
normalize_panel_in_opinioncluster,
)
-from cl.corpus_importer.management.commands.troller_bk import (
- download_files_concurrently,
- log_added_items_to_redis,
- merge_rss_data,
-)
from cl.corpus_importer.management.commands.update_casenames_wl_dataset import (
check_case_names_match,
parse_citations,
@@ -90,7 +83,6 @@
)
from cl.lib.pacer import process_docket_data
from cl.lib.redis_utils import get_redis_interface
-from cl.lib.timezone_helpers import localize_date_and_time
from cl.people_db.factories import PersonWithChildrenFactory, PositionFactory
from cl.people_db.lookup_utils import (
extract_judge_last_name,
@@ -99,22 +91,18 @@
)
from cl.people_db.models import Attorney, AttorneyOrganization, Party
from cl.recap.models import UPLOAD_TYPE, PacerHtmlFiles
-from cl.recap_rss.models import RssItemCache
from cl.scrapers.models import PACERFreeDocumentRow
from cl.search.factories import (
CourtFactory,
- DocketEntryWithParentsFactory,
DocketFactory,
OpinionClusterFactory,
OpinionClusterFactoryMultipleOpinions,
OpinionClusterFactoryWithChildrenAndParents,
OpinionClusterWithParentsFactory,
OpinionWithChildrenFactory,
- RECAPDocumentFactory,
)
from cl.search.models import (
SOURCES,
- BankruptcyInformation,
Citation,
Court,
Docket,
@@ -1120,1281 +1108,6 @@ def test_normalize_panel_str(self):
self.assertEqual(len(cluster.panel.all()), 2)
-def mock_download_file(item_path, order):
- time.sleep(randint(1, 10) / 100)
- return b"", item_path, order
-
-
-class TrollerBKTests(TestCase):
- @classmethod
- def setUpTestData(cls) -> None:
- # District factories
- cls.court = CourtFactory(id="canb", jurisdiction="FB")
- cls.court_neb = CourtFactory(id="nebraskab", jurisdiction="FD")
- cls.court_pamd = CourtFactory(id="pamd", jurisdiction="FD")
- cls.docket_d_before_2018 = DocketFactory(
- case_name="Young v. State",
- docket_number="3:17-CV-01477",
- court=cls.court,
- source=Docket.HARVARD,
- pacer_case_id="1234",
- )
-
- cls.docket_d_after_2018 = DocketFactory(
- case_name="Dragon v. State",
- docket_number="3:15-CV-01455",
- court=cls.court,
- source=Docket.HARVARD,
- pacer_case_id="5431",
- )
-
- cls.de_d_before_2018 = DocketEntryWithParentsFactory(
- docket__court=cls.court,
- docket__case_name="Young Entry v. Dragon",
- docket__docket_number="3:87-CV-01400",
- docket__source=Docket.HARVARD,
- docket__pacer_case_id="9038",
- entry_number=1,
- date_filed=make_aware(
- datetime(year=2018, month=1, day=4), timezone.utc
- ),
- )
-
- # Appellate factories
- cls.court_appellate = CourtFactory(id="ca1", jurisdiction="F")
- cls.docket_a_before_2018 = DocketFactory(
- case_name="Young v. State",
- docket_number="12-2532",
- court=cls.court_appellate,
- source=Docket.HARVARD,
- pacer_case_id=None,
- )
- cls.docket_a_after_2018 = DocketFactory(
- case_name="Dragon v. State",
- docket_number="15-1232",
- court=cls.court_appellate,
- source=Docket.HARVARD,
- pacer_case_id=None,
- )
- cls.de_a_before_2018 = DocketEntryWithParentsFactory(
- docket__court=cls.court_appellate,
- docket__case_name="Young Entry v. Dragon",
- docket__docket_number="12-3242",
- docket__source=Docket.HARVARD,
- docket__pacer_case_id=None,
- entry_number=1,
- date_filed=make_aware(
- datetime(year=2018, month=1, day=4), timezone.utc
- ),
- )
- cls.docket_a_2018_case_id = DocketFactory(
- case_name="Young v. State",
- docket_number="12-5674",
- court=cls.court_appellate,
- source=Docket.RECAP,
- pacer_case_id="12524",
- )
-
- @classmethod
- def restart_troller_log(cls):
- r = get_redis_interface("STATS")
- key = r.keys("troller_bk:log")
- if key:
- r.delete(*key)
-
- def setUp(self) -> None:
- self.restart_troller_log()
-
- def test_merge_district_rss_before_2018(self):
- """1 Test merge district RSS file before 2018-4-20 into an existing
- docket
-
- Before 2018-4-20
- District
- Docket exists
- No docket entries
-
- Merge docket entries, avoid updating metadata.
- """
- d_rss_data_before_2018 = RssDocketDataFactory(
- court_id=self.court.pk,
- case_name="Young v. Dragon",
- docket_number="3:17-CV-01473",
- pacer_case_id="1234",
- docket_entries=[
- RssDocketEntryDataFactory(
- date_filed=make_aware(
- datetime(year=2017, month=1, day=4), timezone.utc
- )
- )
- ],
- )
-
- build_date = d_rss_data_before_2018["docket_entries"][0]["date_filed"]
- self.assertEqual(
- len(self.docket_d_before_2018.docket_entries.all()), 0
- )
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [d_rss_data_before_2018], self.court.pk, build_date
- )
- self.assertEqual(len(rds_created), 1)
- self.assertEqual(d_created, 0)
- self.docket_d_before_2018.refresh_from_db()
- self.assertEqual(self.docket_d_before_2018.case_name, "Young v. State")
- self.assertEqual(
- self.docket_d_before_2018.docket_number, "3:17-CV-01477"
- )
- self.assertEqual(
- len(self.docket_d_before_2018.docket_entries.all()), 1
- )
- self.assertEqual(
- self.docket_d_before_2018.source, Docket.HARVARD_AND_RECAP
- )
-
- def test_avoid_merging_district_rss_after_2018(self):
- """2 Test avoid merging district RSS file after 2018-4-20
-
- After 2018-4-20
- District
- Docket exists
- No docket entries
-
- Don't merge docket entries, avoid updating metadata.
- """
- d_rss_data_after_2018 = RssDocketDataFactory(
- court_id=self.court.pk,
- case_name="Dragon 1 v. State",
- docket_number="3:15-CV-01456",
- pacer_case_id="5431",
- docket_entries=[
- RssDocketEntryDataFactory(
- date_filed=make_aware(
- datetime(year=2018, month=4, day=21), timezone.utc
- )
- )
- ],
- )
-
- build_date = d_rss_data_after_2018["docket_entries"][0]["date_filed"]
- self.assertEqual(len(self.docket_d_after_2018.docket_entries.all()), 0)
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [d_rss_data_after_2018], self.court.pk, build_date
- )
- self.assertEqual(len(rds_created), 0)
- self.assertEqual(d_created, 0)
- self.docket_d_after_2018.refresh_from_db()
- self.assertEqual(self.docket_d_after_2018.case_name, "Dragon v. State")
- self.assertEqual(
- self.docket_d_after_2018.docket_number, "3:15-CV-01455"
- )
- self.assertEqual(len(self.docket_d_after_2018.docket_entries.all()), 0)
- self.assertEqual(self.docket_d_after_2018.source, Docket.HARVARD)
-
- def test_merge_district_courts_rss_exceptions_after_2018(self):
- """Test merging district RSS exceptions after 2018-4-20
-
- After 2018-4-20
- District ["miwb", "nceb", "pamd", "cit"]
- Docket doesn't exists
- No docket entries
-
- Create docket, merge docket entries.
- """
- d_rss_data_after_2018 = RssDocketDataFactory(
- court_id=self.court_pamd.pk,
- case_name="Dragon 1 v. State",
- docket_number="3:15-CV-01456",
- pacer_case_id="54312",
- docket_entries=[
- RssDocketEntryDataFactory(
- date_filed=make_aware(
- datetime(year=2018, month=4, day=21), timezone.utc
- )
- )
- ],
- )
-
- build_date = d_rss_data_after_2018["docket_entries"][0]["date_filed"]
- self.assertEqual(len(self.docket_d_after_2018.docket_entries.all()), 0)
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [d_rss_data_after_2018], self.court_pamd.pk, build_date
- )
- self.assertEqual(len(rds_created), 1)
- self.assertEqual(d_created, 1)
-
- docket = Docket.objects.get(pacer_case_id="54312")
- self.assertEqual(docket.case_name, "Dragon 1 v. State")
- self.assertEqual(docket.docket_number, "3:15-CV-01456")
-
- def test_merging_district_docket_with_entries_before_2018(self):
- """3 Test merge district RSS file before 2018-4-20 into a
- docket with entries.
-
- Before 2018-4-20
- District
- Docket exists
- Docket entries
-
- Only merge entry if it doesn't exist, avoid updating metadata.
- """
- d_rss_data_before_2018 = RssDocketDataFactory(
- court_id=self.court.pk,
- case_name="Young v. Dragon",
- docket_number="3:17-CV-01473",
- pacer_case_id="9038",
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number="2",
- date_filed=make_aware(
- datetime(year=2017, month=1, day=4), timezone.utc
- ),
- )
- ],
- )
-
- build_date = d_rss_data_before_2018["docket_entries"][0]["date_filed"]
- self.assertEqual(
- len(self.de_d_before_2018.docket.docket_entries.all()), 1
- )
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [d_rss_data_before_2018], self.court.pk, build_date
- )
- self.assertEqual(len(rds_created), 1)
- self.assertEqual(d_created, 0)
- self.de_d_before_2018.refresh_from_db()
- self.assertEqual(
- self.de_d_before_2018.docket.case_name, "Young Entry v. Dragon"
- )
- self.assertEqual(
- self.de_d_before_2018.docket.docket_number, "3:87-CV-01400"
- )
- self.assertEqual(
- len(self.de_d_before_2018.docket.docket_entries.all()), 2
- )
- self.assertEqual(
- self.de_d_before_2018.docket.source, Docket.HARVARD_AND_RECAP
- )
-
- def test_avoid_merging_updating_docket_item_without_docket_entries(
- self,
- ):
- """Test avoid merging or updating the docket when the RSS item doesn't
- contain entries.
-
- Docket exists
- Docket entries
-
- Avoid updating metadata.
- """
- d_rss_data_before_2018 = RssDocketDataFactory(
- court_id=self.court.pk,
- case_name="Young v. Dragon",
- docket_number="3:17-CV-01473",
- pacer_case_id="9038",
- docket_entries=[],
- )
-
- build_date = make_aware(
- datetime(year=2017, month=1, day=4), timezone.utc
- )
- self.assertEqual(
- len(self.de_d_before_2018.docket.docket_entries.all()), 1
- )
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [d_rss_data_before_2018], self.court.pk, build_date
- )
- self.assertEqual(len(rds_created), 0)
- self.assertEqual(d_created, 0)
- self.assertEqual(self.de_d_before_2018.docket.source, Docket.HARVARD)
-
- def test_add_new_district_rss_before_2018(self):
- """4 Test adds a district RSS file before 2018-4-20, new docket.
-
- Before: 2018-4-20
- District
- Docket doesn't exist
- No docket entries
-
- Create docket, merge docket entries.
- """
- d_rss_data_before_2018 = RssDocketDataFactory(
- court_id=self.court.pk,
- case_name="Youngs v. Dragon",
- docket_number="3:20-CV-01473",
- pacer_case_id="43562",
- docket_entries=[
- RssDocketEntryDataFactory(
- date_filed=make_aware(
- datetime(year=2017, month=1, day=4), timezone.utc
- )
- )
- ],
- )
-
- build_date = d_rss_data_before_2018["docket_entries"][0]["date_filed"]
- dockets = Docket.objects.filter(pacer_case_id="43562")
- self.assertEqual(dockets.count(), 0)
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [d_rss_data_before_2018], self.court.pk, build_date
- )
- self.assertEqual(len(rds_created), 1)
- self.assertEqual(d_created, 1)
- self.assertEqual(dockets[0].case_name, "Youngs v. Dragon")
- self.assertEqual(dockets[0].docket_number, "3:20-CV-01473")
- self.assertEqual(len(dockets[0].docket_entries.all()), 1)
- self.assertEqual(dockets[0].source, Docket.RECAP)
-
- def test_avoid_merging_rss_docket_with_entries_district_after_2018(self):
- """5 Test avoid merging district RSS file after 2018-4-20 into a
- docket with entries.
-
- After 2018-4-20
- District
- Docket exists
- Docket entries
-
- Don't merge docket entries, avoid updating metadata.
- """
- d_rss_data_after_2018 = RssDocketDataFactory(
- court_id=self.court.pk,
- case_name="Young v. Dragons 2",
- docket_number="3:57-CV-01453",
- pacer_case_id="9038",
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number="2",
- date_filed=make_aware(
- datetime(year=2019, month=1, day=4), timezone.utc
- ),
- )
- ],
- )
-
- build_date = d_rss_data_after_2018["docket_entries"][0]["date_filed"]
- self.assertEqual(
- len(self.de_d_before_2018.docket.docket_entries.all()), 1
- )
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [d_rss_data_after_2018], self.court.pk, build_date
- )
- self.assertEqual(len(rds_created), 0)
- self.assertEqual(d_created, 0)
- self.de_d_before_2018.refresh_from_db()
- self.assertEqual(
- self.de_d_before_2018.docket.case_name, "Young Entry v. Dragon"
- )
- self.assertEqual(
- self.de_d_before_2018.docket.docket_number, "3:87-CV-01400"
- )
- self.assertEqual(
- len(self.de_d_before_2018.docket.docket_entries.all()), 1
- )
- self.assertEqual(self.de_d_before_2018.docket.source, Docket.HARVARD)
-
- def test_avoid_adding_new_district_rss_after_2018(self):
- """6 Test avoid adding district RSS file after 2018-4-20.
-
- After 2018-4-20
- District
- Docket doesn't exist
- No docket entries
-
- Do not create docket, do not merge docket entries.
- """
- d_rss_data_after_2018 = RssDocketDataFactory(
- court_id=self.court.pk,
- case_name="Youngs v. Dragon",
- docket_number="3:20-CV-01473",
- pacer_case_id="53432",
- docket_entries=[
- RssDocketEntryDataFactory(
- date_filed=make_aware(
- datetime(year=2019, month=1, day=4), timezone.utc
- )
- )
- ],
- )
-
- build_date = d_rss_data_after_2018["docket_entries"][0]["date_filed"]
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [d_rss_data_after_2018], self.court.pk, build_date
- )
- self.assertEqual(len(rds_created), 0)
- self.assertEqual(d_created, 0)
-
- # Appellate
- def test_merge_appellate_rss_before_2018(self):
- """7 Test merge an appellate RSS file before 2018-4-20
-
- Before 2018-4-20
- Appellate
- Docket exists
- No docket entries
-
- Merge docket entries, avoid updating metadata.
- """
- a_rss_data_before_2018 = RssDocketDataFactory(
- court_id=self.court_appellate.pk,
- case_name="Young v. Dragon",
- docket_number="12-2532",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- date_filed=make_aware(
- datetime(year=2017, month=1, day=4), timezone.utc
- )
- )
- ],
- )
-
- build_date = a_rss_data_before_2018["docket_entries"][0]["date_filed"]
- self.assertEqual(
- len(self.docket_a_before_2018.docket_entries.all()), 0
- )
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [a_rss_data_before_2018], self.court_appellate.pk, build_date
- )
- self.assertEqual(len(rds_created), 1)
- self.assertEqual(d_created, 0)
- self.docket_a_before_2018.refresh_from_db()
- self.assertEqual(self.docket_a_before_2018.case_name, "Young v. State")
- self.assertEqual(self.docket_a_before_2018.docket_number, "12-2532")
- self.assertEqual(
- len(self.docket_a_before_2018.docket_entries.all()), 1
- )
- self.assertEqual(
- self.docket_a_before_2018.source, Docket.HARVARD_AND_RECAP
- )
-
- def test_merging_appellate_rss_after_2018(self):
- """8 Test appellate RSS file after 2018-4-20
-
- After 2018-4-20
- Appellate
- Docket exists
- No docket entries
-
- Merge docket entries, avoid updating metadata.
- """
- a_rss_data_after_2018 = RssDocketDataFactory(
- court_id=self.court_appellate.pk,
- case_name="Dragon 1 v. State",
- docket_number="15-1232",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- date_filed=make_aware(
- datetime(year=2018, month=4, day=21), timezone.utc
- )
- )
- ],
- )
-
- build_date = a_rss_data_after_2018["docket_entries"][0]["date_filed"]
- self.assertEqual(len(self.docket_a_after_2018.docket_entries.all()), 0)
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [a_rss_data_after_2018], self.court_appellate.pk, build_date
- )
- self.assertEqual(len(rds_created), 1)
- self.assertEqual(d_created, 0)
- self.docket_a_after_2018.refresh_from_db()
- self.assertEqual(self.docket_a_after_2018.case_name, "Dragon v. State")
- self.assertEqual(self.docket_a_after_2018.docket_number, "15-1232")
- self.assertEqual(len(self.docket_a_after_2018.docket_entries.all()), 1)
- self.assertEqual(
- self.docket_a_after_2018.source, Docket.HARVARD_AND_RECAP
- )
-
- def test_avoid_merging_existing_appellate_entry_before_2018(self):
- """9 Test avoid merging appellate RSS file before 2018-4-20, docket
- with entries.
-
- Before 2018-4-20
- Appellate
- Docket exists
- Docket entries
-
- Don't merge docket entries, avoid updating metadata.
- """
- a_rss_data_before_2018 = RssDocketDataFactory(
- court_id=self.court_appellate.pk,
- case_name="Young v. Dragon",
- docket_number="12-3242",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number="2",
- date_filed=make_aware(
- datetime(year=2017, month=1, day=4), timezone.utc
- ),
- )
- ],
- )
-
- build_date = a_rss_data_before_2018["docket_entries"][0]["date_filed"]
- self.assertEqual(
- len(self.de_a_before_2018.docket.docket_entries.all()), 1
- )
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [a_rss_data_before_2018], self.court_appellate.pk, build_date
- )
- self.assertEqual(len(rds_created), 1)
- self.assertEqual(d_created, 0)
- self.de_a_before_2018.refresh_from_db()
- self.assertEqual(
- self.de_a_before_2018.docket.case_name, "Young Entry v. Dragon"
- )
- self.assertEqual(self.de_a_before_2018.docket.docket_number, "12-3242")
- self.assertEqual(
- len(self.de_a_before_2018.docket.docket_entries.all()), 2
- )
- self.assertEqual(
- self.de_a_before_2018.docket.source, Docket.HARVARD_AND_RECAP
- )
-
- def test_merge_new_appellate_rss_before_2018(self):
- """10 Merge a new appellate RSS file before 2018-4-20
-
- Before: 2018-4-20
- Appellate
- Docket doesn't exist
- No docket entries
-
- Create docket, merge docket entries.
- """
- a_rss_data_before_2018 = RssDocketDataFactory(
- court_id=self.court_appellate.pk,
- case_name="Youngs v. Dragon",
- docket_number="23-4233",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- date_filed=make_aware(
- datetime(year=2017, month=1, day=4), timezone.utc
- )
- )
- ],
- )
-
- build_date = a_rss_data_before_2018["docket_entries"][0]["date_filed"]
- dockets = Docket.objects.filter(docket_number="23-4233")
- self.assertEqual(dockets.count(), 0)
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [a_rss_data_before_2018], self.court_appellate.pk, build_date
- )
- self.assertEqual(len(rds_created), 1)
- self.assertEqual(d_created, 1)
- self.assertEqual(dockets[0].case_name, "Youngs v. Dragon")
- self.assertEqual(dockets[0].docket_number, "23-4233")
- self.assertEqual(len(dockets[0].docket_entries.all()), 1)
- self.assertEqual(dockets[0].source, Docket.RECAP)
-
- def test_avoid_merging_existing_appellate_entry_after_2018(self):
- """11 Test avoid merging appellate RSS file after 2018-4-20, docket with
- entries.
-
- After: 2018-4-20
- Appellate
- Docket exists
- Docket entry exist
-
- Don't merge the existing entry, avoid updating metadata.
- """
- a_rss_data_before_2018 = RssDocketDataFactory(
- court_id=self.court_appellate.pk,
- case_name="Young v. Dragon",
- docket_number="12-3242",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number="1",
- date_filed=make_aware(
- datetime(year=2019, month=1, day=4), timezone.utc
- ),
- )
- ],
- )
-
- build_date = a_rss_data_before_2018["docket_entries"][0]["date_filed"]
- self.assertEqual(
- len(self.de_a_before_2018.docket.docket_entries.all()), 1
- )
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [a_rss_data_before_2018], self.court_appellate.pk, build_date
- )
- self.assertEqual(len(rds_created), 0)
- self.assertEqual(d_created, 0)
-
- def test_merging_appellate_docket_with_entries_after_2018(self):
- """Test merge appellate RSS file after 2018-4-20, docket with
- entries.
-
- After: 2018-4-20
- Appellate
- Docket exists
- Docket entries
-
- Only merge entry if it doesn't exist, avoid updating metadata.
- """
- a_rss_data_before_2018 = RssDocketDataFactory(
- court_id=self.court_appellate.pk,
- case_name="Young v. Dragon",
- docket_number="12-3242",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number="2",
- date_filed=make_aware(
- datetime(year=2019, month=1, day=4), timezone.utc
- ),
- )
- ],
- )
-
- build_date = a_rss_data_before_2018["docket_entries"][0]["date_filed"]
- self.assertEqual(
- len(self.de_a_before_2018.docket.docket_entries.all()), 1
- )
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [a_rss_data_before_2018], self.court_appellate.pk, build_date
- )
- self.assertEqual(len(rds_created), 1)
- self.assertEqual(d_created, 0)
- self.de_a_before_2018.refresh_from_db()
- self.assertEqual(
- self.de_a_before_2018.docket.case_name, "Young Entry v. Dragon"
- )
- self.assertEqual(self.de_a_before_2018.docket.docket_number, "12-3242")
- self.assertEqual(
- len(self.de_a_before_2018.docket.docket_entries.all()), 2
- )
- self.assertEqual(
- self.de_a_before_2018.docket.source, Docket.HARVARD_AND_RECAP
- )
-
- def test_merge_new_appellate_rss_after_2018(self):
- """12 Merge a new appellate RSS file after 2018-4-20
-
- After: 2018-4-20
- Appellate
- Docket doesn't exist
- No docket entries
-
- Create docket, merge docket entries.
- """
-
- d_rss_data_after_2018 = RssDocketDataFactory(
- court_id=self.court_appellate.pk,
- case_name="Youngs v. Dragon",
- docket_number="45-3232",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- date_filed=make_aware(
- datetime(year=2019, month=1, day=4), timezone.utc
- )
- )
- ],
- )
-
- build_date = d_rss_data_after_2018["docket_entries"][0]["date_filed"]
- dockets = Docket.objects.filter(docket_number="45-3232")
- self.assertEqual(dockets.count(), 0)
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [d_rss_data_after_2018], self.court_appellate.pk, build_date
- )
- self.assertEqual(len(rds_created), 1)
- self.assertEqual(d_created, 1)
- self.assertEqual(dockets.count(), 1)
- self.assertEqual(dockets[0].case_name, "Youngs v. Dragon")
- self.assertEqual(dockets[0].docket_number, "45-3232")
- self.assertEqual(len(dockets[0].docket_entries.all()), 1)
- self.assertEqual(dockets[0].source, Docket.RECAP)
-
- def test_merging_appellate_docket_with_entries_case_id(self):
- """Test merge an appellate RSS file into a docket with pacer_case_id
- Find docket by docket_number_core, avoid duplicating.
- Merge docket entries, avoid updating metadata.
- """
- a_rss_data_before_2018 = RssDocketDataFactory(
- court_id=self.court_appellate.pk,
- case_name="Young v. Dragon",
- docket_number="12-5674",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number="2",
- date_filed=make_aware(
- datetime(year=2019, month=1, day=4), timezone.utc
- ),
- )
- ],
- )
-
- build_date = a_rss_data_before_2018["docket_entries"][0]["date_filed"]
- self.assertEqual(
- len(self.docket_a_2018_case_id.docket_entries.all()), 0
- )
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [a_rss_data_before_2018], self.court_appellate.pk, build_date
- )
- self.assertEqual(len(rds_created), 1)
- self.assertEqual(d_created, 0)
- self.docket_a_2018_case_id.refresh_from_db()
- self.assertEqual(
- self.docket_a_2018_case_id.case_name, "Young v. State"
- )
- self.assertEqual(self.docket_a_2018_case_id.docket_number, "12-5674")
- self.assertEqual(self.docket_a_2018_case_id.pacer_case_id, "12524")
- self.assertEqual(
- len(self.docket_a_2018_case_id.docket_entries.all()), 1
- )
- self.assertEqual(self.docket_a_2018_case_id.source, Docket.RECAP)
-
- def test_log_added_items_to_redis(self):
- """Can we log dockets and rds added to redis, adding the previous
- value?
- """
- last_values = log_added_items_to_redis(100, 100, 50)
- self.assertEqual(last_values["total_dockets"], 100)
- self.assertEqual(last_values["total_rds"], 100)
- self.assertEqual(last_values["last_line"], 50)
-
- last_values = log_added_items_to_redis(50, 80, 100)
- self.assertEqual(last_values["total_dockets"], 150)
- self.assertEqual(last_values["total_rds"], 180)
- self.assertEqual(last_values["last_line"], 100)
-
- self.restart_troller_log()
-
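The accumulation asserted above (totals grow across calls while last_line is simply replaced) is the kind of behavior usually built on Redis increment commands. Below is a minimal sketch of that idea; the key and field names, and the use of a plain redis-py client, are illustrative assumptions rather than what log_added_items_to_redis actually does.

import redis  # requires the redis-py package

def log_added_items_sketch(d_created: int, rd_created: int, last_line: int) -> dict:
    """Accumulate docket/RD counts and record the last processed line."""
    r = redis.Redis(decode_responses=True)
    key = "troller_bk:status"  # illustrative key name, not the real one
    pipe = r.pipeline()
    pipe.hincrby(key, "total_dockets", d_created)  # adds to the previous value
    pipe.hincrby(key, "total_rds", rd_created)
    pipe.hset(key, "last_line", last_line)  # overwritten, not accumulated
    pipe.hgetall(key)
    *_, current = pipe.execute()
    return {k: int(v) for k, v in current.items()}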
- def test_merge_mapped_court_rss_before_2018(self):
- """Merge a court mapped RSS file before 2018-4-20
-
- Before: 2018-4-20
- District neb -> nebraskab
- Docket doesn't exist
- No docket entries
-
- Create docket, merge docket entries, verify the docket is assigned to nebraskab.
- """
-
- d_rss_data_before_2018 = RssDocketDataFactory(
- court_id="neb",
- case_name="Youngs v. Dragon",
- docket_number="3:20-CV-01473",
- pacer_case_id="43565",
- docket_entries=[
- RssDocketEntryDataFactory(
- date_filed=make_aware(
- datetime(year=2017, month=1, day=4), timezone.utc
- )
- )
- ],
- )
-
- build_date = d_rss_data_before_2018["docket_entries"][0]["date_filed"]
- dockets = Docket.objects.filter(docket_number="3:20-CV-01473")
- self.assertEqual(dockets.count(), 0)
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [d_rss_data_before_2018], "neb", build_date
- )
- self.assertEqual(len(rds_created), 1)
- self.assertEqual(d_created, 1)
- self.assertEqual(dockets.count(), 1)
- self.assertEqual(dockets[0].case_name, "Youngs v. Dragon")
- self.assertEqual(dockets[0].docket_number, "3:20-CV-01473")
- self.assertEqual(len(dockets[0].docket_entries.all()), 1)
- self.assertEqual(dockets[0].source, Docket.RECAP)
- self.assertEqual(dockets[0].court.pk, "nebraskab")
-
- def test_avoid_merging_district_mapped_court_rss_after_2018(self):
- """Avoid merging a new district RSS file with mapped court
- after 2018-4-20.
-
- After: 2018-4-20
- District neb -> nebraskab
- Docket doesn't exist
- No docket entries
-
- Don't merge.
- """
-
- d_rss_data_after_2018 = RssDocketDataFactory(
- court_id="neb",
- case_name="Youngs v. Dragon",
- docket_number="3:20-CV-01473",
- pacer_case_id="43565",
- docket_entries=[
- RssDocketEntryDataFactory(
- date_filed=make_aware(
- datetime(year=2019, month=1, day=4), timezone.utc
- )
- )
- ],
- )
- build_date = d_rss_data_after_2018["docket_entries"][0]["date_filed"]
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [d_rss_data_after_2018], "neb", build_date
- )
- self.assertEqual(len(rds_created), 0)
- self.assertEqual(d_created, 0)
-
- def test_avoid_updating_docket_entry_metadata(self):
- """Test merge appellate RSS file after 2018-4-20, docket with
- entries.
-
- After: 2018-4-20
- Appellate
- Docket exists
- Docket entries
-
- Only merge the entry if it doesn't exist; avoid updating metadata.
- """
-
- de_a_unnumbered = DocketEntryWithParentsFactory(
- docket__court=self.court_appellate,
- docket__case_name="Young Entry v. Dragon",
- docket__docket_number="12-3245",
- docket__source=Docket.HARVARD,
- docket__pacer_case_id=None,
- entry_number=None,
- description="Original docket entry description",
- date_filed=make_aware(
- datetime(year=2018, month=1, day=5), timezone.utc
- ),
- )
- RECAPDocumentFactory(
- docket_entry=de_a_unnumbered, description="Opinion Issued"
- )
-
- a_rss_data_unnumbered = RssDocketDataFactory(
- court_id=self.court_appellate.pk,
- case_name="Young v. Dragon",
- docket_number="12-3245",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number=None,
- description="New docket entry description",
- short_description="Opinion Issued",
- date_filed=make_aware(
- datetime(year=2018, month=1, day=5), timezone.utc
- ),
- )
- ],
- )
- build_date = a_rss_data_unnumbered["docket_entries"][0]["date_filed"]
- self.assertEqual(len(de_a_unnumbered.docket.docket_entries.all()), 1)
- rds_created, d_created = async_to_sync(merge_rss_data)(
- [a_rss_data_unnumbered], self.court_appellate.pk, build_date
- )
- self.assertEqual(len(rds_created), 0)
- self.assertEqual(d_created, 0)
- de_a_unnumbered.refresh_from_db()
- self.assertEqual(
- de_a_unnumbered.docket.case_name, "Young Entry v. Dragon"
- )
- self.assertEqual(de_a_unnumbered.docket.docket_number, "12-3245")
- self.assertEqual(
- de_a_unnumbered.description, "Original docket entry description"
- )
- self.assertEqual(len(de_a_unnumbered.docket.docket_entries.all()), 1)
- self.assertEqual(
- de_a_unnumbered.date_filed,
- datetime(year=2018, month=1, day=4).date(),
- )
- self.assertEqual(de_a_unnumbered.docket.source, Docket.HARVARD)
-
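The January 4 date asserted above, for an entry created with a January 5 00:00 UTC timestamp, is consistent with the UTC timestamp being localized to the court's timezone before the date is stored (the same idea behind the localize_date_and_time call used later in these tests). A minimal sketch of that conversion, assuming a US/Eastern court; the real court-to-timezone mapping lives in the project.

from datetime import datetime, timezone
from zoneinfo import ZoneInfo

dt_utc = datetime(2018, 1, 5, tzinfo=timezone.utc)
local = dt_utc.astimezone(ZoneInfo("America/New_York"))  # assumed timezone
date_filed, time_filed = local.date(), local.time()
print(date_filed)  # 2018-01-04 (the previous calendar day)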
- @patch("cl.corpus_importer.management.commands.troller_bk.logger")
- def test_avoid_cached_items(self, mock_logger):
- """Can we skip a whole file when a cached item is hit?"""
-
- a_rss_data_0 = RssDocketDataFactory(
- court_id=self.court_appellate.pk,
- docket_number="12-3247",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number=1,
- date_filed=make_aware(
- datetime(year=2018, month=1, day=5), timezone.utc
- ),
- ),
- ],
- )
-
- a_rss_data_1 = RssDocketDataFactory(
- court_id=self.court_appellate.pk,
- docket_number="12-3245",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number=1,
- date_filed=make_aware(
- datetime(year=2018, month=1, day=5), timezone.utc
- ),
- )
- ],
- )
- a_rss_data_2 = RssDocketDataFactory(
- court_id=self.court_appellate.pk,
- docket_number="12-3246",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number=1,
- date_filed=make_aware(
- datetime(year=2018, month=1, day=5), timezone.utc
- ),
- )
- ],
- )
-
- list_rss_data_1 = [a_rss_data_1, a_rss_data_2]
- list_rss_data_2 = [a_rss_data_0, a_rss_data_1]
-
- cached_items = RssItemCache.objects.all()
- self.assertEqual(cached_items.count(), 0)
- build_date = a_rss_data_0["docket_entries"][0]["date_filed"]
- rds_created, d_created = async_to_sync(merge_rss_data)(
- list_rss_data_1, self.court_appellate.pk, build_date
- )
- self.assertEqual(len(rds_created), 2)
- self.assertEqual(d_created, 2)
- self.assertEqual(cached_items.count(), 2)
-
- # Remove recap_sequence_number from the dict to simulate the same item
- del a_rss_data_1["docket_entries"][0]["recap_sequence_number"]
- rds_created, d_created = async_to_sync(merge_rss_data)(
- list_rss_data_2, self.court_appellate.pk, build_date
- )
-
- # The file is aborted when a cached item is hit
- self.assertEqual(len(rds_created), 1)
- self.assertEqual(d_created, 1)
- self.assertEqual(cached_items.count(), 3)
- mock_logger.info.assert_called_with(
- f"Finished adding {self.court_appellate.pk} feed. Added {len(rds_created)} RDs."
- )
-
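For context, the skip behavior asserted above is a content-hash cache: each RSS item is hashed, and hitting a hash that is already cached is taken to mean the rest of the file was processed before (which is why the test deletes recap_sequence_number to recreate an identical item). The sketch below is a hedged illustration of that idea only; the hashing and abort details of the real merge_rss_data/RssItemCache code may differ.

import hashlib
import json

def item_hash(item: dict) -> str:
    """Stable hash of an RSS item's contents."""
    return hashlib.sha1(
        json.dumps(item, sort_keys=True, default=str).encode()
    ).hexdigest()

def take_until_cache_hit(items: list[dict], seen_hashes: set[str]) -> list[dict]:
    """Collect new items, aborting the file at the first cached item."""
    new_items = []
    for item in items:
        h = item_hash(item)
        if h in seen_hashes:
            break  # cache hit: assume the rest of the file was already processed
        seen_hashes.add(h)
        new_items.append(item)
    return new_items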
- @patch(
- "cl.corpus_importer.management.commands.troller_bk.download_file",
- side_effect=mock_download_file,
- )
- def test_download_files_concurrently(self, mock_download):
- """Test the download_files_concurrently method to verify proper
- fetching of the next paths to download from a file. Concurrently
- download these paths and add them to a queue in the original chronological order.
- """
- test_dir = (
- Path(settings.INSTALL_ROOT)
- / "cl"
- / "corpus_importer"
- / "test_assets"
- )
- import_filename = "import.csv"
- import_path = os.path.join(test_dir, import_filename)
-
- files_queue = Queue()
- threads = []
- files_downloaded_offset = 0
-
- with open(import_path, "rb") as f:
- files_downloaded_offset = download_files_concurrently(
- files_queue, f.name, files_downloaded_offset, threads
- )
- self.assertEqual(len(threads), 1)
- self.assertEqual(files_downloaded_offset, 3)
- files_downloaded_offset = download_files_concurrently(
- files_queue, f.name, files_downloaded_offset, threads
- )
-
- for thread in threads:
- thread.join()
-
- self.assertEqual(len(threads), 2)
- self.assertEqual(files_downloaded_offset, 6)
- self.assertEqual(files_queue.qsize(), 6)
-
- # Verifies original chronological order.
- binary, item_path, order = files_queue.get()
- self.assertEqual(order, 0)
- self.assertEqual(item_path.split("|")[1], "1575330086")
- files_queue.task_done()
-
- binary, item_path, order = files_queue.get()
- self.assertEqual(order, 1)
- self.assertEqual(item_path.split("|")[1], "1575333374")
- files_queue.task_done()
-
- binary, item_path, order = files_queue.get()
- self.assertEqual(order, 2)
- self.assertEqual(item_path.split("|")[1], "1575336978")
- files_queue.task_done()
-
- binary, item_path, order = files_queue.get()
- self.assertEqual(order, 0)
- self.assertEqual(item_path.split("|")[1], "1575340576")
- files_queue.task_done()
-
- binary, item_path, order = files_queue.get()
- self.assertEqual(order, 1)
- self.assertEqual(item_path.split("|")[1], "1575344176")
- files_queue.task_done()
-
- binary, item_path, order = files_queue.get()
- self.assertEqual(order, 2)
- self.assertEqual(item_path.split("|")[1], "1575380176")
- files_queue.task_done()
-
- self.assertEqual(files_queue.qsize(), 0)
-
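The ordering checked above (two batches of three files, order restarting at 0 per batch while timestamps keep ascending) comes from downloading a batch concurrently but enqueueing results by their original position. A minimal sketch of that pattern follows; the names and signature are illustrative and do not match the project's actual download_files_concurrently.

from concurrent.futures import ThreadPoolExecutor
from queue import Queue
from typing import Callable

def download_batch(
    paths: list[str], files_queue: Queue, fetch: Callable[[str], bytes]
) -> None:
    """Download `paths` concurrently, then enqueue them in their original order."""
    with ThreadPoolExecutor(max_workers=len(paths) or 1) as executor:
        futures = [executor.submit(fetch, p) for p in paths]
    # Iterate in submission order, not completion order, so the queue
    # preserves the file's original chronological order.
    for order, (path, future) in enumerate(zip(paths, futures)):
        files_queue.put((future.result(), path, order))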
- def test_add_objects_in_bulk(self):
- """Can we properly add related RSS feed objects in bulk?"""
-
- a_rss_data_0 = RssDocketDataFactory(
- court_id=self.court_appellate.pk,
- docket_number="15-3247",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number=1,
- date_filed=make_aware(
- datetime(year=2018, month=1, day=5), timezone.utc
- ),
- ),
- ],
- )
-
- a_rss_data_1 = RssDocketDataFactory(
- court_id=self.court_appellate.pk,
- docket_number="15-3245",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number=1,
- date_filed=make_aware(
- datetime(year=2018, month=1, day=5), timezone.utc
- ),
- )
- ],
- )
- a_rss_data_2 = RssDocketDataFactory(
- court_id=self.court_appellate.pk,
- docket_number="15-3247",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number=2,
- date_filed=make_aware(
- datetime(year=2018, month=1, day=5), timezone.utc
- ),
- )
- ],
- )
-
- a_rss_data_3 = RssDocketDataFactory(
- court_id=self.court_appellate.pk,
- docket_number="12-2532",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number=5,
- date_filed=make_aware(
- datetime(year=2018, month=1, day=5), timezone.utc
- ),
- )
- ],
- )
-
- list_rss_data = [
- a_rss_data_0,
- a_rss_data_1,
- a_rss_data_2,
- a_rss_data_3,
- ]
- cached_items = RssItemCache.objects.all()
- self.assertEqual(cached_items.count(), 0)
-
- build_date = a_rss_data_0["docket_entries"][0]["date_filed"]
- rds_created, d_created = async_to_sync(merge_rss_data)(
- list_rss_data, self.court_appellate.pk, build_date
- )
-
- date_filed, time_filed = localize_date_and_time(
- self.court_appellate.pk, build_date
- )
-
- # Only two dockets are created: 15-3247 and 15-3245; 12-2532 already exists.
- self.assertEqual(d_created, 2)
- self.assertEqual(len(rds_created), 4)
-
- # Compare docket entries and rds created for each docket.
- des_to_compare = [("15-3245", 1), ("15-3247", 2), ("12-2532", 1)]
- for d_number, de_count in des_to_compare:
- docket = Docket.objects.get(docket_number=d_number)
- self.assertEqual(len(docket.docket_entries.all()), de_count)
-
- # For every docket entry there is one recap document created.
- docket_entries = docket.docket_entries.all()
- for de in docket_entries:
- self.assertEqual(len(de.recap_documents.all()), 1)
- self.assertEqual(de.time_filed, time_filed)
- self.assertEqual(de.date_filed, date_filed)
- self.assertNotEqual(de.recap_sequence_number, "")
-
- # docket_number_core generated for every docket
- self.assertNotEqual(docket.docket_number_core, "")
- # Slug is generated for every docket
- self.assertNotEqual(docket.slug, "")
-
- # Verify RECAP source is added to existing and new dockets.
- if d_number == "12-2532":
- self.assertEqual(docket.source, Docket.HARVARD_AND_RECAP)
- else:
- self.assertEqual(docket.source, Docket.RECAP)
- # Confirm date_last_filing is added to each new docket.
- self.assertEqual(docket.date_last_filing, date_filed)
-
- # BankruptcyInformation is added only on new dockets.
- bankr_objs_created = BankruptcyInformation.objects.all()
- self.assertEqual(len(bankr_objs_created), 3)
-
- # Compare bankruptcy data is linked correctly to the parent docket.
- bankr_d_1 = BankruptcyInformation.objects.get(
- docket__docket_number=a_rss_data_0["docket_number"]
- )
- self.assertEqual(bankr_d_1.chapter, str(a_rss_data_0["chapter"]))
- self.assertEqual(
- bankr_d_1.trustee_str, str(a_rss_data_0["trustee_str"])
- )
-
- bankr_d_2 = BankruptcyInformation.objects.get(
- docket__docket_number=a_rss_data_1["docket_number"]
- )
- self.assertEqual(bankr_d_2.chapter, str(a_rss_data_1["chapter"]))
- self.assertEqual(
- bankr_d_2.trustee_str, str(a_rss_data_1["trustee_str"])
- )
-
- bankr_d_3 = BankruptcyInformation.objects.get(
- docket__docket_number=a_rss_data_3["docket_number"]
- )
- self.assertEqual(bankr_d_3.chapter, str(a_rss_data_3["chapter"]))
- self.assertEqual(
- bankr_d_3.trustee_str, str(a_rss_data_3["trustee_str"])
- )
-
- def test_avoid_adding_district_dockets_no_pacer_case_id_in_bulk(self):
- """Can we avoid adding district/bankr dockets that don't have a
- pacer_case_id?"""
-
- a_rss_data_0 = RssDocketDataFactory(
- court_id=self.court_neb.pk,
- docket_number="15-3247",
- pacer_case_id=None,
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number=1,
- date_filed=make_aware(
- datetime(year=2018, month=1, day=5), timezone.utc
- ),
- ),
- ],
- )
-
- a_rss_data_1 = RssDocketDataFactory(
- court_id=self.court_neb.pk,
- docket_number="15-3245",
- pacer_case_id="12345",
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number=1,
- date_filed=make_aware(
- datetime(year=2018, month=1, day=5), timezone.utc
- ),
- )
- ],
- )
-
- list_rss_data = [
- a_rss_data_0,
- a_rss_data_1,
- ]
-
- build_date = a_rss_data_0["docket_entries"][0]["date_filed"]
- rds_created, d_created = async_to_sync(merge_rss_data)(
- list_rss_data, self.court_neb.pk, build_date
- )
-
- # Only one docket is created: 15-3245, since 15-3247 doesn't have a pacer_case_id.
- self.assertEqual(d_created, 1)
- self.assertEqual(len(rds_created), 1)
-
- # Compare docket entries and rds created for each docket.
- des_to_compare = [("15-3245", 1)]
- for d_number, de_count in des_to_compare:
- docket = Docket.objects.get(docket_number=d_number)
- self.assertEqual(len(docket.docket_entries.all()), de_count)
- # For every docket entry there is one recap document created.
- docket_entries = docket.docket_entries.all()
- for de in docket_entries:
- self.assertEqual(len(de.recap_documents.all()), 1)
- self.assertNotEqual(de.recap_sequence_number, "")
-
- # docket_number_core generated for every docket
- self.assertNotEqual(docket.docket_number_core, "")
- # Slug is generated for every docket
- self.assertNotEqual(docket.slug, "")
- self.assertEqual(docket.source, Docket.RECAP)
-
- # BankruptcyInformation is added only on new dockets.
- bankr_objs_created = BankruptcyInformation.objects.all()
- self.assertEqual(len(bankr_objs_created), 1)
-
- def test_avoid_adding_existing_entries_by_description(self):
- """Can we avoid adding district/bankr dockets that don't have a
- pacer_case_id?"""
-
- de = DocketEntryWithParentsFactory(
- docket__court=self.court,
- docket__case_name="Young Entry v. Dragon",
- docket__docket_number="3:87-CV-01409",
- docket__source=Docket.HARVARD,
- docket__pacer_case_id="90385",
- entry_number=None,
- date_filed=make_aware(
- datetime(year=2018, month=1, day=5), timezone.utc
- ),
- )
- RECAPDocumentFactory(docket_entry=de, description="Opinion Issued")
- a_rss_data_0 = RssDocketDataFactory(
- court_id=self.court,
- docket_number="3:87-CV-01409",
- pacer_case_id="90385",
- docket_entries=[
- RssDocketEntryDataFactory(
- document_number=None,
- short_description="Opinion Issued",
- date_filed=make_aware(
- datetime(year=2018, month=1, day=5), timezone.utc
- ),
- ),
- ],
- )
- list_rss_data = [
- a_rss_data_0,
- ]
- build_date = a_rss_data_0["docket_entries"][0]["date_filed"]
- rds_created, d_created = async_to_sync(merge_rss_data)(
- list_rss_data, self.court.pk, build_date
- )
-
- # No docket entry should be created
- self.assertEqual(d_created, 0)
- self.assertEqual(len(rds_created), 0)
-
-
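The test above relies on unnumbered RSS entries being de-duplicated against documents already on the docket by their short description. A hedged, self-contained sketch of that check is below; the real code queries the database and likely also matches on the filing date, while here the existing descriptions are simply passed in.

def is_duplicate_unnumbered_entry(
    rss_entry: dict, existing_descriptions: set[str]
) -> bool:
    """True when an unnumbered entry matches an existing document by description."""
    return (
        rss_entry.get("document_number") is None
        and rss_entry.get("short_description") in existing_descriptions
    )

# Mirrors the scenario above: the docket already has an "Opinion Issued" document.
assert is_duplicate_unnumbered_entry(
    {"document_number": None, "short_description": "Opinion Issued"},
    {"Opinion Issued"},
)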
@patch(
"cl.corpus_importer.management.commands.clean_up_mis_matched_dockets.download_file",
side_effect=lambda a: {
diff --git a/cl/favorites/templates/prayer_email.html b/cl/favorites/templates/prayer_email.html
index 121b2c93af..dbbf0a0e6e 100644
--- a/cl/favorites/templates/prayer_email.html
+++ b/cl/favorites/templates/prayer_email.html
@@ -1,5 +1,6 @@
{% load text_filters %}
{% load humanize %}
+{% load extras %}
{% load tz %}
@@ -44,7 +45,7 @@