Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(models.corrected_citation): correct page for NY Slip OP and Misc… #227

Merged
merged 1 commit into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Features:
- Adds helper function span_with_pincite() to get full citation with pin cite

Changes:
- None
- Adds page correction to `corrected_citation` for the 'NY Slip Op' and 'Misc. 3d' reporters

Fixes:
- Strengthens error handling during the loading of the cached Hyperscan database. This ensures that an invalid cache triggers a rebuild.
Expand Down
34 changes: 29 additions & 5 deletions eyecite/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
cast,
)

from eyecite.utils import hash_sha256
from eyecite.utils import REPORTERS_THAT_NEED_PAGE_CORRECTION, hash_sha256

ResourceType = Hashable

Expand Down Expand Up @@ -308,12 +308,36 @@ def corrected_reporter(self):
)

def corrected_citation(self):
    """Return the matched text with a corrected reporter and page.

    When an edition guess is available, the matched reporter string is
    replaced by the edition's short name. Then, if ``corrected_page()``
    yields a value that differs from the matched page, the matched page
    is replaced by that value.

    Returns:
        The corrected citation string, or the unmodified matched text
        when nothing needs correcting.
    """
    corrected = self.matched_text()

    # Only substitute when a non-empty reporter was captured:
    # str.replace() raises TypeError for None, and an empty needle would
    # splice the short name between every character.
    reporter = self.groups.get("reporter")
    if self.edition_guess and reporter:
        corrected = corrected.replace(
            reporter, self.edition_guess.short_name
        )

    # Same guard for the page group, so a missing page cannot raise.
    page = self.groups.get("page")
    corrected_page = self.corrected_page()
    if page and corrected_page and corrected_page != page:
        corrected = corrected.replace(page, corrected_page)

    return corrected

def corrected_page(self):
    """Return the page group, standardized when the reporter calls for it.

    Returns ``None`` when the citation has no ``page`` group. If the
    matched reporter, or the guessed edition's short name, is listed in
    ``REPORTERS_THAT_NEED_PAGE_CORRECTION``, the markers ``[U]`` and
    ``[A]`` are rewritten as ``(U)`` and ``(A)``. Otherwise the page is
    returned unchanged.
    """
    raw_page = self.groups.get("page")
    if raw_page is None:
        return None

    matched_reporter = self.groups.get("reporter")
    if not matched_reporter:
        return raw_page

    # Check both spellings: the reporter as written in the text and the
    # standardized short name of the guessed edition, if any.
    guessed_name = (
        self.edition_guess.short_name if self.edition_guess else ""
    )
    candidates = {matched_reporter, guessed_name}
    if candidates.isdisjoint(REPORTERS_THAT_NEED_PAGE_CORRECTION):
        return raw_page

    return raw_page.replace("[U]", "(U)").replace("[A]", "(A)")

def guess_edition(self):
"""Set edition_guess."""
Expand Down
2 changes: 2 additions & 0 deletions eyecite/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from lxml import etree

# Reporter names for which corrected_page() rewrites the "[U]" / "[A]"
# page markers as "(U)" / "(A)".
REPORTERS_THAT_NEED_PAGE_CORRECTION = {"NY Slip Op", "Misc. 3d"}

# Names not allowed to be reference citations
# this is partially taken from juriscraper
DISALLOWED_NAMES = [
Expand Down
10 changes: 5 additions & 5 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ include = ["eyecite/py.typed"]

[tool.poetry.dependencies]
python = ">=3.10,<4.0"
reporters-db = ">=3.2.52"
reporters-db = ">=3.2.53"
lxml = ">=4.6.3"
pyahocorasick = ">= 1.2"
fast-diff-match-patch = ">=2.0.0"
Expand Down
32 changes: 32 additions & 0 deletions tests/test_ModelsTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,3 +221,35 @@ def test_corrected_full_citation_includes_closing_parenthesis(self):
full_case_citation.corrected_citation_full(),
"Bank v. Vinson, 477 U.S. 57, 60 (scotus 1986)",
)

def test_page_correction(self):
    """Check page handling in corrected_citation() and corrected_page()."""
    # Each case: (input text, expected corrected citation, expected page)
    needs_correction = [
        (
            "2024 N.Y. Slip Op. 51192(U)",
            "2024 NY Slip Op 51192(U)",
            "51192(U)",
        ),
        ("2024 NYSlipOp 51192[U]", "2024 NY Slip Op 51192(U)", "51192(U)"),
        ("11 Misc 3d 134[A]", "11 Misc. 3d 134(A)", "134(A)"),
        ("83 Misc.3d 126(A)", "83 Misc. 3d 126(A)", "126(A)"),
    ]
    # Cases where no page correction should happen
    left_alone = [
        ("11 U.S. 11[2]", "11 U.S. 11", "11"),
        (
            "Tex. Civ. Prac. & Rem. Code Ann. § 171.023",
            "Tex. Code Ann. § 171.023",
            None,
        ),
    ]
    for raw_text, want_citation, want_page in needs_correction + left_alone:
        first_cite = get_citations(raw_text)[0]
        self.assertEqual(
            first_cite.corrected_citation(),
            want_citation,
            "Page correction not working",
        )
        self.assertEqual(
            first_cite.corrected_page(),
            want_page,
            "Standalone page correction not working",
        )
Loading