Skip to content

Commit

Permalink
Update loading of final statute and IUCR fields
Browse files Browse the repository at this point in the history
Add final_statute_formatted field to the Disposition and Conviction
models.

Add a format_statute() function to format the parsed statute that will
go into final_statute_formatted.

Factor final_statute field population out into a method,
load_final_statute_and_iucr.

Remove trailing whitespace from files edited in this commit.

Rename statute2iucr management command to load_final_statute_and_iucr to
reflect these changes.

Fix statute.strip_surrounding_parens to handle empty string

Addresses sc3#7,
sc3/cook-convictions#83
  • Loading branch information
ghing committed Nov 27, 2014
1 parent f50182a commit fb89e3c
Show file tree
Hide file tree
Showing 7 changed files with 396 additions and 124 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import logging

from django.core.management.base import BaseCommand
from django.db import transaction

from convictions_data.models import Disposition
from convictions_data.statute import (get_iucr, IUCRLookupError,
ILCSLookupError, StatuteFormatError)

logger = logging.getLogger(__name__)

class Command(BaseCommand):
    """Management command that refreshes the derived statute fields.

    Visits every ``Disposition`` and asks the model to repopulate its final
    statute and IUCR fields, using the amended charge statute when one is
    present and the originally charged statute otherwise.
    """
    help = ("Load the final statute, nicely formatted statute and IUCR code "
            "from the statute or ammended statute fields")

    def handle(self, *args, **options):
        """Reload and save the derived fields of each disposition."""
        # All saves happen inside a single atomic block.
        with transaction.atomic():
            for record in Disposition.objects.all():
                # The amended statute takes precedence over the original one.
                source_statute = record.ammndchargstatute or record.statute
                if not source_statute:
                    # Neither field is set: nothing to derive, nothing to save.
                    continue

                record.load_final_statute_and_iucr(source_statute)
                record.save()
48 changes: 0 additions & 48 deletions convictions_data/management/commands/statute2iucr.py

This file was deleted.

Large diffs are not rendered by default.

90 changes: 50 additions & 40 deletions convictions_data/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@
CensusTractManager, CommunityAreaManager, DispositionManager)

from convictions_data.query import ConvictionQuerySet
from convictions_data.statute import (get_iucr, ILCSLookupError,
IUCRLookupError, StatuteFormatError)
from convictions_data.statute import (get_iucr, parse_statute, format_statute,
MultipleMatchingILCSError, ILCSLookupError, IUCRLookupError,
StatuteFormatError)
from convictions_data.signals import post_load_spatial_data


Expand Down Expand Up @@ -171,9 +172,15 @@ class Disposition(models.Model):
amtoffine = models.IntegerField(null=True)

final_statute = models.CharField(max_length=50, default="",
help_text="Field to make querying easier. Set to the value of "
"ammndchargstatute if present, otherwise set to the value of statute",
help_text=("Field to make querying easier. Set to the value of "
"ammndchargstatute if present, otherwise set to the value "
"of statute"),
db_index=True)
final_statute_formatted = models.CharField(max_length=50, default="",
db_index=True,
help_text=("Value from final_statute but parsed and reformatted "
"to try to normalize the formats and make grouping "
"queries easier"))
final_chrgdesc = models.CharField(max_length=50, default="", db_index=True)
final_chrgtype = models.CharField(max_length=1, choices=CHRGTYPE_CHOICES,
default="", db_index=True)
Expand Down Expand Up @@ -282,23 +289,7 @@ def _load_field_statute(self, val):
self.statute = val

if val:
self.final_statute = val

try:
offenses = get_iucr(val)
if len(offenses) == 1:
self.iucr_code = offenses[0].code
self.iucr_category = offenses[0].offense_category
else:
logger.warn("Multiple matching IUCR offenses found for statute '{}'".format(val))
except IUCRLookupError as e:
logger.warn(e)
except ILCSLookupError as e:
logger.warn(e)
except AssertionError as e:
logger.warn(e)
except StatuteFormatError as e:
logger.warn(e)
self.load_final_statute_and_iucr(val)

return self

Expand Down Expand Up @@ -333,23 +324,36 @@ def _load_field_ammndchargstatute(self, val):
self.ammndchargstatute = val

if val:
self.final_statute = val
self.load_final_statute_and_iucr(val)

try:
offenses = get_iucr(val)
if len(offenses) == 1:
self.iucr_code = offenses[0].code
self.iucr_category = offenses[0].offense_category
else:
logger.warn("Multiple matching IUCR offenses found for statute '{}'".format(val))
except IUCRLookupError as e:
logger.warn(e)
except ILCSLookupError as e:
logger.warn(e)
except AssertionError as e:
logger.warn(e)
except StatuteFormatError as e:
logger.warn(e)
return self

def load_final_statute_and_iucr(self, val):
    """Populate the final_statute, final_statute_formatted, iucr_code and
    iucr_category fields from the value.

    The raw value is always stored in ``final_statute``. The remaining
    fields are only set when the corresponding step succeeds; failures are
    logged as warnings rather than raised.

    Args:
        val (str): Raw statute citation to load.

    Returns:
        This instance, so calls can be chained.
    """
    self.final_statute = val

    try:
        parsed_statute = parse_statute(val)
    except (StatuteFormatError, ILCSLookupError, MultipleMatchingILCSError) as e:
        logger.warning(e)
        # If we weren't able to parse the statute, return early
        return self

    # We've parsed the statute. Format it, and save this value.
    self.final_statute_formatted = format_statute(parsed_statute)

    try:
        offenses = get_iucr(parsed_statute)
    except IUCRLookupError:
        # HACK: The original error will have a nicely-formatted statute.
        # Replace it with the raw statute value
        logger.warning(IUCRLookupError(val))
        return self

    # Only an unambiguous match is trusted to set the IUCR fields.
    if len(offenses) == 1:
        self.iucr_code = offenses[0].code
        self.iucr_category = offenses[0].offense_category
    else:
        logger.warning("Multiple matching IUCR offenses found for statute '{}'".format(val))

    return self

Expand Down Expand Up @@ -554,9 +558,15 @@ class Conviction(models.Model):

chrgdispdate = models.DateField(null=True)
final_statute = models.CharField(max_length=50, default="",
help_text="Field to make querying easier. Set to the value of "
"ammndchargstatute if present, otherwise set to the value of statute",
help_text=("Field to make querying easier. Set to the value of "
"ammndchargstatute if present, otherwise set to the value "
"of statute"),
db_index=True)
final_statute_formatted = models.CharField(max_length=50, default="",
db_index=True,
help_text=("Value from final_statute but parsed and reformatted "
"to try to normalize the formats and make grouping "
"queries easier"))
final_chrgdesc = models.CharField(max_length=50, default="", db_index=True)
final_chrgtype = models.CharField(max_length=1, choices=CHRGTYPE_CHOICES,
default="", db_index=True)
Expand Down Expand Up @@ -844,7 +854,7 @@ class County(geo_models.Model):
intptlat10 = geo_models.CharField(max_length=11)
intptlon10 = geo_models.CharField(max_length=12)
geom = geo_models.MultiPolygonField()

objects = geo_models.GeoManager()

FIELD_MAPPING = {
Expand Down
1 change: 1 addition & 0 deletions convictions_data/query/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ def from_initial_chrgdispdate(self):
'fbiidno',
'fgrprntno',
'final_statute',
'final_statute_formatted',
'final_chrgdesc',
'final_chrgtype',
'final_chrgclass',
Expand Down
69 changes: 48 additions & 21 deletions convictions_data/statute.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
]]
ilcs_chapters_str = '|'.join(ilcs_chapters)
ilcs_statute_re = re.compile(r"""(?P<chapter>{chapters})
[- ] # Delimiter between chapter and act prefix
[- ] # Delimiter between chapter and act prefix
(?P<act_prefix>\d+)
[/\\] # Delimiter between act prefix and section
(?P<section>[\da-zA-Z.]+(-[\da-zA-Z.]+){{0,1}})
Expand All @@ -41,7 +41,7 @@
'38',
'42',
'56.5',
'95.5',
'95.5',
'121.5',
'124',
'134',
Expand All @@ -63,7 +63,7 @@ def __init__(self, chapter, paragraph, raw_statute=None):

def __str__(self):
msg = "Unable to find ILCS statute for raw statute '{}'".format(self.raw_statute)

return msg

class StatuteFormatError(Exception):
Expand All @@ -74,6 +74,15 @@ def __init__(self, raw_statute):
def __str__(self):
return "Can't understand statute '{}'".format(self.raw_statute)

class MultipleMatchingILCSError(Exception):
    """Exception raised when an ILRS statute matches multiple ILCS statutes

    Attributes:
        raw_statute: The statute string, as it was given, that produced the
            ambiguous match.
    """
    def __init__(self, raw_statute):
        # Pass the value up to Exception so ``args`` is populated; without
        # this the instance cannot be rebuilt from ``args`` (e.g. when
        # pickled) and its repr carries no information.
        super(MultipleMatchingILCSError, self).__init__(raw_statute)
        self.raw_statute = raw_statute

    def __str__(self):
        return ("More than one matching ILCS sections "
                "for raw statute '{}'".format(self.raw_statute))

class IUCRLookupError(Exception):
"""Exception raised when a matching IUCR offense for an ILCS section cannot
be found"""
Expand Down Expand Up @@ -102,7 +111,7 @@ def parse_statute(s):
except KeyError:
# No match

# Try stripping trailing bits from paragraph
# Try stripping trailing bits from paragraph
m = ilrs_paragraph_re.match(paragraph)
if not m:
raise ILCSLookupError(chapter, paragraph, s)
Expand All @@ -113,8 +122,9 @@ def parse_statute(s):
except KeyError:
raise ILCSLookupError(chapter, paragraph, s)

assert len(ilcs_sections) == 1, ("More than one matching ILCS sections "
"for raw statute '{}'".format(s))
if len(ilcs_sections) != 1:
raise MultipleMatchingILCSError(s)

ilcs_section = ilcs_sections[0]
ilcs_parsed = [
(ilcs_section.chapter, 'chapter'),
Expand All @@ -128,9 +138,9 @@ def parse_statute(s):
def parse_subsection(s):
"""
Parse the subsection portion of a statute citation
Arguments:
s (str): String containing the subsection portion of statute citation
s (str): String containing the subsection portion of statute citation
Returns:
List of strings representing the subsection bits
Expand All @@ -139,7 +149,7 @@ def parse_subsection(s):
['c', '2']
"""
subsections = []
bits = re.split(r'[-(\s]', s)
bits = re.split(r'[-(\s]', s)
for bit in bits:
if bit:
subsections.append(re.sub(r'[)]$', '', bit))
Expand All @@ -149,7 +159,7 @@ def parse_ilcs_statute(s):
statute_parts = []
m = ilcs_statute_re.match(s)
if not m:
return statute_parts
return statute_parts

statute_parts.append((m.group('chapter'), 'chapter'))
statute_parts.append((m.group('act_prefix'), 'act_prefix'))
Expand Down Expand Up @@ -288,25 +298,42 @@ def fix_ambiguous_statute(s):
except KeyError:
return s

def get_iucr(s):
def format_statute(parsed_statute):
    """Nicely format a parsed statute

    Arguments:
        parsed_statute: Sequence of ``(value, label)`` pairs. The first three
            entries are read as the chapter, act prefix and section; any
            further entries are treated as subsections. Only the value of
            each pair is used.

    Returns:
        String of the form ``chapter-act_prefix/section`` followed by each
        subsection wrapped in parentheses, e.g. ``720-5/12-3(a)(1)``.
    """
    chapter = parsed_statute[0][0]
    act_prefix = parsed_statute[1][0]
    section = parsed_statute[2][0]

    base = "{}-{}/{}".format(chapter, act_prefix, section)
    # Join the subsections in one pass rather than growing the string in a loop.
    suffix = "".join("({})".format(part[0]) for part in parsed_statute[3:])

    return base + suffix

def get_iucr(parsed_statute):
    """Look up the IUCR offenses that correspond to a parsed statute.

    Arguments:
        parsed_statute: Sequence of ``(value, label)`` pairs whose first three
            entries are the chapter, act prefix and section, followed by any
            subsections.

    Returns:
        Whatever ``iucr.lookup_by_ilcs`` returns for the statute.

    Raises:
        IUCRLookupError: If the lookup raises ``KeyError``. The error carries
            the formatted statute rather than the raw input.
    """
    try:
        chapter = parsed_statute[0][0]
        act_prefix = parsed_statute[1][0]
        section = parsed_statute[2][0]
        subsections = [ss[0] for ss in parsed_statute[3:]]
        return iucr.lookup_by_ilcs(chapter, act_prefix, section, *subsections)
    except KeyError:
        # The raw string is not available here, so report the formatted one.
        raise IUCRLookupError(format_statute(parsed_statute))

def strip_surrounding_parens(s):
    """
    Strip surrounding parenthesis and curly braces from a statute string.

    A leading "(" or trailing ")" is kept when it belongs to a
    single-character parenthesized group at that end of the string, such as
    the "(a)" in "720-5/12-3(a)".

    Arguments:
        s (str): Statute string, possibly empty.

    Returns:
        The string with the wrapping braces and parentheses removed.
    """
    s = s.strip('{').strip('}')

    # startswith/endswith are safe on an empty string, which matters both for
    # empty input and for input such as "(" that becomes empty after the
    # leading parenthesis is removed.
    if s.startswith("(") and (len(s) < 3 or s[2] != ")"):
        s = s[1:]

    if s.endswith(")") and (len(s) < 3 or s[-3] != "("):
        s = s[:-1]

    return s
Expand All @@ -319,7 +346,7 @@ def strip_attempted_statute(s):
For attempted offenses, this is some version of "720-5/8-4" (ILCS) or
"38-8-4" (ILRS). For conspiracy offenses it's "720-5/8-2" and for
solicitation it's "720-5/8-1".
The exact representation can vary widely.
This function is needed because attempted crimes are represented by
Expand All @@ -329,7 +356,7 @@ def strip_attempted_statute(s):
38-8-4(38-18-2)
This breaks parsing the statutes for tasks like determining IUCR codes.
Returns:
A tuple where the first item is the statute indicating the crime and
the second item is the statute indicating an attempted offense. For
Expand Down
Loading

0 comments on commit fb89e3c

Please sign in to comment.