Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
ghing committed Jun 24, 2014
0 parents commit 2af92d7
Show file tree
Hide file tree
Showing 27 changed files with 1,079 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
*.pyc
*.swp
__pycache__
data
convictions/settings/dev.py
convictions.sqlite3
56 changes: 56 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
=====================
cook-convictions-data
=====================

A simple Django project for loading, cleaning and querying Cook County, Illinois convictions data.

This is the preprocessing backend that drives the presentation of https://github.com/sc3/cook-convictions/

Quickstart
==========

Installation
------------

::

git clone https://github.com/sc3/cook-convictions-data.git
mkvirtualenv convictions
cd cook-convictions-data
pip install -r requirements.txt
cp convictions/settings/dev.example.py convictions/settings/dev.py
# Edit convictions/settings/dev.py to fill in the needed variables
spatialite convictions.sqlite3 "SELECT InitSpatialMetaData();"
./manage.py syncdb
./manage.py migrate

Load spatial data
-----------------

First, download and unpack the Shapefile version of the Cook County Municipalities data from https://datacatalog.cookcountyil.gov/GIS-Maps/ccgisdata-Municipality/ta8t-zebk

Then run::

./manage.py load_spatial_data Municipality data/Municipality/Municipality.shp

Load raw convictions data
-------------------------

::

./manage.py load_convictions_csv data/Criminal_Convictions_ALLCOOK_05-09.csv


Populate clean conviction records
---------------------------------

::

./manage.py create_convictions

Geocode conviction records
--------------------------

::

./manage.py geocode_convictions
Empty file added convictions/__init__.py
Empty file.
5 changes: 5 additions & 0 deletions convictions/settings/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Settings package entry point. wsgi.py points DJANGO_SETTINGS_MODULE at
# this package ("convictions.settings"), so whatever is imported here
# becomes the active settings.
try:
# Try to import the dev settings by default for convenience
from .dev import *
except ImportError:
# dev.py is a local, untracked file (it is listed in .gitignore), so it
# may legitimately be absent; in that case this package exports nothing.
# NOTE(review): this also hides an ImportError raised from *inside*
# dev.py (e.g. a misspelled import there) -- confirm that is acceptable.
pass
92 changes: 92 additions & 0 deletions convictions/settings/defaults.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
"""
Django settings for convictions project.
For more information on this file, see
https://docs.djangoproject.com/en/1.6/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/1.6/ref/settings/
"""

# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
import os
# This module lives at convictions/settings/defaults.py, so three dirname()
# calls walk up from the file to the repository root.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.6/howto/deployment/checklist/
#
# SECRET_KEY and DEBUG are intentionally not defined here; they are set in
# the per-developer settings module (see dev.example.py).

ALLOWED_HOSTS = []

# Application definition

INSTALLED_APPS = (
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
# GeoDjango, needed for the spatial database backend configured below
'django.contrib.gis',
# This project's own app
'convictions_data',
# South schema migrations
'south',
)

MIDDLEWARE_CLASSES = (
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
)

ROOT_URLCONF = 'convictions.urls'

WSGI_APPLICATION = 'convictions.wsgi.application'


# Database
# https://docs.djangoproject.com/en/1.6/ref/settings/#databases
#
# A SpatiaLite database stored in a single file at the repository root.

DATABASES = {
'default': {
'ENGINE': 'django.contrib.gis.db.backends.spatialite',
'NAME': os.path.join(BASE_DIR, 'convictions.sqlite3'),
}
}

# Internationalization
# https://docs.djangoproject.com/en/1.6/topics/i18n/

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'UTC'

USE_I18N = True

USE_L10N = True

USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.6/howto/static-files/

STATIC_URL = '/static/'

# Logging configuration in the ``logging.config.dictConfig`` schema.
#
# The key under 'loggers' must be the name of the ``convictions_data``
# package: the app's modules create their loggers with
# ``logging.getLogger(__name__)``, so their names all start with
# "convictions_data." and inherit the handlers and level configured here.
LOGGING = {
    'version': 1,
    'handlers': {
        # Write log records to the console (stderr)
        'console': {
            'level': 'DEBUG',
            'class': 'logging.StreamHandler',
        },
    },
    'loggers': {
        'convictions_data': {
            'handlers': ['console'],
            'level': 'WARN',
        },
    },
}

22 changes: 22 additions & 0 deletions convictions/settings/dev.example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Template for per-developer settings. Copy this file to dev.py in the same
# directory and fill in the empty values below.

# Start from the shared defaults, then override or extend them here.
from .defaults import *

# SECURITY WARNING: keep the secret key used in production secret!
# You need to generate a secret key. You can do it using this
# python snippet:
#
# from django.utils.crypto import get_random_string
# chars = 'abcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*(-_=+)'
# get_random_string(50, chars)
SECRET_KEY = ""

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

TEMPLATE_DEBUG = True

# Settings specific to the convictions project

# For now, we're using the MapQuest Geocoder. You'll need to sign up
# and get an API KEY at http://developer.mapquest.com. Copy and paste
# that value into this variable.
CONVICTIONS_GEOCODER_API_KEY = ""
12 changes: 12 additions & 0 deletions convictions/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# URL configuration for the convictions project. The only route defined
# is the Django admin site.
from django.conf.urls import patterns, include, url

from django.contrib import admin
# Let the admin discover the admin modules of the installed apps
admin.autodiscover()

urlpatterns = patterns('',
# Examples:
# url(r'^$', 'convictions.views.home', name='home'),
# url(r'^blog/', include('blog.urls')),

# Django admin site, served under /admin/
url(r'^admin/', include(admin.site.urls)),
)
14 changes: 14 additions & 0 deletions convictions/wsgi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""
WSGI config for convictions project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/1.6/howto/deployment/wsgi/
"""

import os
# setdefault() only supplies a fallback: a DJANGO_SETTINGS_MODULE already
# present in the environment is left untouched.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "convictions.settings")

from django.core.wsgi import get_wsgi_application
# Module-level WSGI callable; this is the object named by the
# WSGI_APPLICATION setting ('convictions.wsgi.application').
application = get_wsgi_application()
Empty file added convictions_data/__init__.py
Empty file.
3 changes: 3 additions & 0 deletions convictions_data/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
55 changes: 55 additions & 0 deletions convictions_data/geocoders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from geopy.geocoders import OpenMapQuest
from geopy.compat import urlencode
from geopy.location import Location

class BatchOpenMapQuest(OpenMapQuest):
    """
    OpenMapQuest geocoder with support for the batch geocoding endpoint,
    which looks up several addresses in a single HTTP request.
    """

    def batch_geocode(self, queries, exactly_one=True, timeout=None):
        """
        Geocode several address strings with one request.

        Args:
            queries: Iterable of address strings to geocode.
            exactly_one: When true, ask the service for at most one
                candidate per address.
            timeout: Passed through unchanged to ``_call_geocoder``.

        Returns:
            A list with one entry per result in the response: a
            ``Location`` built from the first candidate, or ``None`` when
            the service returned no candidates for that address. An empty
            list is returned, without contacting the service, when
            ``queries`` is empty.
        """
        # Materialize first so that generators can be checked for emptiness
        queries = list(queries)
        if not queries:
            return []

        params = []

        if exactly_one:
            params.append(('maxResults', 1))

        params.extend(('location', q) for q in queries)

        # Don't include URL to a thumbnail map to make the payloads smaller
        params.append(('thumbMaps', 'false'))

        url = "{}://open.mapquestapi.com/geocoding/v1/batch?outFormat=json".format(
            self.scheme)
        # The key is already urlencoded, so just append it at the end
        url = "&".join((url, urlencode(params), "key={}".format(self.api_key)))
        data = self._call_geocoder(url, timeout=timeout)
        return self._batch_parse_json(data['results'], exactly_one)

    @classmethod
    def _batch_parse_json(cls, resources, exactly_one=True):
        """
        Convert the ``results`` list of a batch response into a list with
        one parsed entry per result, in the same order.
        """
        return [cls._batch_parse_json_single(r, exactly_one)
                for r in resources]

    @classmethod
    def _batch_parse_json_single(cls, resource, exactly_one=True):
        """
        Parse a single location record from the raw data into a Location
        object

        Returns ``None`` when the record has no candidate locations, so
        one unmatched address does not abort parsing of the whole batch.
        """
        # TODO: Handle exactly_one parameter
        locations = resource.get('locations')
        if not locations:
            return None

        loc = locations[0]
        lat = loc['latLng']['lat']
        lng = loc['latLng']['lng']
        address = cls._build_canonical_address(loc)
        return Location(address, (lat, lng), loc)

    @classmethod
    def _build_canonical_address(cls, location):
        """
        Create a single address string from the address bits in the geocoder
        response.
        """
        street = location['street']
        city = location['adminArea5']
        state = location['adminArea3']
        country = location['adminArea1']
        postal_code = location['postalCode']

        return "{} {}, {} {} {}".format(street, city, state, country,
                                        postal_code)
Empty file.
Empty file.
27 changes: 27 additions & 0 deletions convictions_data/management/commands/create_convictions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from optparse import make_option

from django.core.management.base import BaseCommand

from convictions_data.models import Conviction, RawConviction

class Command(BaseCommand):
    """Management command that creates a Conviction for every RawConviction."""

    help = "Create clean conviction records from raw data"

    option_list = BaseCommand.option_list + (
        make_option(
            '--delete',
            action='store_true',
            dest='delete',
            default=False,
            help="Delete previously created models",
        ),
    )

    def handle(self, *args, **options):
        """
        Optionally delete all existing Conviction records, then create
        one new Conviction per RawConviction in a single bulk insert.
        """
        if options['delete']:
            Conviction.objects.all().delete()

        Conviction.objects.bulk_create([
            Conviction(raw_conviction=raw)
            for raw in RawConviction.objects.all()
        ])
26 changes: 26 additions & 0 deletions convictions_data/management/commands/export_bad_addresses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from csv import DictWriter

from django.core.management.base import BaseCommand

from convictions_data.models import Conviction

class Command(BaseCommand):
    """Management command that writes bad-address convictions as CSV."""

    help = "Export a list of convictions with ungeocodeable addresses to CSV"

    def handle(self, *args, **options):
        """
        Write a header row followed by one CSV row per conviction returned
        by ``Conviction.objects.has_bad_address()`` to the command's stdout.
        """
        bad_convictions = Conviction.objects.has_bad_address()

        columns = ['id', 'address', 'city', 'state', 'raw_citystate', 'zipcode']
        csv_out = DictWriter(self.stdout, columns)
        csv_out.writeheader()

        for bad in bad_convictions:
            row = dict(
                id=bad.id,
                address=bad.address,
                city=bad.city,
                state=bad.state,
                zipcode=bad.zipcode,
                # The unparsed value from the linked raw record
                raw_citystate=bad.raw_conviction.city_state,
            )
            csv_out.writerow(row)

37 changes: 37 additions & 0 deletions convictions_data/management/commands/geocode_convictions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import logging
import sys

from django.core.management.base import BaseCommand

from convictions_data.models import Conviction

logger = logging.getLogger(__name__)

class Command(BaseCommand):
    """Management command that geocodes convictions lacking coordinates."""

    help = "Geocode conviction records"

    def handle(self, *args, **options):
        """
        Geocode the ungeocoded convictions, but only if every one of them
        yields a usable geocoder address; otherwise exit with status 1.
        """
        # By default, only try to geocode records that don't
        # have lat/lon values
        pending = Conviction.objects.ungeocoded()

        if self._check_addresses(pending):
            pending.geocode()
        else:
            sys.exit(1)

    def _check_addresses(self, models):
        """
        Return True when ``geocoder_address`` can be read on every
        conviction in ``models``.

        Every conviction is checked (no short-circuit) so that each bad
        record is logged in a single run.
        """
        all_valid = True

        for conviction in models:
            try:
                # Accessed only to see whether it raises ValueError
                conviction.geocoder_address
            except ValueError:
                logger.error(
                    "Bad city ({}), state ({}) or zip ({}) for conviction with id {}".format(
                        conviction.city, conviction.state, conviction.zipcode,
                        conviction.id))
                all_valid = False

        return all_valid

Loading

0 comments on commit 2af92d7

Please sign in to comment.