Skip to content

Commit

Permalink
Rename nyc-parks geocoder (#158)
Browse files Browse the repository at this point in the history
* rename nyc_parks.py

* update snapshots

* more imports
  • Loading branch information
danvk authored Nov 3, 2024
1 parent 69a891d commit 2503de0
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 60 deletions.
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
#!/usr/bin/python
#
# Look for well-known NYC parks.
"""Geocode using the "subjects" field and places of interest in the title.
The idea is that if an item has a subjects/geographic field like this:
[
"Prospect Park",
"Grand Army Plaza",
]
then we can look these two up in subjects.geojson, see that "Grand Army Plaza"
is more specific, and use that for geocoding.
"""

import fileinput
import re
import sys
from collections import Counter, defaultdict
Expand All @@ -12,9 +19,8 @@

from oldnyc.geocode.geocode_types import Coder, Locatable
from oldnyc.geojson_utils import assert_point
from oldnyc.item import blank_item

# TODO: move these into a data file, maybe GeoJSON
# TODO: use subjects.geojson instead of these lists
parks = {
"Bronx Park": (40.856389, -73.876667),
"Claremont Park": (40.840546, -73.907469),
Expand Down Expand Up @@ -271,7 +277,7 @@ def is_address_close(a: str, b: str) -> bool:
return abs(a_lat - b_lat) < 0.0001 and abs(a_lon - b_lon) < 0.0001 # ~11m


class NycParkCoder(Coder):
class SubjectsCoder(Coder):
geo_to_location: dict[str, tuple[int, pygeojson.Point]]
counters: Counter[str]

Expand All @@ -283,11 +289,6 @@ def __init__(self):
for f in features
if f.geometry
}
# for f in features:
# if f.properties.get("result") == "pier":
# self.geo_to_location[f.properties["geo"]] = pygeojson.Point(
# (-73.9737, 40.74421)
# )

self.counters = Counter()

Expand Down Expand Up @@ -463,30 +464,4 @@ def finalize(self):
# sys.stderr.write("%4d\t%s\n" % (v, k))

def name(self):
return "nyc-parks"


# For fast iteration
if __name__ == "__main__":
    # Ad-hoc driver: treat every non-blank input line (from the files named on
    # the command line, or stdin, via fileinput) as an address, run it through
    # the coder, and report how many lines produced a result.
    coder = NycParkCoder()
    # A single blank item is reused; only its address changes per input line.
    r = blank_item()
    num_ok, num_bad = 0, 0
    for line in fileinput.input():
        addr = line.strip()
        if not addr:
            continue
        r.address = addr
        result = coder.codeRecord(r)

        if result:
            num_ok += 1
            print('"%s" -> %s' % (addr, result))
        else:
            num_bad += 1

    coder.finalize()

    # Guard the ratio: with no non-blank input lines the total is zero and the
    # unguarded division would raise ZeroDivisionError instead of printing the
    # summary.
    num_total = num_ok + num_bad
    ratio = 1.0 * num_ok / num_total if num_total else 0.0
    sys.stderr.write(
        "Parsed %d / %d = %.4f records\n"
        % (num_ok, num_total, ratio)
    )
return "subjects"
6 changes: 3 additions & 3 deletions oldnyc/geocode/collect_subjects.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import sys
from collections import Counter

from oldnyc.geocode.coders import nyc_parks
from oldnyc.geocode.coders import subjects
from oldnyc.item import Item, load_items


Expand All @@ -26,8 +26,8 @@ def print_geographics(items: list[Item]):
counts[g] += 1
if g.endswith(" (New York, N.Y.)"):
g = g.replace(" (New York, N.Y.)", "")
if g in nyc_parks.parks:
geo_to_loc[raw_g] = nyc_parks.parks[g]
if g in subjects.parks:
geo_to_loc[raw_g] = subjects.parks[g]

for name, count in counts.most_common():
loc = geo_to_loc.get(name) or ""
Expand Down
6 changes: 3 additions & 3 deletions oldnyc/geocode/geocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@
from dotenv import load_dotenv

from oldnyc.geocode import generate_js, geocoder
from oldnyc.geocode.coders import extended_grid, gpt, milstein, nyc_parks
from oldnyc.geocode.coders import extended_grid, gpt, milstein, subjects
from oldnyc.geocode.geocode_types import Coder, Locatable, Location
from oldnyc.item import Item, load_items

CODERS: dict[str, Callable[[], Coder]] = {
"extended-grid": extended_grid.ExtendedGridCoder,
"milstein": milstein.MilsteinCoder,
"nyc-parks": nyc_parks.NycParkCoder,
"subjects": subjects.SubjectsCoder,
"gpt": gpt.GptCoder,
}

Expand All @@ -42,7 +42,7 @@
parser.add_argument(
"-c",
"--coders",
default="extended-grid,milstein,nyc-parks",
default="extended-grid,milstein,subjects",
help="Set to a comma-separated list of coders. Coders run in the specified order.",
)

Expand Down
2 changes: 1 addition & 1 deletion oldnyc/geocode/subjects/csv_to_geojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import pygeojson

from oldnyc.geocode.coders.nyc_parks import IGNORE_SUBJECTS
from oldnyc.geocode.coders.subjects import IGNORE_SUBJECTS


def main():
Expand Down
4 changes: 2 additions & 2 deletions oldnyc/geocode/subjects/make_localturk_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import pygeojson
from haversine import haversine

from oldnyc.geocode.coders.nyc_parks import IGNORE_SUBJECTS, NycParkCoder
from oldnyc.geocode.coders.subjects import IGNORE_SUBJECTS, SubjectsCoder
from oldnyc.geojson_utils import assert_point
from oldnyc.item import Item, load_items
from oldnyc.util import encode_json_base64, pick
Expand All @@ -35,7 +35,7 @@ def maybe_coords(p: pygeojson.Point | None):

def main():
items = load_items("data/images.ndjson")
coder = NycParkCoder()
coder = SubjectsCoder()

other_geocodes = pygeojson.load_feature_collection(open("/tmp/images.geojson")).features
id_to_location = {str(f.id): assert_point(f.geometry) for f in other_geocodes if f.geometry}
Expand Down
22 changes: 11 additions & 11 deletions test/random200-geocoded.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
703041f milstein (40.6823228, -73.9706163) Atlantic Avenue and Carlton Avenue, Brooklyn, NY
707148f milstein (40.6205804, -73.9019884) Ave. V and Bergen Ave., Brooklyn, NY
716421f extended-grid (40.725081, -73.981232) @40.725081,-73.981232
731946f nyc-parks (40.574926, -73.985941) @40.574926,-73.985941
731946f subjects (40.574926, -73.985941) @40.574926,-73.985941
716604f failed n/a n/a
726128f milstein (40.7704092, -73.8246714) Bayside Avenue and Parsons Boulevard, Queens, NY
728569f milstein (40.6289529, -74.0797226) Beach Street and Van Duzer Street, Staten Island, NY
Expand All @@ -96,9 +96,9 @@
703479f failed n/a n/a
718754f milstein (40.7121586, -73.9806996) Cherry Street and Jackson Street, Manhattan, NY
703755f milstein (40.6739616, -74.0080168) Columbia Street and Creamer Street, Brooklyn, NY
719118f nyc-parks (40.76808, -73.981896) @40.768080,-73.981896
719118f subjects (40.76808, -73.981896) @40.768080,-73.981896
733706f failed n/a n/a
732005f nyc-parks (40.574926, -73.985941) @40.574926,-73.985941
732005f subjects (40.574926, -73.985941) @40.574926,-73.985941
1509541 failed n/a n/a
726456f milstein (40.6967542, -73.9005186) Cypress Avenue and Summerfield Avenue, Queens, NY
719259f milstein (40.7200098, -73.992885) Chrystie Street and Delancey Street, Manhattan, NY
Expand All @@ -118,8 +118,8 @@
726840f failed n/a n/a
720408f milstein (40.7155152, -73.9897764) Essex Street and Hester Street, Manhattan, NY
704920f milstein (40.697888, -73.99460499999999) Clark Street and Hicks Street, Brooklyn, NY
720438f nyc-parks (40.842551, -73.932621) @40.842551,-73.932621
730836f nyc-parks (40.842308, -73.930277) @40.842308,-73.930277
720438f subjects (40.842551, -73.932621) @40.842551,-73.932621
730836f subjects (40.842308, -73.930277) @40.842308,-73.930277
704967f milstein (40.705439, -73.95633699999999) Hooper Street and Marcy Avenue, Brooklyn, NY
701306f failed n/a n/a
726942f milstein (40.7416676, -73.9542183) Jackson Avenue and Vernon Boulevard, Queens, NY
Expand All @@ -142,19 +142,19 @@
722402f milstein (40.8058042, -73.9386896) 126th Street and Park Avenue, Manhattan, NY
722101f extended-grid (40.749644, -73.979609) @40.749644,-73.979609
722430f milstein (40.71200320000001, -74.0081046) Broadway and Park Row, Manhattan, NY
734005f nyc-parks (40.705573, -74.001457) @40.705573,-74.001457
734005f subjects (40.705573, -74.001457) @40.705573,-74.001457
701674f milstein (40.8376796, -73.85350729999999) Purdy Street and St. Raymond Avenue, Bronx, NY
104780 failed n/a n/a
729420f milstein (40.5732734, -74.1469118) Mill Road and Richmond Hill Road, Staten Island, NY
722881f milstein (40.7849851, -73.9826672) 79th Street and Riverside Drive, Manhattan, NY
722717f milstein (40.78691060000001, -73.9812484) 82nd Street and Riverside Drive, Manhattan, NY
719805f nyc-parks (40.861619, -73.933622) @40.861619,-73.933622
719805f subjects (40.861619, -73.933622) @40.861619,-73.933622
723027f milstein (40.7584384, -73.9789121) 49th Street (West) and Rockefeller Plaza, Manhattan, NY
723036f failed n/a n/a
727922f failed n/a n/a
104536 failed n/a n/a
723323f milstein (40.7094957, -73.994007) Market Slip (West). and South Street, Manhattan, NY
734332f nyc-parks (40.873694, -73.911064) @40.873694,-73.911064
734332f subjects (40.873694, -73.911064) @40.873694,-73.911064
706565f failed n/a n/a
723111f milstein (40.7275748, -73.9853065) 1st Avenue and St. Marks Place, Manhattan, NY
723134f milstein (40.8253111, -73.9437817) 147th Street and St. Nicholas Avenue, Manhattan, NY
Expand All @@ -164,7 +164,7 @@
702016f milstein (40.8608772, -73.8418068) Waring Avenue and Woodhull Avenue, Bronx, NY
707267f milstein (40.686453, -73.99392999999999) Court St. and Warren St., Brooklyn, NY
707268f milstein (40.686453, -73.99392999999999) Court St. and Warren St., Brooklyn, NY
731060f nyc-parks (40.846944, -73.928056) @40.846944,-73.928056
731060f subjects (40.846944, -73.928056) @40.846944,-73.928056
724252f milstein (40.7087712, -74.00091499999999) Dover Street and Water Street, Manhattan, NY
702047f milstein (40.8659309, -73.8858516) 198th Street (East) and Webster Avenue, Bronx, NY
1558186 extended-grid (40.752191, -73.993472) @40.752191,-73.993472
Expand All @@ -182,9 +182,9 @@
1113271 failed n/a n/a
730594f failed n/a n/a
731805f failed n/a n/a
734193f nyc-parks (40.540383, -74.135698) @40.540383,-74.135698
734193f subjects (40.540383, -74.135698) @40.540383,-74.135698
1635861 failed n/a n/a
1635949 nyc-parks (40.790882, -73.775732) @40.790882,-73.775732
1635949 subjects (40.790882, -73.775732) @40.790882,-73.775732
1635983 failed n/a n/a
1636238 failed n/a n/a
1663931 failed n/a n/a
Expand Down
2 changes: 1 addition & 1 deletion test/random200.logs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,5 @@ POI/subject geocoding:
1 n_title_park
53 extended-grid
81 milstein
11 nyc-parks
11 subjects
145 (total)

0 comments on commit 2503de0

Please sign in to comment.