diff --git a/lat-lon-map.txt b/data/lat-lon-map.txt similarity index 100% rename from lat-lon-map.txt rename to data/lat-lon-map.txt diff --git a/locations.txt b/data/locations.txt similarity index 100% rename from locations.txt rename to data/locations.txt diff --git a/cluster-locations.py b/oldnyc/geocode/cluster.py similarity index 93% rename from cluster-locations.py rename to oldnyc/geocode/cluster.py index 36b4670a..6a542026 100755 --- a/cluster-locations.py +++ b/oldnyc/geocode/cluster.py @@ -1,8 +1,8 @@ #!/usr/bin/env python """ -Reads in locations.txt (produced by generate-geocodes.py with ---output_mode=locations.txt) and clusters very close points. This reduces the -number of unique map markers and makes it easier to find things. +Reads in locations.txt (produced by geocode.py with --output_mode=locations.txt) +and clusters very close points. This reduces the number of unique map markers and +makes it easier to find things. Output is an exhaustive mapping of "old_lat,old_lon\tnew_lat,new_lon" pairs. @@ -16,8 +16,10 @@ import fileinput +# TODO: move to argparse DISTANCE_THRESHOLD = 20 +# TODO: move to argparse output_mode = "map" # 'urls' counts = []