forked from osmlab/dcbuildings
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmerge.py
81 lines (71 loc) · 2.83 KB
/
merge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Merge addresses into buildings they intersect with
# Write them to merged/
# TODO: extend this to use the parcels as an intermediate join
from fiona import collection
from rtree import index
from shapely.geometry import asShape, Point, LineString
import re
from sys import argv
from glob import glob
from multiprocessing import Pool
import json
def merge(buildingIn, addressIn, mergedOut, extraOut):
    """Join address features to the buildings that contain them.

    Reads every address feature from ``addressIn`` and every building
    feature from ``buildingIn``. Each building gets a
    ``properties['addresses']`` list; an address is appended to the first
    candidate building (as returned by the spatial index) whose geometry
    contains it. Addresses that no building contains are collected
    separately.

    Args:
        buildingIn: path of the building file to read.
        addressIn: path of the address file to read.
        mergedOut: path that receives the JSON dump of all buildings.
        extraOut: path that receives the JSON dump of unmatched addresses.

    Each output file is only written when its list is non-empty.
    """
    # Load every address as a (geometry, source feature) pair. The source
    # feature is kept next to the geometry instead of being attached to it
    # as an ad-hoc attribute.
    addresses = []
    with collection(addressIn, "r") as addressSource:
        for address in addressSource:
            addresses.append((asShape(address['geometry']), address))

    # Load and index all buildings. A building's id in the spatial index is
    # its position in `buildings` / `buildingShapes`.
    buildings = []
    buildingShapes = []
    buildingIdx = index.Index()
    with collection(buildingIn, "r") as buildingSource:
        for position, building in enumerate(buildingSource):
            shape = asShape(building['geometry'])
            building['properties']['addresses'] = []
            buildings.append(building)
            buildingShapes.append(shape)
            buildingIdx.add(position, shape.bounds)

    # Map addresses to buildings. The index only narrows candidates by
    # bounding box, so each candidate is confirmed with contains().
    extraAddrs = []  # the addresses that fail to join
    for addressShape, address in addresses:
        for i in buildingIdx.intersection(addressShape.bounds):
            if buildingShapes[i].contains(addressShape):
                buildings[i]['properties']['addresses'].append(address)
                break
        else:
            # No candidate building contained this address.
            extraAddrs.append(address)

    # All buildings, each carrying whatever addresses matched it (the list
    # may be empty for a given building).
    if buildings:
        with open(mergedOut, 'w') as outFile:
            outFile.write(json.dumps(buildings, indent=4))
        print('Exported ' + mergedOut)

    # These are the remaining addresses that failed to intersect.
    if extraAddrs:
        with open(extraOut, 'w') as outFile2:
            outFile2.write(json.dumps(extraAddrs, indent=4))
        print('Exported ' + extraOut)
def prep(fil3):
    """Run merge() for a single building chunk file.

    The numeric id is taken from the end of the file name
    (``...-<digits>.shp``) and used to build the matching address input
    path and the two output paths.

    Args:
        fil3: path of a building chunk file ending in ``-<digits>.shp``.

    Raises:
        ValueError: if ``fil3`` does not end in ``-<digits>.shp``.
    """
    # Raw string so that \d and \. reach the regex engine untouched.
    match = re.match(r'^.*-(\d+)\.shp$', fil3)
    if match is None:
        raise ValueError(
            'Cannot find a numeric chunk id in file name %r' % (fil3,))
    chunkId = match.group(1)
    merge(fil3,
          'chunks/addresses-%s.shp' % chunkId,
          'merged/buildings-addresses-%s.geojson' % chunkId,
          'merged/extra-addresses-%s.geojson' % chunkId)
if __name__ == '__main__':
    # With exactly one command-line argument, treat it as a block group id
    # and merge only that chunk. Otherwise run a merge for every
    # chunks/buildings-[block group geoid].shp through a process pool; each
    # of those expects a chunks/addresses-[block group geoid].shp as well.
    if len(argv) != 2:
        chunkFiles = glob("chunks/buildings-*.shp")
        workers = Pool()
        workers.map(prep, chunkFiles)
        workers.close()
        workers.join()
    else:
        geoid = argv[1]
        merge('chunks/buildings-%s.shp' % geoid,
              'chunks/addresses-%s.shp' % geoid,
              'merged/buildings-addresses-%s.geojson' % geoid,
              'merged/extra-addresses-%s.geojson' % geoid)