-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathcollect_villages.py
80 lines (70 loc) · 2.32 KB
/
collect_villages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import json
import unidecode
from pathlib import Path
# Single-character substitutions applied by fix_soi_string(): each ASCII
# placeholder on the left is replaced by the macron vowel on the right.
_SOI_CHAR_MAP = str.maketrans({
    ">": "Ā",
    "<": "ā",
    "|": "Ī",
    "\\": "ī",
    "@": "Ū",
    "#": "ū",
})


def fix_soi_string(v):
    """Replace placeholder characters in *v* with macron vowels.

    The characters ``>``, ``<``, ``|``, ``\\``, ``@`` and ``#`` are mapped to
    ``Ā``, ``ā``, ``Ī``, ``ī``, ``Ū`` and ``ū`` respectively; every other
    character is left untouched.

    Args:
        v: The string to fix, or ``None``.

    Returns:
        The fixed string, or ``None`` when *v* is ``None``.
    """
    if v is None:
        return v
    # All substitutions are one-character-to-one-character and none of the
    # replacement characters is itself a placeholder, so a single translate
    # pass is equivalent to applying the replacements one after another.
    return v.translate(_SOI_CHAR_MAP)
# Property keys that may hold the village name, in priority order: the first
# key present in a feature's properties is the one that is used.
name_fields = ['VILLAGE', 'VillageNam', 'VillNam', 'VT_NAME', 'SOI_NAME']

paths = Path('data/raw/villages/').glob('*/*/data.geojson')


def _set_with_ascii(props, key, raw):
    """Store the fixed form of *raw* under *key* and its ASCII form under *key*_C.

    Both entries are set to None when *raw* is None.
    """
    fixed = fix_soi_string(raw)
    props[key] = fixed
    props[key + '_C'] = unidecode.unidecode(fixed) if fixed is not None else None


out = []
for path in paths:
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    data = json.loads(path.read_text(encoding='utf-8'))
    for feat in data['features']:
        props = feat['properties']

        name_key = next((k for k in name_fields if k in props), None)
        if name_key is None:
            # No village-name key at all: dump the properties for inspection.
            print(props)
            # NOTE(review): indentation was lost in the copy under review; this
            # skip is assumed to apply only to features without a name key --
            # confirm against the original file.
            # The district is compared before fix_soi_string() runs, hence the
            # raw '>' placeholder. .get() avoids a KeyError for features that
            # only carry the 'District' spelling handled further down.
            if props.get('DISTRICT') in ('ARWAL', 'AR>RIA'):
                continue
            vill = None
        else:
            vill = props[name_key]
        _set_with_ascii(props, 'VILLAGE', vill)

        if 'TEHSIL' not in props:
            print(props)

        # The district name is stored under either 'DISTRICT' or 'District'.
        if 'DISTRICT' in props:
            dist = props['DISTRICT']
        elif 'District' in props:
            dist = props['District']
        else:
            dist = None
            print(props)
        _set_with_ascii(props, 'DISTRICT', dist)
        # Keep only the upper-case key in the output.
        props.pop('District', None)

        _set_with_ascii(props, 'TEHSIL', props.get('TEHSIL'))
        _set_with_ascii(props, 'STATE', props.get('STATE'))

        out.append(feat)

# ensure_ascii=False emits the macron characters verbatim, so the file must be
# opened as UTF-8 explicitly rather than with the platform default encoding.
with open('SOI_villages.geojsonl', 'w', encoding='utf-8') as f:
    f.writelines(json.dumps(feat, ensure_ascii=False) + '\n' for feat in out)