-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexpand-data.py
61 lines (53 loc) · 1.6 KB
/
expand-data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
""" Create a row for each attribute in the hashtag report.
"""
import csv, hxl, sys
# CSV writer bound to standard output; every row of the report is written through it.
output = csv.writer(sys.stdout)

# Output columns as (text header, HXL hashtag) pairs, in the order they are written.
_COLUMNS = (
    ("Attribute", "#meta+attribute"),
    ("Hashtag", "#meta+tag"),
    ("Hashtag with Attributes", "#meta+tagspec"),
    ("Text header", "#meta+header"),
    ("Locations", "#country+code"),
    ("Data provider", "#org+provider"),
    ("HDX dataset id", "#meta+dataset"),
    ("HDX resource id", "#meta+resource"),
    ("Date created", "#date+created"),
    ("Hash", "#meta+hash"),
)

# First the human-readable header row, then the HXL hashtag row beneath it.
output.writerow([label for label, _ in _COLUMNS])
output.writerow([hashtag for _, hashtag in _COLUMNS])
def _country_codes(raw):
    """Split a comma-separated country-code list into sorted, upper-cased codes.

    Surrounding whitespace is trimmed and empty entries are dropped. If
    nothing is left (the value is missing, empty, or only separators), a
    single empty code is returned so that the caller still emits one row.
    """
    codes = [code.strip().upper() for code in (raw or "").split(",")]
    # Upper-case before sorting so that mixed-case input sorts consistently.
    codes = sorted(code for code in codes if code)
    return codes or [""]


# Read the HXL-hashtagged report from standard input and write one CSV row per
# (attribute, country) combination found in each input row.
with hxl.data(sys.stdin.buffer) as source:
    for row in source:
        tagspec = row.get("meta+tagspec")
        column = hxl.model.Column.parse(tagspec)
        if column is None:
            # NOTE(review): Column.parse() is expected to return None for a
            # malformed tag spec -- confirm against the libhxl version in use.
            # Skip the row rather than crash on column.attributes below.
            print(
                "Skipping row with unparseable tagspec: {!r}".format(tagspec),
                file=sys.stderr,
            )
            continue

        countries = _country_codes(row.get("country+code+list"))

        # Make sure we get at least one output row for each input row, even
        # when the hashtag has no attributes (the attribute cell is then "+").
        attributes = sorted(column.attributes) or [""]

        # These values are identical for every row expanded from this input
        # row, so look them up once instead of once per combination.
        header = row.get("#meta+header")
        trailing = [
            row.get("#org+provider"),
            row.get("#meta+dataset"),
            row.get("#meta+resource"),
            row.get("#date+created"),
            row.get("#meta+hash"),
        ]

        for attribute in attributes:
            for country in countries:
                output.writerow(
                    ["+" + attribute, column.tag, tagspec, header, country]
                    + trailing
                )