-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy pathutils.py
71 lines (56 loc) · 2.02 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import sys
import csv
import json
import collections
def relative_path(fn):
    """Return the absolute path of ``fn`` resolved against this module's directory."""
    module_dir = os.path.dirname(__file__)
    joined = os.path.join(module_dir, fn)
    return os.path.abspath(joined)
def csv_reader_converter(utf8_data, dialect=csv.excel, **kwargs):
    """Yield the rows of a CSV source as lists of text cells.

    ``utf8_data`` is handed to ``csv.reader`` together with ``dialect`` and
    any extra keyword arguments (``delimiter``, ``quoting``, ...).

    Byte-string cells are decoded as latin-1 (note: not UTF-8, despite the
    parameter name).  Cells that are already text, or that the reader has
    converted to numbers (for example under ``csv.QUOTE_NONNUMERIC``), are
    passed through unchanged.
    """
    for row in csv.reader(utf8_data, dialect=dialect, **kwargs):
        # ``bytes`` is an alias of ``str`` on Python 2, so this decodes
        # exactly what ``unicode(cell, 'latin-1')`` used to decode.  On
        # Python 3, where ``unicode`` does not exist, the reader already
        # yields text and nothing needs converting.
        yield [
            cell.decode('latin-1') if isinstance(cell, bytes) else cell
            for cell in row
        ]
def load_csv_columns(filename, column_names=None, delimiter=',', quoting=csv.QUOTE_MINIMAL):
    """Load selected columns of a CSV file as a list of per-row dicts.

    The first row of the file is treated as the header row; header names
    are compared after stripping surrounding whitespace.

    Args:
        filename: path of the CSV file to read.
        column_names: which columns to keep.  A dict maps each wanted
            header name to the key used in the returned row dicts.  Any
            other container (list, set, ...) selects headers and keeps
            their own names.  When empty or ``None``, every column is
            loaded under its own header name.
        delimiter: field delimiter passed to the CSV reader.
        quoting: ``csv.QUOTE_*`` constant passed to the CSV reader.

    Returns:
        A list with one dict per non-empty data row, whose string values
        are stripped of surrounding whitespace.  An empty list is returned
        when the file has no header row.

    Raises:
        IndexError: if a non-empty data row has fewer cells than the
            position of a selected column.
    """
    # On Python 3 the file must be decoded here; latin-1 matches the
    # decoding csv_reader_converter applies to byte strings on Python 2,
    # and newline='' is what the csv module asks for.
    open_kwargs = {}
    if sys.version_info[0] >= 3:
        open_kwargs = {'encoding': 'latin-1', 'newline': ''}

    rows = []
    with open(filename, 'r', **open_kwargs) as f:
        data_file = csv_reader_converter(f, delimiter=delimiter, quoting=quoting)
        headers = next(data_file, None)  # parse the headers
        if headers is None:
            # Completely empty file: nothing to load.
            return rows

        # Only a dict can rename columns; other containers merely select.
        rename_map = column_names if isinstance(column_names, dict) else {}

        # Maps the position of each selected column to its output key.
        columns = {}
        for position, header in enumerate(headers):
            header = header.strip()
            # Test for "no selection" first so that None never reaches ``in``.
            if not column_names or header in column_names:
                columns[position] = rename_map.get(header, header)

        # Diagnostic output kept from the original implementation.
        print("headers %s" % (headers,))
        print("columns %s" % (column_names,))

        for line in data_file:
            if not line:
                # The csv reader yields [] for blank lines; skip them.
                continue
            row = {}
            for position, name in columns.items():
                cell = line[position]
                # Numeric cells (QUOTE_NONNUMERIC) have nothing to strip.
                row[name] = cell.strip() if hasattr(cell, 'strip') else cell
            rows.append(row)
    return rows
def build_dict(seq, key):
    """Index the records of ``seq`` by the value each one holds under ``key``.

    Every value in the result is a copy of the record with an extra
    ``'index'`` entry giving the record's position in ``seq``.  When several
    records share the same key value, the last one wins.
    """
    indexed = {}
    for position, record in enumerate(seq):
        lookup = record[key]
        entry = dict(record)
        entry['index'] = position
        indexed[lookup] = entry
    return indexed
def split_dict_by(data, key, subkey=None):
    """Group the items of ``data`` by ``item[key]`` or ``item[key][subkey]``.

    Args:
        data: iterable of mapping items.
        key: key looked up in every item.
        subkey: when truthy, the grouping value is ``item[key][subkey]``
            instead of ``item[key]``.

    Returns:
        A ``collections.defaultdict(list)`` mapping each grouping value to
        the list of items that carry it, in their original order.

    Items for which the lookup raises ``KeyError`` are reported on stdout
    and skipped.
    """
    split_dict = collections.defaultdict(list)
    for item in data:
        try:
            key_val = item[key][subkey] if subkey else item[key]
        except KeyError:
            # Reporting must not fail itself: an item without an 'id' is
            # still just skipped rather than raising a second KeyError.
            item_id = item.get('id', '<unknown>')
            if subkey:
                print('unable to find [%s][%s] in item %s' % (key, subkey, item_id))
            else:
                print("unable to find [%s] in item %s" % (key, item_id))
            continue
        split_dict[key_val].append(item)
    return split_dict
def read_json(filename):
    """Parse the JSON document stored in ``filename`` and return the result."""
    with open(filename, 'r') as handle:
        parsed = json.load(handle)
    return parsed
def write_json(filename, data):
    """Serialize ``data`` as JSON and write it to ``filename``.

    Args:
        filename: path of the file to create or overwrite.
        data: any object ``json.dumps`` can encode.

    Raises:
        TypeError, ValueError: propagated from ``json.dumps`` when ``data``
            cannot be encoded; in that case ``filename`` is left untouched.
    """
    # Serialize before opening: opening with 'w' truncates the target, so
    # encoding inside the ``with`` would wipe an existing file whenever
    # ``data`` turns out not to be serializable.
    serialized = json.dumps(data)
    with open(filename, 'w') as f:
        f.write(serialized)