-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathsbd_util.py
120 lines (100 loc) · 3 KB
/
sbd_util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import re, cPickle, os, gzip, sys, math
def save_pickle(data, path):
    """
    Pickle <data> and write it, gzip-compressed, to the file at <path>.

    An existing file at <path> is overwritten. The output handle is closed
    even if pickling fails part-way, so a failed dump does not leak it.
    """
    out = gzip.open(path, 'wb')
    try:
        cPickle.dump(data, out)
    finally:
        out.close()
# Name of the external "gunzip to stdout" tool: 'gzcat' on macOS (Darwin),
# 'zcat' everywhere else. Decided from sys.platform rather than by shelling
# out to `uname -a`, which spawned a process at import time and never closed
# its pipe.
ZCAT = 'gzcat' if sys.platform == 'darwin' else 'zcat'
def load_pickle(path):
    """
    Load and return the object pickled in the gzip-compressed file <path>.

    Decompression is delegated to the external ZCAT program, whose stdout is
    fed straight to the unpickler. The command is passed as an argument list
    (no shell), so paths containing spaces or shell metacharacters are safe.

    NOTE: unpickling can execute arbitrary code; only load trusted files.
    """
    import subprocess

    # bufsize=-1 keeps the pipe buffered, as the os.popen pipe used to be.
    proc = subprocess.Popen([ZCAT, path], stdout=subprocess.PIPE, bufsize=-1)
    try:
        data = cPickle.load(proc.stdout)
    finally:
        # Always release the pipe and reap the child, even on a failed load.
        proc.stdout.close()
        proc.wait()
    return data
def die(msg):
    """
    Print "ERROR: <msg>" (preceded by a blank line) and terminate the program.

    Exits with status 1 so that calling shells and scripts see the failure;
    raises SystemExit, like any sys.exit call.
    """
    # Wrap msg in a 1-tuple so that a tuple-valued msg is formatted as a
    # whole instead of being unpacked as multiple format arguments.
    print('\nERROR: %s' % (msg,))
    sys.exit(1)
def logit(x, y=1):
    """
    Return the logistic (sigmoid) function of y*x: 1 / (1 + e ** (-y*x)).

    The result lies in [0, 1]. For arguments so extreme that e ** (-y*x)
    exceeds the float range, the limiting value is returned instead of
    raising OverflowError.
    """
    exponent = -1 * y * x
    try:
        return 1.0 / (1 + math.e ** exponent)
    except OverflowError:
        # The power term blew up: the sigmoid has saturated.
        return 0.0 if exponent > 0 else 1.0
def get_files(path, pattern):
    """
    Recursively find all files rooted in <path> that match the regexp <pattern>

    If <path> is itself a file, it is returned (as a one-element list) when
    <pattern> matches at the start of its basename (re.match). If <path> is a
    directory, every file below it whose basename contains a match for
    <pattern> (re.search) is returned. Anything else yields an empty list.

    <path> may be given with or without a trailing '/'.
    """
    found = []

    # base case: path is just a file
    # NOTE(review): this case anchors the pattern (re.match) while the
    # directory walk below does not (re.search); kept as-is for callers.
    if re.match(pattern, os.path.basename(path)) is not None and os.path.isfile(path):
        found.append(path)
        return found

    # general case
    if not os.path.isdir(path):
        return found

    for name in os.listdir(path):
        # os.path.join inserts the separator only when it is missing, so the
        # walk no longer depends on <path> ending in '/'.
        item = os.path.join(path, name)
        if os.path.isfile(item):
            if re.search(pattern, name) is not None:
                found.append(item)
        elif os.path.isdir(item):
            found.extend(get_files(item, pattern))
    return found
class Counter(dict):
    """
    A dict of counts in which looking up a missing key yields 0.0 instead of
    raising KeyError. The missing key is not inserted by the lookup.
    """

    def __getitem__(self, entry):
        try:
            return dict.__getitem__(self, entry)
        except KeyError:
            return 0.0

    def copy(self):
        """
        returns a shallow copy that is itself a Counter
        """
        return Counter(dict.copy(self))

    def __add__(self, counter):
        """
        Add two counters together in obvious manner.

        The result has the union of both key sets; a key absent from one
        side contributes 0.0.
        """
        newCounter = Counter()
        for entry in set(self).union(counter):
            newCounter[entry] = self[entry] + counter[entry]
        return newCounter

    def sortedKeys(self):
        """
        returns a list of keys sorted by their values
        keys with the highest values will appear first
        """
        # sorted() stays stable with reverse=True, so keys with equal values
        # keep the dict's own iteration order.
        return sorted(self, key=self.__getitem__, reverse=True)

    def totalCount(self):
        """
        returns the sum of counts for all keys
        """
        return sum(self.values())

    def incrementAll(self, value=1):
        """
        increment all counts by value
        helpful for removing 0 probs
        """
        # Iterate over a snapshot of the keys while the values are rewritten.
        for key in list(self):
            self[key] += value

    def display(self):
        """
        a nicer display than the built-in dict.__repr__
        prints one "key: value" line per entry
        """
        for key, value in self.items():
            print(str(key) + ': ' + str(value))

    def displaySorted(self, N=10):
        """
        display sorted by decreasing value
        only the first N entries are printed
        """
        for key in self.sortedKeys()[:N]:
            print(str(key) + ': ' + str(self[key]))
def normalize(counter):
    """
    return a new Counter in which every value of <counter> has been divided
    by the sum of all its values; the argument itself is left untouched
    """
    source = Counter(counter)
    denominator = float(source.totalCount())
    # Counter inherits dict's constructor, so it can be built from pairs.
    return Counter((key, count / denominator) for key, count in source.items())