-
Notifications
You must be signed in to change notification settings - Fork 0
/
distance.py
executable file
·50 lines (33 loc) · 1.21 KB
/
distance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/local/bin/pypy
import sys
import math
from array import array
import operator
# Global verbosity switch: the default for reader()'s `verbose` argument and
# the flag the loading loop checks before echoing each word to stderr.
VERBOSE = False


def reader(filename, verbose=VERBOSE, delimeter="\t", nrows=1000):
    """Lazily yield the delimited fields of the first rows of a text file.

    Each line is stripped of leading/trailing whitespace and then split on
    ``delimeter``; the resulting list of strings is yielded.

    Args:
        filename: Path of the file to read.
        verbose: Accepted for interface compatibility; currently ignored.
        delimeter: Field separator passed to ``str.split`` (the misspelled
            name is kept because callers pass it by keyword).
        nrows: Stop after yielding this many rows. A value that never equals
            a 1-based row number (for example ``None`` or ``0``) disables the
            limit and the whole file is read.

    Yields:
        list of str: the fields of one line.
    """
    # Text mode (instead of 'rb') keeps the lines the same type as the
    # delimiter, so the split works on Python 3 as well as Python 2.
    with open(filename, 'r') as csvfile:
        for rownum, line in enumerate(csvfile, start=1):
            yield line.strip().split(delimeter)
            # The check runs after the yield so that exactly `nrows` rows are
            # produced before the file is closed.
            if rownum == nrows:
                break
def _euclidean(u, v):
    """Return the Euclidean distance between two vectors.

    The vectors are paired with ``zip``, so if their lengths differ the extra
    trailing components of the longer one are ignored.
    """
    return math.sqrt(sum((a - b) * (a - b) for a, b in zip(u, v)))


# Load up to 80000 rows from the embeddings file. The first column of each
# row is the word; every remaining column is parsed as a float and stored in
# a compact single-precision array.
features = []
for record in reader("german.embeddings", verbose=True, delimeter="\t", nrows=80000):
    word, featurelst = record[0], array('f', [float(f) for f in record[1:]])
    if VERBOSE:
        # Written via sys.stderr.write so the line behaves the same on
        # Python 2 and Python 3.
        sys.stderr.write("%s\n" % word)
    features.append(featurelst)

n = len(features)

# For every vector i, emit one "iiiii jjjjj distance" line for each other
# vector j (zero-based, zero-padded indices). Each row of the distance matrix
# is written in a single call to keep the number of writes small.
for i in range(n):
    current = features[i]  # hoisted: invariant across the inner loop
    lines = [
        "%05d %05d %.6f" % (i, j, _euclidean(current, features[j]))
        for j in range(n) if i != j
    ]
    sys.stdout.write("\n".join(lines))
    sys.stdout.write("\n")