Skip to content

Commit

Permalink
Finishing up distances implementation.
Browse files Browse the repository at this point in the history
  • Loading branch information
gugarosa committed Jun 23, 2020
1 parent a28fdbd commit 95ba131
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 3 deletions.
78 changes: 76 additions & 2 deletions opfython/math/distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,7 @@ def chord_distance(x, y):
"""

# Calculates the Chord distance
dist = 2 - 2 * (np.einsum('i->', x * y) /
(np.einsum('i->', x ** 2) * np.einsum('i->', y ** 2)))
dist = 2 - 2 * (np.einsum('i->', x * y) / (np.einsum('i->', x ** 2) * np.einsum('i->', y ** 2)))

return dist ** 0.5

Expand Down Expand Up @@ -276,6 +275,57 @@ def gower_distance(x, y):
return np.einsum('i->', dist) / x.shape[0]


def hamming_distance(x, y):
    """Calculates the Hamming Distance.

    The distance is the number of positions at which `x` and `y` disagree.

    Args:
        x (np.array): N-dimensional array.
        y (np.array): N-dimensional array.

    Returns:
        The Hamming Distance between x and y.

    """

    # Flags every position whose values differ between the two arrays
    mismatches = x != y

    # Each flagged position counts as one occurrence of `x != y`
    return np.count_nonzero(mismatches)


def hassanat_distance(x, y):
    """Calculates the Hassanat Distance.

    Every dimension contributes `1 - (1 + min) / (1 + max)` when the smaller
    of the two values is non-negative. Otherwise, both the numerator and the
    denominator are shifted by `|min|` before the ratio is taken.

    Args:
        x (np.array): N-dimensional array.
        y (np.array): N-dimensional array.

    Returns:
        The Hassanat Distance between x and y.

    """

    # Gathers the element-wise minimum and maximum only once
    lower, upper = np.minimum(x, y), np.maximum(x, y)

    # Dimensions with a negative minimum are shifted by its magnitude,
    # while the remaining ones receive a zero shift (adding zero is exact,
    # so both cases can share a single vectorized expression)
    shift = np.where(lower < 0, np.fabs(lower), 0)

    # Calculates the Hassanat Distance for each dimension
    dist = 1 - (1 + lower + shift) / (1 + upper + shift)

    return np.einsum('i->', dist)


def hellinger_distance(x, y):
"""Calculates the Hellinger Distance (Jeffries-Matusita Distance).
Expand Down Expand Up @@ -721,6 +771,27 @@ def squared_euclidean_distance(x, y):
return np.einsum('i->', dist)


def statistic_distance(x, y):
    """Calculates the Statistic Distance.

    Each dimension contributes the signed deviation of `x` from the
    element-wise mean of `x` and `y`, relative to that mean.

    Args:
        x (np.array): N-dimensional array.
        y (np.array): N-dimensional array.

    Returns:
        The Statistic Distance between x and y.

    """

    # Element-wise mean of both arrays (the `m` coefficient)
    mean = (x + y) / 2

    # Relative deviation of `x` from the mean, one value per dimension
    deviation = (x - mean) / mean

    # Sums the contribution of every dimension
    return np.einsum('i->', deviation)


def topsoe_distance(x, y):
"""Calculates the Topsoe Distance (Information Statistics).
Expand Down Expand Up @@ -832,6 +903,8 @@ def vicis_wave_hedges_distance(x, y):
'euclidean': euclidean_distance,
'gaussian': gaussian_distance,
'gower': gower_distance,
'hamming': hamming_distance,
'hassanat': hassanat_distance,
'hellinger': hellinger_distance,
'jaccard': jaccard_distance,
'jeffreys': jeffreys_distance,
Expand All @@ -856,6 +929,7 @@ def vicis_wave_hedges_distance(x, y):
'squared': squared_distance,
'squared_chord': squared_chord_distance,
'squared_euclidean': squared_euclidean_distance,
'statistic': statistic_distance,
'topsoe': topsoe_distance,
'vicis_symmetric1': vicis_symmetric1_distance,
'vicis_symmetric2': vicis_symmetric2_distance,
Expand Down
29 changes: 28 additions & 1 deletion tests/opfython/math/test_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,24 @@ def test_gower_distance():
assert dist == 0.2000000000000002


def test_hamming_distance():
    """Checks the Hamming distance between two vectors that differ everywhere."""
    a = np.asarray([5.1, 3.5, 1.4, 0.3])
    b = np.asarray([5.4, 3.4, 1.7, 0.2])

    # All four coordinates differ, hence four mismatches are expected
    expected = 4

    assert distance.hamming_distance(a, b) == expected


def test_hassanat_distance():
    """Checks the Hassanat distance on a pair of strictly positive vectors."""
    a = np.asarray([5.1, 3.5, 1.4, 0.3])
    b = np.asarray([5.4, 3.4, 1.7, 0.2])

    # Reference value pinned for this pair of vectors
    expected = 0.2571314102564104

    assert distance.hassanat_distance(a, b) == expected


def test_hellinger_distance():
x = np.asarray([5.1, 3.5, 1.4, 0.3])
y = np.asarray([5.4, 3.4, 1.7, 0.2])
Expand Down Expand Up @@ -355,6 +373,15 @@ def test_squared_euclidean_distance():
assert dist == 0.20000000000000046


def test_statistic_distance():
    """Checks the Statistic distance on a pair of strictly positive vectors."""
    a = np.asarray([5.1, 3.5, 1.4, 0.3])
    b = np.asarray([5.4, 3.4, 1.7, 0.2])

    # Reference value pinned for this pair of vectors
    expected = 0.08914713150337263

    assert distance.statistic_distance(a, b) == expected


def test_topsoe_distance():
x = np.asarray([5.1, 3.5, 1.4, 0.3])
y = np.asarray([5.4, 3.4, 1.7, 0.2])
Expand Down Expand Up @@ -397,4 +424,4 @@ def test_vicis_wave_hedges_distance():

dist = distance.vicis_wave_hedges_distance(x, y)

assert dist == 0.8025210084033614
assert dist == 0.8025210084033614

0 comments on commit 95ba131

Please sign in to comment.