-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_utils.py
86 lines (71 loc) · 2.89 KB
/
data_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from __future__ import print_function
import numpy as np
from numpy.testing import assert_array_almost_equal
def multiclass_noisify(y, P, random_state=0):
    """Flip class labels according to the transition probability matrix P.

    Every label ``c`` is replaced by a class drawn from the categorical
    distribution stored in row ``P[c]``.

    Parameters
    ----------
    y : array of integer labels, each between 0 and the number of
        classes - 1.  Flat ``(m,)`` and column ``(m, 1)`` arrays are both
        accepted.
    P : square, non-negative, row-stochastic matrix; ``P[i, j]`` is the
        probability that class ``i`` is relabelled as class ``j``.
    random_state : seed for the ``np.random.RandomState`` used to sample.

    Returns
    -------
    A new array with the same shape as ``y`` holding the noisy labels;
    ``y`` itself is not modified.

    Raises
    ------
    AssertionError
        If ``P`` is not square, has a negative entry, has a row that does
        not sum to 1, or if a label is >= ``P.shape[0]``.
    """
    y = np.asarray(y)
    P = np.asarray(P)

    print(np.max(y), P.shape[0])
    assert P.shape[0] == P.shape[1]
    assert np.max(y) < P.shape[0]

    # row stochastic matrix
    assert_array_almost_equal(P.sum(axis=1), np.ones(P.shape[1]))
    assert (P >= 0.0).all()

    m = y.shape[0]
    print(m)
    new_y = y.copy()
    flipper = np.random.RandomState(random_state)

    # One scalar label per sample, whether y is (m,) or (m, 1).  Indexing
    # P with the raw y[idx] and then taking [0] only produced a row for
    # column-shaped input; for flat input it produced a single probability.
    labels = y.reshape(m, -1)[:, 0]

    # Samples are drawn one at a time, in order, so the random stream for
    # a given seed matches the per-sample draws of the original loop.
    for idx in range(m):
        # draw a one-hot vector from the row of the current label
        flipped = flipper.multinomial(1, P[labels[idx]], 1)[0]
        # exactly one entry is 1; its position is the new class
        new_y[idx] = np.argmax(flipped)

    return new_y
# noisify_pairflip calls the function "multiclass_noisify"
def noisify_pairflip(y_train, noise, random_state=None, nb_classes=10):
    """Corrupt labels with pair-flip noise.

    With probability ``noise`` a label of class ``i`` is flipped to class
    ``i + 1``; the last class wraps around to class 0.  Otherwise the label
    is kept.

    Parameters
    ----------
    y_train : array of integer labels, passed to ``multiclass_noisify``.
    noise : flip probability.  When it is not greater than 0 the labels
        are returned unchanged.
    random_state : seed forwarded to ``multiclass_noisify``.
    nb_classes : number of classes (size of the transition matrix).

    Returns
    -------
    ``(labels, actual_noise)`` where ``actual_noise`` is the fraction of
    labels that differ from the input (``0.0`` when no noise is applied).

    Raises
    ------
    AssertionError
        If ``noise > 0`` but sampling left every label unchanged.
    """
    P = np.eye(nb_classes)
    n = noise
    # Bound up front so the zero-noise path has a value to return.
    actual_noise = 0.0

    if n > 0.0:
        # i -> i + 1, with the last class wrapping to 0
        for i in range(nb_classes):
            P[i, i] = 1. - n
            P[i, (i + 1) % nb_classes] = n

        y_train_noisy = multiclass_noisify(y_train, P=P,
                                           random_state=random_state)
        actual_noise = (y_train_noisy != y_train).mean()
        assert actual_noise > 0.0
        print('Actual noise %.2f' % actual_noise)
        y_train = y_train_noisy

    print(P)
    return y_train, actual_noise
def noisify_multiclass_symmetric(y_train, noise, random_state=None, nb_classes=10):
    """Corrupt labels with symmetric noise.

    A label keeps its class with probability ``1 - noise`` and is flipped
    to each of the other ``nb_classes - 1`` classes with probability
    ``noise / (nb_classes - 1)``.

    Parameters
    ----------
    y_train : array of integer labels, passed to ``multiclass_noisify``.
    noise : total flip probability.  When it is not greater than 0 the
        labels are returned unchanged.
    random_state : seed forwarded to ``multiclass_noisify``.
    nb_classes : number of classes (size of the transition matrix).

    Returns
    -------
    ``(labels, actual_noise)`` where ``actual_noise`` is the fraction of
    labels that differ from the input (``0.0`` when no noise is applied).

    Raises
    ------
    AssertionError
        If ``noise > 0`` but sampling left every label unchanged.
    """
    n = noise
    # Bound up front so the zero-noise path has a value to return.
    actual_noise = 0.0

    # Off-diagonal mass is shared evenly; the diagonal is set
    # unconditionally so that zero noise yields the identity matrix
    # rather than an all-zero one.
    P = np.full((nb_classes, nb_classes), n / (nb_classes - 1))
    np.fill_diagonal(P, 1. - n)

    if n > 0.0:
        y_train_noisy = multiclass_noisify(y_train, P=P,
                                           random_state=random_state)
        actual_noise = (y_train_noisy != y_train).mean()
        assert actual_noise > 0.0
        print('Actual noise %.2f' % actual_noise)
        y_train = y_train_noisy

    print(P)
    return y_train, actual_noise
def noisify(nb_classes=5, train_labels=None, noise_type=None, noise_rate=0):
    """Apply the requested kind of label noise to ``train_labels``.

    Parameters
    ----------
    nb_classes : number of classes.
    train_labels : array of integer labels to corrupt.
    noise_type : ``'pairflip'`` or ``'symmetric'``.
    noise_rate : flip probability handed to the selected noise function.

    Returns
    -------
    ``(train_noisy_labels, actual_noise_rate)`` as returned by the selected
    noise function.  Sampling always uses ``random_state=0``.

    Raises
    ------
    ValueError
        If ``noise_type`` is not one of the supported values.
    """
    if noise_type == 'pairflip':
        return noisify_pairflip(train_labels, noise_rate,
                                random_state=0, nb_classes=nb_classes)
    if noise_type == 'symmetric':
        return noisify_multiclass_symmetric(train_labels, noise_rate,
                                            random_state=0,
                                            nb_classes=nb_classes)
    # Falling through used to hit the return with unbound locals.
    raise ValueError(
        "unsupported noise_type %r; expected 'pairflip' or 'symmetric'"
        % (noise_type,))