Commit 00c3009 (1 parent: ee78093)
Showing 17 changed files with 2,010 additions and 0 deletions.
@@ -0,0 +1,2 @@
from algorithms.discrete_block_bayesian_opt import BlockBayesAttack
from algorithms.discrete_bayesian_opt import BayesOpt
12 changes: 12 additions & 0 deletions
textattack/algorithms/bayesopt/acquisition/acquisition_function/acquisition_functions.py
@@ -0,0 +1,12 @@
import torch
from torch.distributions.normal import Normal


def expected_improvement(mean, var, reference):
    # closed-form expected improvement (maximization form) under a Gaussian
    # posterior with the given mean and variance, measured against `reference`
    predictive_normal = Normal(mean.new_zeros(mean.size()), mean.new_ones(mean.size()))
    std = torch.sqrt(var)
    standardized = (mean - reference) / std

    ucdf = predictive_normal.cdf(standardized)
    updf = torch.exp(predictive_normal.log_prob(standardized))
    ei = std * (updf + standardized * ucdf)
    return ei
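For reference, this computes EI(x) = sigma(x) * (phi(z) + z * Phi(z)) with z = (mu(x) - reference) / sigma(x), the standard maximization form of expected improvement. A minimal sanity-check sketch (toy tensors, not part of the commit):

# hypothetical posterior values, for illustration only
mean = torch.tensor([0.0, 1.0])
var = torch.tensor([1.0, 0.25])
print(expected_improvement(mean, var, reference=0.0))
# ~0.3989 for the zero-mean point (= phi(0)), and ~1.004 for the point whose
# mean exceeds the reference by two standard deviations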
2 changes: 2 additions & 0 deletions
textattack/algorithms/bayesopt/acquisition/algorithm/__init__.py
@@ -0,0 +1,2 @@
from algorithms.bayesopt.acquisition.algorithm.kmeanspp import kmeans_pp
from algorithms.bayesopt.acquisition.algorithm.greedy_ascent import acquisition_maximization_with_indices
140 changes: 140 additions & 0 deletions
textattack/algorithms/bayesopt/acquisition/algorithm/greedy_ascent.py
@@ -0,0 +1,140 @@
import copy
import time

import numpy as np
import torch

from algorithms.bayesopt.acquisition.acquisition_function.acquisition_functions import expected_improvement
from algorithms.bayesopt.dpp.dpp import dpp_sample


def find_remained_indices(candidates, history_X, N):
    # Return the indices (out of range(N)) of candidate rows NOT already in
    # history_X. The topk trick builds an (|history|, N) boolean match matrix
    # and reads off, per history row, the index of a matching candidate.
    if len(candidates) == 0:
        return []
    values, indices = torch.topk(
        ((candidates.long().t() == history_X.long().unsqueeze(-1)).all(dim=1)).int(), 1, 1)
    rm_ids = set(int(ind) for ind in indices[values != 0])
    return list(set(range(N)) - rm_ids)


def find_removed_indices(candidates, history_X):
    # Same match trick, but return the candidate indices that DO appear in history_X.
    values, indices = torch.topk(
        ((candidates.long().t() == history_X.long().unsqueeze(-1)).all(dim=1)).int(), 1, 1)
    rm_ids = set(int(ind) for ind in indices[values != 0])
    return list(rm_ids)


def greedy_ascent_with_indices(center_indices, opt_indices, stage, hb, surrogate_model,
                               batch_size, reference=None, filter=True, return_ei=False,
                               acq_with_opt_indices=True):
    # One greedy-ascent step: enumerate the Hamming-ball neighbors of the
    # center sequence, drop already-evaluated ones, and keep the batch_size
    # neighbors with the largest acquisition values.
    candidates = hb.neighbors(center_indices, stage + 1, 0, indices=opt_indices)
    N, L = candidates.shape

    # filter out candidates that were already observed
    if filter:
        remained_indices = find_remained_indices(candidates, hb.eval_X_reduced, N)
    else:
        remained_indices = list(range(N))

    testX_cate = candidates[remained_indices]

    # restrict the acquisition input to the coordinates being optimized
    if acq_with_opt_indices:
        testX = testX_cate[:, opt_indices]
        centerX = center_indices.view(1, -1)[:, opt_indices]
    else:
        testX = testX_cate
        centerX = center_indices.view(1, -1)

    if reference is None:
        _, reference, best_ind = hb.best_of_hamming(hb.orig_X, stage + 1)

    if len(remained_indices) == 0:
        # every neighbor has already been observed; fall back to the center
        # point (the caller should then widen the search space)
        if return_ei:
            center_ei = surrogate_model.acquisition(centerX, bias=reference)
            return center_indices.view(1, -1), torch.Tensor([center_ei])
        else:
            return center_indices.view(1, -1)

    ei = surrogate_model.acquisition(testX, bias=reference)
    # center_ei = surrogate_model.acquisition(centerX, bias=reference)

    topk_values, topk_indices = torch.topk(ei, min(len(ei), batch_size))
    best_candidates_indices = torch.cat([testX_cate[idx].view(1, -1) for idx in topk_indices], dim=0)
    if return_ei:
        return best_candidates_indices, topk_values
    else:
        return best_candidates_indices


def acquisition_maximization_with_indices(cur_seqs, opt_indices, batch_size, stage, hb,
                                          surrogate_model, reference=None, dpp_type='no',
                                          acq_with_opt_indices=True):
    global_candidates_, global_eis_ = [], []

    t0 = time.time()  # timing probes (t0..t5) retained from profiling
    for cur_seq in cur_seqs:
        cur_indices = hb.reduce_seq(cur_seq).view(1, -1)
        if acq_with_opt_indices:
            cur_ei = surrogate_model.acquisition(cur_indices[:, opt_indices], bias=reference)
        else:
            cur_ei = surrogate_model.acquisition(cur_indices, bias=reference)
        global_candidates_.append(cur_indices)
        global_eis_.append(cur_ei)

        # spread an exploration budget of ~100 candidates over the current seeds
        num_next = int(np.ceil(100 / len(cur_seqs)))
        filtering = True
        new_candidates_ = []
        new_eis_ = []
        new_candidates, new_eis = greedy_ascent_with_indices(
            cur_indices, opt_indices, stage, hb, surrogate_model, batch_size=num_next,
            reference=reference, filter=filtering, return_ei=True,
            acq_with_opt_indices=acq_with_opt_indices)
        new_candidates_.append(new_candidates)
        new_eis_.extend(new_eis)
        N = len(new_candidates_)
        new_candidates_ = torch.cat(new_candidates_, dim=0)

        candidates, indices = unique(new_candidates_, dim=0)
        eis = [new_eis_[ind] for ind in indices]
        assert len(candidates) == len(eis), f'something wrong {len(candidates)}, {len(eis)}'

        global_candidates_.append(candidates)
        global_eis_.extend(eis)
    t1 = time.time()

    # deduplicate across seeds and drop anything already evaluated
    global_candidates, indices = unique(torch.cat(global_candidates_, dim=0), dim=0)
    global_eis = [global_eis_[ind] for ind in indices]
    N, L = global_candidates.shape
    remained_indices = find_remained_indices(global_candidates, hb.eval_X_reduced, N)
    t2 = time.time()

    global_candidates = global_candidates[remained_indices]
    global_eis = [global_eis[ind] for ind in remained_indices]
    assert len(global_candidates) == len(global_eis), f'something wrong {len(global_candidates)}, {len(global_eis)}'

    global_eis = torch.Tensor(global_eis)

    if len(global_candidates) == 0:
        return None

    t3 = time.time()
    if dpp_type == 'no' or dpp_type == 'no_one':
        # plain top-k by acquisition value
        topk_values, topk_indices = torch.topk(global_eis, min(len(global_eis), batch_size))
        candidates = [hb.seq_by_indices(global_candidates[ind]) for ind in topk_indices]
    elif dpp_type == 'dpp_posterior':
        # shortlist the 100 best by acquisition, then pick a diverse batch
        # via a k-DPP over the surrogate posterior covariance
        t4 = time.time()
        topk_values, topk_indices = torch.topk(global_eis, min(len(global_eis), 100))
        global_candidates = global_candidates[topk_indices]

        num = min(len(global_candidates), batch_size)
        if acq_with_opt_indices:
            Lmat = surrogate_model.get_covar(global_candidates[:, opt_indices].cuda()).cpu().detach().numpy()
        else:
            Lmat = surrogate_model.get_covar(global_candidates.cuda()).cpu().detach().numpy()
        Lmat = Lmat / np.mean(np.abs(Lmat))
        if Lmat.shape[0] == num:
            best_indices = list(range(num))
        else:
            best_indices = dpp_sample(Lmat, k=num, T=0)
        candidates = [hb.seq_by_indices(global_candidates[ind]) for ind in best_indices]
    t5 = time.time()
    if len(candidates):
        return candidates
    else:
        # no unobserved candidate survived; the caller should widen the space
        return None


def unique(x, dim=None):
    # torch.unique plus, for each unique row, the index of its FIRST
    # occurrence: flipping inverse/perm before scatter_ lets earlier
    # occurrences overwrite later ones.
    unique_x, inverse = torch.unique(x, sorted=True, return_inverse=True, dim=dim)
    perm = torch.arange(inverse.size(0), dtype=inverse.dtype, device=inverse.device)
    inverse, perm = inverse.flip([0]), perm.flip([0])
    return unique_x, inverse.new_empty(unique_x.size(dim)).scatter_(0, inverse, perm)
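The flip-then-scatter trick in unique relies on scatter_'s write order for duplicate indices (formally unspecified, but the standard idiom for first-occurrence recovery). A small sketch with made-up rows, not part of the commit:

# hypothetical input: row [1, 2] appears at positions 0 and 2
x = torch.tensor([[1, 2], [3, 4], [1, 2]])
rows, first_idx = unique(x, dim=0)
# on CPU this yields rows == [[1, 2], [3, 4]] and first_idx == [0, 1],
# so x[first_idx] recovers each unique row at its earliest position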
140 changes: 140 additions & 0 deletions
textattack/algorithms/bayesopt/acquisition/algorithm/kmeanspp.py
@@ -0,0 +1,140 @@
import numpy as np


def euc_dist_sq(data1, data2):
    '''
    Pairwise squared Euclidean distances.
    inputs:
        data1 - numpy array of data points (n1, d)
        data2 - numpy array of data points (n2, d)
    '''
    n1, d1 = data1.shape
    n2, d2 = data2.shape
    assert d1 == d2, f"the embedding dimensions of data1 and data2 differ: {d1} != {d2}."
    d = d1
    c = np.reshape(data1, [n1, 1, d]) - np.reshape(data2, [1, n2, d])
    dist_sq = np.sum(np.square(c), axis=2)
    return dist_sq


def euc_dist_sq_test():
    data1 = np.random.randn(2, 4)
    data2 = np.random.randn(3, 4)
    print("data1 : ", data1)
    print("data2 : ", data2)
    print("euc_dist_sq : ", euc_dist_sq(data1, data2))


def hamming_dist_sq(data1, data2):
    '''
    Pairwise squared Hamming distances (number of differing coordinates, squared).
    inputs:
        data1 - numpy array of data points (n1, d)
        data2 - numpy array of data points (n2, d)
    '''
    n1, d1 = data1.shape
    n2, d2 = data2.shape
    assert d1 == d2, f"the embedding dimensions of data1 and data2 differ: {d1} != {d2}."
    d = d1
    c = (np.reshape(data1, [n1, 1, d]) != np.reshape(data2, [1, n2, d])) * 1.0
    dist_sq = np.square(np.sum(c, axis=2))
    return dist_sq


def hamming_dist_sq_test():
    data1 = np.random.randint(5, size=[2, 4])
    data2 = np.random.randint(5, size=[3, 4])
    print("data1 : ", data1)
    print("data2 : ", data2)
    print("hamming_dist_sq : ", hamming_dist_sq(data1, data2))


def kmeans_pp(data, k, dist='euclidean', init_ind=None):
    '''
    Initialize centroids in the style of k-means++.
    inputs:
        data - numpy array of data points having shape (n, d)
        k - number of clusters (k <= n)
        dist - the name of the metric ('euclidean' or 'hamming')
        init_ind - int (if None, a random initial index is drawn)
    '''
    ## initialize the centroid list with one (randomly) selected data point
    centroids = []
    selected_indices = []

    if init_ind is None:
        init_ind = np.random.randint(data.shape[0])
    centroids.append(data[init_ind, :])
    selected_indices.append(init_ind)

    if dist == 'euclidean':
        d_sq_func = euc_dist_sq
    elif dist == 'hamming':
        d_sq_func = hamming_dist_sq

    ## compute the remaining centroids
    for _ in range(k - 1):
        all_indices = list(range(data.shape[0]))
        unselected_indices = list(set(all_indices) - set(selected_indices))

        d_sq_to_centroid = d_sq_func(data[unselected_indices], data[selected_indices])
        min_d_sq_to_centroid = np.min(d_sq_to_centroid, axis=1)
        if np.sum(min_d_sq_to_centroid) == 0:
            break
        # deterministic farthest-point rule; the commented lines below are the
        # stochastic D^2-sampling variant of standard k-means++
        # prob = min_d_sq_to_centroid / np.sum(min_d_sq_to_centroid)
        # next_centroid_ind = all_indices[np.random.choice(unselected_indices, p=prob)]
        next_centroid_ind = unselected_indices[np.argmax(min_d_sq_to_centroid)]

        selected_indices.append(next_centroid_ind)
        centroids.append(data[next_centroid_ind, :])
    return np.array(centroids), selected_indices


def kmeans_pp_test():
    import matplotlib.pyplot as plt

    ## 1. euclidean
    data = np.random.randn(100, 2)
    centroids, selected_indices = kmeans_pp(data, 5, dist='euclidean')
    plt.scatter(data[:, 0], data[:, 1], label=0)
    plt.scatter(data[selected_indices, 0], data[selected_indices, 1], label=1)
    plt.savefig('kmeans_test1.png')
    plt.close()

    ## 2. hamming
    data = np.random.randint(20, size=[100, 2])
    centroids, selected_indices = kmeans_pp(data, 5, dist='hamming')
    plt.scatter(data[:, 0], data[:, 1], label=0)
    plt.scatter(data[selected_indices, 0], data[selected_indices, 1], label=1)
    plt.savefig('kmeans_test2.png')


def kmeans_pp_test2():
    # compare k-means++ initialization against random selection on
    # well-separated Gaussian clusters
    dl = []
    for i in range(10):
        dl.append(np.random.randn(100, 512) + i * 10)
    data = np.reshape(np.stack(dl), [1000, 512])
    print(data.shape)

    losses = []
    import time
    tt = 0
    for i in range(10):
        print(i)
        t0 = time.time()
        centroids, selected_indices = kmeans_pp(data, 30, dist='euclidean')
        t1 = time.time()
        tt += t1 - t0
        loss = np.sum(np.min(euc_dist_sq(data, centroids), axis=1))
        losses.append(loss)
    print("time : ", tt / 10)

    rnd_losses = []
    for i in range(10):
        indices = np.random.choice(data.shape[0], size=[30], replace=False)
        centroids = data[indices, :]
        loss = np.sum(np.min(euc_dist_sq(data, centroids), axis=1))
        rnd_losses.append(loss)

    print("kmeans", sum(losses) / len(losses), losses)
    print("random", sum(rnd_losses) / len(rnd_losses), rnd_losses)


if __name__ == '__main__':
    # euc_dist_sq_test()
    # hamming_dist_sq_test()
    kmeans_pp_test()
    kmeans_pp_test2()
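Since kmeans_pp uses a deterministic farthest-point rule after the initial pick, its behavior is easy to verify on toy data. A minimal sketch (hypothetical points, fixed init_ind, not part of the commit):

# starting from row 0, the farthest-point rule must select row 2 next,
# the unique farthest point from [0, 0] in squared Euclidean distance
data = np.array([[0.0, 0.0], [1.0, 0.0], [5.0, 5.0]])
centroids, idx = kmeans_pp(data, 2, dist='euclidean', init_ind=0)
assert idx == [0, 2]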
44 changes: 44 additions & 0 deletions
textattack/algorithms/bayesopt/dpp/dpp.py
@@ -0,0 +1,44 @@
import numpy as np
from dppy.finite_dpps import FiniteDPP


def dpp_init(L, k):
    # Greedy initialization: grow S one index at a time, always adding the
    # item that maximizes det(L[S][:, S]) (a standard greedy MAP heuristic
    # for k-DPPs).
    n, m = L.shape
    assert n == m, "L should be a square numpy matrix"
    assert n >= k, "candidate pool should be greater than or equal to k"

    S = [0]
    cur_det = np.linalg.det(L[S][:, S])
    while len(S) < k:
        det_best = -1e9
        S_best = None
        for i in range(n):
            if i in S:
                continue
            S_tmp = S + [i]
            submat = L[S_tmp][:, S_tmp]
            det = np.linalg.det(submat)
            if det > det_best:
                S_best = S_tmp
                det_best = det
        S = S_best
        cur_det = det_best
    return S, cur_det


def dpp_sample(L, k, T):
    n, m = L.shape
    assert n == m, "L should be a square numpy matrix"
    assert n >= k, "candidate pool should be greater than or equal to k"

    # greedy insertion; with T == 0 this is the final answer
    S, cur_det = dpp_init(L, k)
    if T == 0:
        return S
    try:
        DPP = FiniteDPP('likelihood', **{'L': L})
        S = DPP.sample_mcmc_k_dpp(size=k, s_init=S, nb_iter=T)
        return S
    except Exception:
        # if L is not numerically PSD, retry with a small diagonal jitter
        L_ = L + 1e-8 * np.eye(n)
        DPP = FiniteDPP('likelihood', **{'L': L_})
        S = DPP.sample_mcmc_k_dpp(size=k, s_init=S, nb_iter=T)
        return S
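A minimal usage sketch for dpp_sample (toy PSD kernel with hypothetical values; with T=0 only the greedy initialization runs, so dppy is never invoked):

# build a small positive semi-definite likelihood kernel and greedily
# pick a diverse subset of 3 of the 6 items
rng = np.random.RandomState(0)
X = rng.randn(6, 3)
L = X @ X.T + 1e-6 * np.eye(6)
print(dpp_sample(L, k=3, T=0))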