Commit 00c3009 (1 parent: ee78093)
Showing 17 changed files with 2,010 additions and 0 deletions.
@@ -0,0 +1,2 @@
from algorithms.discrete_block_bayesian_opt import BlockBayesAttack
from algorithms.discrete_bayesian_opt import BayesOpt
12 changes: 12 additions & 0 deletions
textattack/algorithms/bayesopt/acquisition/acquisition_function/acquisition_functions.py
@@ -0,0 +1,12 @@
import torch
from torch.distributions.normal import Normal


def expected_improvement(mean, var, reference):
    # closed-form expected improvement (maximization form) under a Gaussian
    # posterior with the given mean and variance, measured against `reference`
    predictive_normal = Normal(mean.new_zeros(mean.size()), mean.new_ones(mean.size()))
    std = torch.sqrt(var)
    standardized = (mean - reference) / std

    ucdf = predictive_normal.cdf(standardized)
    updf = torch.exp(predictive_normal.log_prob(standardized))
    ei = std * (updf + standardized * ucdf)
    return ei
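For reference, this computes EI(x) = sigma(x) * (phi(z) + z * Phi(z)) with z = (mu(x) - reference) / sigma(x), the standard maximization form of expected improvement. A minimal sanity-check sketch (toy tensors, not part of the commit):

# hypothetical posterior values, for illustration only
mean = torch.tensor([0.0, 1.0])
var = torch.tensor([1.0, 0.25])
print(expected_improvement(mean, var, reference=0.0))
# ~0.3989 for the zero-mean point (= phi(0)), and ~1.004 for the point whose
# mean exceeds the reference by two standard deviations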
2 changes: 2 additions & 0 deletions
textattack/algorithms/bayesopt/acquisition/algorithm/__init__.py
@@ -0,0 +1,2 @@
from algorithms.bayesopt.acquisition.algorithm.kmeanspp import kmeans_pp
from algorithms.bayesopt.acquisition.algorithm.greedy_ascent import acquisition_maximization_with_indices
140 changes: 140 additions & 0 deletions
textattack/algorithms/bayesopt/acquisition/algorithm/greedy_ascent.py
@@ -0,0 +1,140 @@
import copy
import time

import numpy as np
import torch

from algorithms.bayesopt.acquisition.acquisition_function.acquisition_functions import expected_improvement
from algorithms.bayesopt.dpp.dpp import dpp_sample


def find_remained_indices(candidates, history_X, N):
    # Return the indices (out of range(N)) of candidate rows NOT already in
    # history_X. The topk trick builds an (|history|, N) boolean match matrix
    # and reads off, per history row, the index of a matching candidate.
    if len(candidates) == 0:
        return []
    values, indices = torch.topk(
        ((candidates.long().t() == history_X.long().unsqueeze(-1)).all(dim=1)).int(), 1, 1)
    rm_ids = set(int(ind) for ind in indices[values != 0])
    return list(set(range(N)) - rm_ids)


def find_removed_indices(candidates, history_X):
    # Same match trick, but return the candidate indices that DO appear in history_X.
    values, indices = torch.topk(
        ((candidates.long().t() == history_X.long().unsqueeze(-1)).all(dim=1)).int(), 1, 1)
    rm_ids = set(int(ind) for ind in indices[values != 0])
    return list(rm_ids)


def greedy_ascent_with_indices(center_indices, opt_indices, stage, hb, surrogate_model,
                               batch_size, reference=None, filter=True, return_ei=False,
                               acq_with_opt_indices=True):
    # One greedy-ascent step: enumerate the Hamming-ball neighbors of the
    # center sequence, drop already-evaluated ones, and keep the batch_size
    # neighbors with the largest acquisition values.
    candidates = hb.neighbors(center_indices, stage + 1, 0, indices=opt_indices)
    N, L = candidates.shape

    # filter out candidates that were already observed
    if filter:
        remained_indices = find_remained_indices(candidates, hb.eval_X_reduced, N)
    else:
        remained_indices = list(range(N))

    testX_cate = candidates[remained_indices]

    # restrict the acquisition input to the coordinates being optimized
    if acq_with_opt_indices:
        testX = testX_cate[:, opt_indices]
        centerX = center_indices.view(1, -1)[:, opt_indices]
    else:
        testX = testX_cate
        centerX = center_indices.view(1, -1)

    if reference is None:
        _, reference, best_ind = hb.best_of_hamming(hb.orig_X, stage + 1)

    if len(remained_indices) == 0:
        # every neighbor has already been observed; fall back to the center
        # point (the caller should then widen the search space)
        if return_ei:
            center_ei = surrogate_model.acquisition(centerX, bias=reference)
            return center_indices.view(1, -1), torch.Tensor([center_ei])
        else:
            return center_indices.view(1, -1)

    ei = surrogate_model.acquisition(testX, bias=reference)
    # center_ei = surrogate_model.acquisition(centerX, bias=reference)

    topk_values, topk_indices = torch.topk(ei, min(len(ei), batch_size))
    best_candidates_indices = torch.cat([testX_cate[idx].view(1, -1) for idx in topk_indices], dim=0)
    if return_ei:
        return best_candidates_indices, topk_values
    else:
        return best_candidates_indices


def acquisition_maximization_with_indices(cur_seqs, opt_indices, batch_size, stage, hb,
                                          surrogate_model, reference=None, dpp_type='no',
                                          acq_with_opt_indices=True):
    global_candidates_, global_eis_ = [], []

    t0 = time.time()  # timing probes (t0..t5) retained from profiling
    for cur_seq in cur_seqs:
        cur_indices = hb.reduce_seq(cur_seq).view(1, -1)
        if acq_with_opt_indices:
            cur_ei = surrogate_model.acquisition(cur_indices[:, opt_indices], bias=reference)
        else:
            cur_ei = surrogate_model.acquisition(cur_indices, bias=reference)
        global_candidates_.append(cur_indices)
        global_eis_.append(cur_ei)

        # spread an exploration budget of ~100 candidates over the current seeds
        num_next = int(np.ceil(100 / len(cur_seqs)))
        filtering = True
        new_candidates_ = []
        new_eis_ = []
        new_candidates, new_eis = greedy_ascent_with_indices(
            cur_indices, opt_indices, stage, hb, surrogate_model, batch_size=num_next,
            reference=reference, filter=filtering, return_ei=True,
            acq_with_opt_indices=acq_with_opt_indices)
        new_candidates_.append(new_candidates)
        new_eis_.extend(new_eis)
        N = len(new_candidates_)
        new_candidates_ = torch.cat(new_candidates_, dim=0)

        candidates, indices = unique(new_candidates_, dim=0)
        eis = [new_eis_[ind] for ind in indices]
        assert len(candidates) == len(eis), f'something wrong {len(candidates)}, {len(eis)}'

        global_candidates_.append(candidates)
        global_eis_.extend(eis)
    t1 = time.time()

    # deduplicate across seeds and drop anything already evaluated
    global_candidates, indices = unique(torch.cat(global_candidates_, dim=0), dim=0)
    global_eis = [global_eis_[ind] for ind in indices]
    N, L = global_candidates.shape
    remained_indices = find_remained_indices(global_candidates, hb.eval_X_reduced, N)
    t2 = time.time()

    global_candidates = global_candidates[remained_indices]
    global_eis = [global_eis[ind] for ind in remained_indices]
    assert len(global_candidates) == len(global_eis), f'something wrong {len(global_candidates)}, {len(global_eis)}'

    global_eis = torch.Tensor(global_eis)

    if len(global_candidates) == 0:
        return None

    t3 = time.time()
    if dpp_type == 'no' or dpp_type == 'no_one':
        # plain top-k by acquisition value
        topk_values, topk_indices = torch.topk(global_eis, min(len(global_eis), batch_size))
        candidates = [hb.seq_by_indices(global_candidates[ind]) for ind in topk_indices]
    elif dpp_type == 'dpp_posterior':
        # shortlist the 100 best by acquisition, then pick a diverse batch
        # via a k-DPP over the surrogate posterior covariance
        t4 = time.time()
        topk_values, topk_indices = torch.topk(global_eis, min(len(global_eis), 100))
        global_candidates = global_candidates[topk_indices]

        num = min(len(global_candidates), batch_size)
        if acq_with_opt_indices:
            Lmat = surrogate_model.get_covar(global_candidates[:, opt_indices].cuda()).cpu().detach().numpy()
        else:
            Lmat = surrogate_model.get_covar(global_candidates.cuda()).cpu().detach().numpy()
        Lmat = Lmat / np.mean(np.abs(Lmat))
        if Lmat.shape[0] == num:
            best_indices = list(range(num))
        else:
            best_indices = dpp_sample(Lmat, k=num, T=0)
        candidates = [hb.seq_by_indices(global_candidates[ind]) for ind in best_indices]
    t5 = time.time()
    if len(candidates):
        return candidates
    else:
        # no unobserved candidate survived; the caller should widen the space
        return None


def unique(x, dim=None):
    # torch.unique plus, for each unique row, the index of its FIRST
    # occurrence: flipping inverse/perm before scatter_ lets earlier
    # occurrences overwrite later ones.
    unique_x, inverse = torch.unique(x, sorted=True, return_inverse=True, dim=dim)
    perm = torch.arange(inverse.size(0), dtype=inverse.dtype, device=inverse.device)
    inverse, perm = inverse.flip([0]), perm.flip([0])
    return unique_x, inverse.new_empty(unique_x.size(dim)).scatter_(0, inverse, perm)
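The flip-then-scatter trick in unique relies on scatter_'s write order for duplicate indices (formally unspecified, but the standard idiom for first-occurrence recovery). A small sketch with made-up rows, not part of the commit:

# hypothetical input: row [1, 2] appears at positions 0 and 2
x = torch.tensor([[1, 2], [3, 4], [1, 2]])
rows, first_idx = unique(x, dim=0)
# on CPU this yields rows == [[1, 2], [3, 4]] and first_idx == [0, 1],
# so x[first_idx] recovers each unique row at its earliest position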
140 changes: 140 additions & 0 deletions
textattack/algorithms/bayesopt/acquisition/algorithm/kmeanspp.py
@@ -0,0 +1,140 @@
import numpy as np


def euc_dist_sq(data1, data2):
    '''
    Pairwise squared Euclidean distances.
    inputs:
        data1 - numpy array of data points (n1, d)
        data2 - numpy array of data points (n2, d)
    '''
    n1, d1 = data1.shape
    n2, d2 = data2.shape
    assert d1 == d2, f"the embedding dimensions of data1 and data2 differ: {d1} != {d2}."
    d = d1
    c = np.reshape(data1, [n1, 1, d]) - np.reshape(data2, [1, n2, d])
    dist_sq = np.sum(np.square(c), axis=2)
    return dist_sq


def euc_dist_sq_test():
    data1 = np.random.randn(2, 4)
    data2 = np.random.randn(3, 4)
    print("data1 : ", data1)
    print("data2 : ", data2)
    print("euc_dist_sq : ", euc_dist_sq(data1, data2))


def hamming_dist_sq(data1, data2):
    '''
    Pairwise squared Hamming distances (number of differing coordinates, squared).
    inputs:
        data1 - numpy array of data points (n1, d)
        data2 - numpy array of data points (n2, d)
    '''
    n1, d1 = data1.shape
    n2, d2 = data2.shape
    assert d1 == d2, f"the embedding dimensions of data1 and data2 differ: {d1} != {d2}."
    d = d1
    c = (np.reshape(data1, [n1, 1, d]) != np.reshape(data2, [1, n2, d])) * 1.0
    dist_sq = np.square(np.sum(c, axis=2))
    return dist_sq


def hamming_dist_sq_test():
    data1 = np.random.randint(5, size=[2, 4])
    data2 = np.random.randint(5, size=[3, 4])
    print("data1 : ", data1)
    print("data2 : ", data2)
    print("hamming_dist_sq : ", hamming_dist_sq(data1, data2))


def kmeans_pp(data, k, dist='euclidean', init_ind=None):
    '''
    Initialize centroids in the style of k-means++.
    inputs:
        data - numpy array of data points having shape (n, d)
        k - number of clusters (k <= n)
        dist - the name of the metric ('euclidean' or 'hamming')
        init_ind - int (if None, a random initial index is drawn)
    '''
    ## initialize the centroid list with one (randomly) selected data point
    centroids = []
    selected_indices = []

    if init_ind is None:
        init_ind = np.random.randint(data.shape[0])
    centroids.append(data[init_ind, :])
    selected_indices.append(init_ind)

    if dist == 'euclidean':
        d_sq_func = euc_dist_sq
    elif dist == 'hamming':
        d_sq_func = hamming_dist_sq

    ## compute the remaining centroids
    for _ in range(k - 1):
        all_indices = list(range(data.shape[0]))
        unselected_indices = list(set(all_indices) - set(selected_indices))

        d_sq_to_centroid = d_sq_func(data[unselected_indices], data[selected_indices])
        min_d_sq_to_centroid = np.min(d_sq_to_centroid, axis=1)
        if np.sum(min_d_sq_to_centroid) == 0:
            break
        # deterministic farthest-point rule; the commented lines below are the
        # stochastic D^2-sampling variant of standard k-means++
        # prob = min_d_sq_to_centroid / np.sum(min_d_sq_to_centroid)
        # next_centroid_ind = all_indices[np.random.choice(unselected_indices, p=prob)]
        next_centroid_ind = unselected_indices[np.argmax(min_d_sq_to_centroid)]

        selected_indices.append(next_centroid_ind)
        centroids.append(data[next_centroid_ind, :])
    return np.array(centroids), selected_indices


def kmeans_pp_test():
    import matplotlib.pyplot as plt

    ## 1. euclidean
    data = np.random.randn(100, 2)
    centroids, selected_indices = kmeans_pp(data, 5, dist='euclidean')
    plt.scatter(data[:, 0], data[:, 1], label=0)
    plt.scatter(data[selected_indices, 0], data[selected_indices, 1], label=1)
    plt.savefig('kmeans_test1.png')
    plt.close()

    ## 2. hamming
    data = np.random.randint(20, size=[100, 2])
    centroids, selected_indices = kmeans_pp(data, 5, dist='hamming')
    plt.scatter(data[:, 0], data[:, 1], label=0)
    plt.scatter(data[selected_indices, 0], data[selected_indices, 1], label=1)
    plt.savefig('kmeans_test2.png')


def kmeans_pp_test2():
    # compare k-means++ initialization against random selection on
    # well-separated Gaussian clusters
    dl = []
    for i in range(10):
        dl.append(np.random.randn(100, 512) + i * 10)
    data = np.reshape(np.stack(dl), [1000, 512])
    print(data.shape)

    losses = []
    import time
    tt = 0
    for i in range(10):
        print(i)
        t0 = time.time()
        centroids, selected_indices = kmeans_pp(data, 30, dist='euclidean')
        t1 = time.time()
        tt += t1 - t0
        loss = np.sum(np.min(euc_dist_sq(data, centroids), axis=1))
        losses.append(loss)
    print("time : ", tt / 10)

    rnd_losses = []
    for i in range(10):
        indices = np.random.choice(data.shape[0], size=[30], replace=False)
        centroids = data[indices, :]
        loss = np.sum(np.min(euc_dist_sq(data, centroids), axis=1))
        rnd_losses.append(loss)

    print("kmeans", sum(losses) / len(losses), losses)
    print("random", sum(rnd_losses) / len(rnd_losses), rnd_losses)


if __name__ == '__main__':
    # euc_dist_sq_test()
    # hamming_dist_sq_test()
    kmeans_pp_test()
    kmeans_pp_test2()
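Since kmeans_pp uses a deterministic farthest-point rule after the initial pick, its behavior is easy to verify on toy data. A minimal sketch (hypothetical points, fixed init_ind, not part of the commit):

# starting from row 0, the farthest-point rule must select row 2 next,
# the unique farthest point from [0, 0] in squared Euclidean distance
data = np.array([[0.0, 0.0], [1.0, 0.0], [5.0, 5.0]])
centroids, idx = kmeans_pp(data, 2, dist='euclidean', init_ind=0)
assert idx == [0, 2]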
44 changes: 44 additions & 0 deletions
textattack/algorithms/bayesopt/dpp/dpp.py
@@ -0,0 +1,44 @@
import numpy as np
from dppy.finite_dpps import FiniteDPP


def dpp_init(L, k):
    # Greedy initialization: grow S one index at a time, always adding the
    # item that maximizes det(L[S][:, S]) (a standard greedy MAP heuristic
    # for k-DPPs).
    n, m = L.shape
    assert n == m, "L should be a square numpy matrix"
    assert n >= k, "candidate pool should be greater than or equal to k"

    S = [0]
    cur_det = np.linalg.det(L[S][:, S])
    while len(S) < k:
        det_best = -1e9
        S_best = None
        for i in range(n):
            if i in S:
                continue
            S_tmp = S + [i]
            submat = L[S_tmp][:, S_tmp]
            det = np.linalg.det(submat)
            if det > det_best:
                S_best = S_tmp
                det_best = det
        S = S_best
        cur_det = det_best
    return S, cur_det


def dpp_sample(L, k, T):
    n, m = L.shape
    assert n == m, "L should be a square numpy matrix"
    assert n >= k, "candidate pool should be greater than or equal to k"

    # greedy insertion; with T == 0 this is the final answer
    S, cur_det = dpp_init(L, k)
    if T == 0:
        return S
    try:
        DPP = FiniteDPP('likelihood', **{'L': L})
        S = DPP.sample_mcmc_k_dpp(size=k, s_init=S, nb_iter=T)
        return S
    except Exception:
        # if L is not numerically PSD, retry with a small diagonal jitter
        L_ = L + 1e-8 * np.eye(n)
        DPP = FiniteDPP('likelihood', **{'L': L_})
        S = DPP.sample_mcmc_k_dpp(size=k, s_init=S, nb_iter=T)
        return S
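A minimal usage sketch for dpp_sample (toy PSD kernel with hypothetical values; with T=0 only the greedy initialization runs, so dppy is never invoked):

# build a small positive semi-definite likelihood kernel and greedily
# pick a diverse subset of 3 of the 6 items
rng = np.random.RandomState(0)
X = rng.randn(6, 3)
L = X @ X.T + 1e-6 * np.eye(6)
print(dpp_sample(L, k=3, T=0))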