diff --git a/src/test_dtree.h b/src/test_dtree.h deleted file mode 100644 index b5e24e8..0000000 --- a/src/test_dtree.h +++ /dev/null @@ -1,62 +0,0 @@ -#include - -#include "dtree.h" - -class TestDTree : public CxxTest::TestSuite { - DTree dt; - -public: - - void setUp() { - } - - void tearDown() { - } - - void test_parse() { - dt.parse("0,1:2,3;4,5"); - TS_ASSERT_EQUALS(dt.eps.size(), 2); - TS_ASSERT_EQUALS(dt.eps[0].size(), 2); - TS_ASSERT_EQUALS(dt.eps[0][0], 0); - TS_ASSERT_EQUALS(dt.eps[0][1], 1); - TS_ASSERT_EQUALS(dt.eps[1].size(), 2); - TS_ASSERT_EQUALS(dt.eps[1][0], 2); - TS_ASSERT_EQUALS(dt.eps[1][1], 3); - TS_ASSERT_EQUALS(dt.np.size(), 2); - TS_ASSERT_EQUALS(dt.np[0], 4); - TS_ASSERT_EQUALS(dt.np[1], 5); - - dt.parse(";"); - TS_ASSERT_EQUALS(dt.eps.size(), 0); - TS_ASSERT_EQUALS(dt.np.size(), 0); - - dt.parse("0,1,2;"); - TS_ASSERT_EQUALS(dt.eps.size(), 1); - TS_ASSERT_EQUALS(dt.eps[0].size(), 3); - TS_ASSERT_EQUALS(dt.eps[0][0], 0); - TS_ASSERT_EQUALS(dt.eps[0][1], 1); - TS_ASSERT_EQUALS(dt.eps[0][2], 2); - TS_ASSERT_EQUALS(dt.np.size(), 0); - - dt.parse(";0,1,2"); - TS_ASSERT_EQUALS(dt.eps.size(), 0); - TS_ASSERT_EQUALS(dt.np.size(), 3); - TS_ASSERT_EQUALS(dt.np[0], 0); - TS_ASSERT_EQUALS(dt.np[1], 1); - TS_ASSERT_EQUALS(dt.np[2], 2); - } - - void test_str() { - dt.parse("0,1:2,3;4,5"); - TS_ASSERT_EQUALS(dt.str(), "Ep(0,1)^Ep(2,3)^Np(4,5)"); - - dt.parse(";"); - TS_ASSERT_EQUALS(dt.str(), ""); - - dt.parse("0,1,2;"); - TS_ASSERT_EQUALS(dt.str(), "Ep(0,1,2)"); - - dt.parse(";0,1,2"); - TS_ASSERT_EQUALS(dt.str(), "Np(0,1,2)"); - } -}; diff --git a/src/test_lda.h b/src/test_lda.h deleted file mode 100644 index 54f0ce9..0000000 --- a/src/test_lda.h +++ /dev/null @@ -1,252 +0,0 @@ -#include - -#include -using namespace std; - -#include "lda.h" - -class TestLDA : public CxxTest::TestSuite { - LDA lda; - double delta; - - public: - - void setUp() { - string data_file = "../data/test.dat"; - int num_topics = 2; - double alpha = 0.1; - double beta = 0.1; - lda = LDA(data_file, "", num_topics, alpha, beta); - delta = 0.00001; - } - - void tearDown() { - } - - void test_initialize() { - lda.load_data(lda.data_file); - lda.initialize(); - - TS_ASSERT_EQUALS(lda.cz.size(), lda.num_topics); - for(int z = 0; z < lda.num_topics; ++z) { - TS_ASSERT_EQUALS(lda.cz[z], 0); - } - - TS_ASSERT_EQUALS(lda.cdz.size(), lda.num_docs); - for(int d = 0; d < lda.num_docs; ++d) { - TS_ASSERT_EQUALS(lda.cdz[d].size(), lda.num_topics); - for(int z = 0; z < lda.num_topics; ++z) { - TS_ASSERT_EQUALS(lda.cdz[d][z], 0); - } - } - - TS_ASSERT_EQUALS(lda.cwz.size(), lda.num_words); - for(int w = 0; w < lda.num_words; ++w) { - TS_ASSERT_EQUALS(lda.cwz[w].size(), lda.num_topics); - for(int z = 0; z < lda.num_topics; ++z) { - TS_ASSERT_EQUALS(lda.cwz[w][z], 0); - } - } - - TS_ASSERT_EQUALS(lda.hz.size(), lda.num_docs); - for(int d = 0; d < lda.num_docs; ++d) { - TS_ASSERT_EQUALS(lda.hz[d].size(), lda.nd[d]); - for(int i = 0; i < lda.nd[d]; ++i) { - TS_ASSERT_EQUALS(lda.hz[d][i], 0); - } - } - - TS_ASSERT_EQUALS(lda.alphas.size(), lda.num_topics); - for(int z = 0; z < lda.num_topics; ++z) { - TS_ASSERT_DELTA(lda.alphas[z], lda.alpha, delta); - } - - TS_ASSERT_EQUALS(lda.betas.size(), lda.num_words); - for(int w = 0; w < lda.num_words; ++w) { - TS_ASSERT_DELTA(lda.betas[w], lda.beta, delta); - } - - TS_ASSERT_EQUALS(lda.probs.size(), lda.num_topics); - TS_ASSERT_EQUALS(lda.phi.size(), lda.num_topics); - for(int z = 0; z < lda.num_topics; ++z) { - TS_ASSERT_EQUALS(lda.phi[z].size(), lda.num_words); - } - TS_ASSERT_EQUALS(lda.theta.size(), lda.num_docs); - for(int d = 0; d < lda.num_docs; ++d) { - TS_ASSERT_EQUALS(lda.theta[d].size(), lda.num_topics); - } - } - - void test_preprocess() { - lda.load_data(lda.data_file); - lda.initialize(); - lda.preprocess(); - - TS_ASSERT_EQUALS(sum(lda.cz), lda.num_terms); - - int sum_cdz = 0; - for(int d = 0; d < lda.num_docs; ++d) { - TS_ASSERT_EQUALS(sum(lda.cdz[d]), lda.nd.size()); - sum_cdz += sum(lda.cdz[d]); - } - TS_ASSERT_EQUALS(sum_cdz, lda.num_terms); - - int sum_cwz = 0; - for(int w = 0; w < lda.num_words; ++w) { - sum_cwz += sum(lda.cwz[w]); - } - TS_ASSERT_EQUALS(sum_cwz, lda.num_terms); - } - - void test_load_data() { - lda.load_data(lda.data_file); - TS_ASSERT_EQUALS(lda.docs.size(), 4); - TS_ASSERT_EQUALS(lda.docs[0].size(), 4); - TS_ASSERT_EQUALS(lda.docs[1].size(), 4); - TS_ASSERT_EQUALS(lda.docs[0][0], 0); - TS_ASSERT_EQUALS(lda.docs[0][1], 0); - TS_ASSERT_EQUALS(lda.docs[0][2], 1); - TS_ASSERT_EQUALS(lda.docs[0][3], 1); - TS_ASSERT_EQUALS(lda.docs[1][0], 0); - TS_ASSERT_EQUALS(lda.docs[1][1], 0); - TS_ASSERT_EQUALS(lda.docs[1][2], 2); - TS_ASSERT_EQUALS(lda.docs[1][3], 2); - - TS_ASSERT_EQUALS(lda.num_docs, 4); - TS_ASSERT_EQUALS(lda.num_words, 3); - TS_ASSERT_EQUALS(lda.num_terms, 16); - - TS_ASSERT_EQUALS(lda.nd.size(), 4); - TS_ASSERT_EQUALS(lda.nd[0], 4); - TS_ASSERT_EQUALS(lda.nd[1], 4); - TS_ASSERT_EQUALS(lda.nd[2], 4); - TS_ASSERT_EQUALS(lda.nd[3], 4); - } - - void test_resample_pre() { - lda.load_data(lda.data_file); - lda.initialize(); - lda.preprocess(); - - int cz = lda.cz[0]; - int cdz = lda.cdz[0][0]; - int cwz = lda.cwz[0][0]; - lda.resample_pre(0, 0, 0); - - TS_ASSERT_EQUALS(lda.cz[0], cz - 1); - TS_ASSERT_EQUALS(lda.cdz[0][0], cdz - 1); - TS_ASSERT_EQUALS(lda.cwz[0][0], cwz - 1); - } - - void test_resample_post() { - lda.load_data(lda.data_file); - lda.initialize(); - lda.preprocess(); - - int cz = lda.cz[0]; - int cdz = lda.cdz[0][0]; - int cwz = lda.cwz[0][0]; - lda.resample_post(0, 0, 0); - - TS_ASSERT_EQUALS(lda.cz[0], cz + 1); - TS_ASSERT_EQUALS(lda.cdz[0][0], cdz + 1); - TS_ASSERT_EQUALS(lda.cwz[0][0], cwz + 1); - } - - void test_calc_probs() { - lda.load_data(lda.data_file); - lda.initialize(); - - // toy sample - lda.cz[0] = lda.num_terms; - lda.cdz[0][0] = lda.num_terms; - lda.cwz[0][0] = lda.num_terms; - - lda.calc_probs(0, 0, lda.probs); - - double alpha = lda.alpha; - double beta = lda.alpha; - int num_terms = lda.num_terms; - int num_words = lda.num_words; - int num_topics = lda.num_topics; - - vector true_probs(num_topics, (0 + beta) * (0 + alpha) / (0 + beta * num_words)); - true_probs[0] = (num_terms + beta) * (num_terms + alpha) / (num_terms + beta * num_words); - norm(true_probs); - - for(int z = 0; z < lda.num_topics; ++z) { - TS_ASSERT_DELTA(lda.probs[z], true_probs[z], delta); - } - } - - void test_calc_perplexity() { - lda.load_data(lda.data_file); - lda.initialize(); - - // toy sample - lda.cz[0] = lda.num_terms; - lda.cdz[0][0] = lda.num_terms; - lda.cwz[0][0] = lda.num_terms; - - double pp = lda.calc_perplexity(); - TS_ASSERT_DELTA(pp, 4.15282, delta); - } - - void test_get_phi_theta() { - lda.load_data(lda.data_file); - lda.initialize(); - - // toy sample - lda.cz[0] = lda.num_terms; - lda.cdz[0][0] = lda.num_terms; - lda.cwz[0][0] = lda.num_terms; - - double alpha = lda.alpha; - double beta = lda.alpha; - double num_terms = lda.num_terms; - double num_words = lda.num_words; - double num_topics = lda.num_topics; - - double true_phi[2][3] = { - {(num_terms + beta) / (num_terms + num_words * beta), - (0 + beta)/(num_terms + num_words * beta), - (0 + beta)/(num_terms + num_words * beta) - }, - {(0 + beta)/(0 + num_words * beta), - (0 + beta)/(0 + num_words * beta), - (0 + beta)/(0 + num_words * beta) - }, - }; - - lda.get_phi(lda.phi); - for(int z = 0; z < num_topics; ++z) { - TS_ASSERT_DELTA(sum(lda.phi[z]), 1.0, delta); - for(int w = 0; w < lda.num_words; ++w) { - TS_ASSERT_DELTA(lda.phi[z][w], true_phi[z][w], delta); - } - } - - double true_theta[4][2] = { - {(num_terms + alpha) / (num_terms + num_topics * alpha), - (0 + alpha) / (num_terms + num_topics * alpha) - }, - {(0 + alpha) / (0 + num_topics * alpha), - (0 + alpha) / (0 + num_topics * alpha) - }, - {(0 + alpha) / (0 + num_topics * alpha), - (0 + alpha) / (0 + num_topics * alpha) - }, - {(0 + alpha) / (0 + num_topics * alpha), - (0 + alpha) / (0 + num_topics * alpha) - }, - }; - - lda.get_theta(lda.theta); - for(int d = 0; d < lda.num_docs; ++d) { - TS_ASSERT_DELTA(sum(lda.theta[d]), 1.0, delta); - for(int z = 0; z < num_topics; ++z) { - TS_ASSERT_DELTA(lda.theta[d][z], true_theta[d][z], delta); - } - } - } -}; diff --git a/src/test_ldadf.h b/src/test_ldadf.h deleted file mode 100644 index 0742553..0000000 --- a/src/test_ldadf.h +++ /dev/null @@ -1,381 +0,0 @@ -#include - -#include "ldadf.h" -#include "utils.h" - -using namespace std; - -class TestLDADF : public CxxTest::TestSuite { - LDADF lda; - double delta; - string tmp_file; - -public: - - void setUp() { - string dat_file = "../data/test.dat"; - string out_base = ""; - string dnf_file = "../data/test.dnf"; - int num_topics = 2; - int max_steps = 10; - int num_loops = 10; - int burn_in = 5; - int seed = 0; - double alpha = 1.0; - double beta = 0.01; - double eta = 10; - bool converge = false; - bool verbose = false; - lda = LDADF(dat_file, out_base, num_topics, alpha, beta, - max_steps, num_loops, burn_in, converge, seed, verbose, - dnf_file, eta); - delta = 0.00001; - tmp_file = "./test.tmp"; - } - - void tearDown() { - } - - void test_initialize() { - lda.initialize(); - - TS_ASSERT_EQUALS(lda.ctz.size(), lda.num_dtrees); - TS_ASSERT_EQUALS(lda.ctze.size(), lda.num_dtrees); - for(int t = 0; t < lda.num_dtrees; ++t) { - TS_ASSERT_EQUALS(lda.ctz[t].size(), lda.num_topics); - TS_ASSERT_EQUALS(lda.ctze[t].size(), lda.num_topics); - for(int z = 0; z < lda.num_topics; ++z) { - TS_ASSERT_EQUALS(lda.ctz[t][z], 0); - TS_ASSERT_EQUALS(lda.ctze[t][z].size(), lda.dtrees[t].eps.size()); - for(int e = 0; e < lda.dtrees[t].eps.size(); ++e) { - TS_ASSERT_EQUALS(lda.ctze[t][z][e], 0); - } - } - } - - TS_ASSERT_EQUALS(lda.dz.size(), lda.num_topics); - } - - void test_preprocess() { - lda.initialize(); - lda.preprocess(); - - TS_ASSERT_EQUALS(sum(lda.cz), lda.num_terms); - - int sum_cdz = 0; - for(int d = 0; d < lda.num_docs; ++d) { - TS_ASSERT_EQUALS(sum(lda.cdz[d]), lda.nd.size()); - sum_cdz += sum(lda.cdz[d]); - } - TS_ASSERT_EQUALS(sum_cdz, lda.num_terms); - - int sum_cwz = 0; - for(int w = 0; w < lda.num_words; ++w) { - sum_cwz += sum(lda.cwz[w]); - } - TS_ASSERT_EQUALS(sum_cwz, lda.num_terms); - - for(int t = 0; t < lda.num_dtrees; ++t) { - DTree &dt = lda.dtrees[t]; - for(int z = 0; z < lda.num_topics; ++z) { - int ctz = 0; - vector ctze(lda.ctze[t][z].size(), 0); - for(int w = 0; w < lda.num_words; ++w) { - int e; - switch(dt.get_type(w)) { - case DTree::Ep: - e = dt.get_ep(w); - ctze[e] += lda.cwz[w][z]; - ctz += lda.cwz[w][z]; - break; - case DTree::None: - ctz += lda.cwz[w][z]; - break; - } - } - TS_ASSERT_EQUALS(ctz, lda.ctz[t][z]); - for(int e = 0; e < dt.eps.size(); ++e) { - TS_ASSERT_EQUALS(ctze[e], lda.ctze[t][z][e]); - } - } - } - } - - void test_resample() { - lda.initialize(); - lda.preprocess(); - lda.resample(); - for(int z = 0; z < lda.num_topics; ++z) { - TS_ASSERT(lda.dz[z] >= 0); - TS_ASSERT(lda.dz[z] < lda.num_dtrees); - } - } - - void test_resample_post_pre() { - lda.initialize(); - lda.dz[0] = 0; - lda.dz[1] = 1; - - // fixed sampling - for(int i = 0; i < 4; ++i) { - lda.resample_post(0, 0, 1); - lda.resample_post(1, 0, 0); - lda.resample_post(2, 1, 1); - lda.resample_post(3, 2, 0); - } - - TS_ASSERT_EQUALS(lda.cdz[0][1], 4); - TS_ASSERT_EQUALS(lda.cdz[1][0], 4); - TS_ASSERT_EQUALS(lda.cdz[2][1], 4); - TS_ASSERT_EQUALS(lda.cdz[3][0], 4); - TS_ASSERT_EQUALS(lda.cdz[0][0], 0); - TS_ASSERT_EQUALS(lda.cdz[1][1], 0); - TS_ASSERT_EQUALS(lda.cdz[2][0], 0); - TS_ASSERT_EQUALS(lda.cdz[3][1], 0); - TS_ASSERT_EQUALS(lda.cz[0], 8); - TS_ASSERT_EQUALS(lda.cz[1], 8); - TS_ASSERT_EQUALS(lda.cwz[0][0], 4); - TS_ASSERT_EQUALS(lda.cwz[0][1], 4); - TS_ASSERT_EQUALS(lda.cwz[1][0], 0); - TS_ASSERT_EQUALS(lda.cwz[1][1], 4); - TS_ASSERT_EQUALS(lda.cwz[2][0], 4); - TS_ASSERT_EQUALS(lda.cwz[2][1], 0); - - TS_ASSERT_EQUALS(lda.ctz[0][0], 4); - TS_ASSERT_EQUALS(lda.ctze[0][0].size(), 0); - TS_ASSERT_EQUALS(lda.ctz[1][1], 8); - TS_ASSERT_EQUALS(lda.ctze[1][1].size(), 1); - TS_ASSERT_EQUALS(lda.ctze[1][1][0], 8); - - // reversed fixed-sampling - for(int i = 0; i < 4; ++i) { - lda.resample_pre(0, 0, 1); - lda.resample_pre(1, 0, 0); - lda.resample_pre(2, 1, 1); - lda.resample_pre(3, 2, 0); - } - - TS_ASSERT_EQUALS(lda.cdz[0][1], 0); - TS_ASSERT_EQUALS(lda.cdz[1][0], 0); - TS_ASSERT_EQUALS(lda.cdz[2][1], 0); - TS_ASSERT_EQUALS(lda.cdz[3][0], 0); - TS_ASSERT_EQUALS(lda.cdz[0][0], 0); - TS_ASSERT_EQUALS(lda.cdz[1][1], 0); - TS_ASSERT_EQUALS(lda.cdz[2][0], 0); - TS_ASSERT_EQUALS(lda.cdz[3][1], 0); - TS_ASSERT_EQUALS(lda.cz[0], 0); - TS_ASSERT_EQUALS(lda.cz[1], 0); - TS_ASSERT_EQUALS(lda.cwz[0][0], 0); - TS_ASSERT_EQUALS(lda.cwz[0][1], 0); - TS_ASSERT_EQUALS(lda.cwz[1][0], 0); - TS_ASSERT_EQUALS(lda.cwz[1][1], 0); - TS_ASSERT_EQUALS(lda.cwz[2][0], 0); - TS_ASSERT_EQUALS(lda.cwz[2][1], 0); - - TS_ASSERT_EQUALS(lda.ctz[0][0], 0); - TS_ASSERT_EQUALS(lda.ctze[0][0].size(), 0); - TS_ASSERT_EQUALS(lda.ctz[1][1], 0); - TS_ASSERT_EQUALS(lda.ctze[1][1].size(), 1); - TS_ASSERT_EQUALS(lda.ctze[1][1][0], 0); - } - - void test_load_dnf() { - lda.load_dnf(lda.dnf_file); - TS_ASSERT_EQUALS(lda.dtrees.size(), 2); - TS_ASSERT_EQUALS(lda.dtrees[0].eps.size(), 0); - TS_ASSERT_EQUALS(lda.dtrees[0].np.size(), 2); - TS_ASSERT_EQUALS(lda.dtrees[0].np[0], 0); - TS_ASSERT_EQUALS(lda.dtrees[0].np[1], 1); - TS_ASSERT_EQUALS(lda.dtrees[1].eps.size(), 1); - TS_ASSERT_EQUALS(lda.dtrees[1].eps[0].size(), 2); - TS_ASSERT_EQUALS(lda.dtrees[1].eps[0][0], 0); - TS_ASSERT_EQUALS(lda.dtrees[1].eps[0][1], 1); - TS_ASSERT_EQUALS(lda.dtrees[1].np.size(), 1); - TS_ASSERT_EQUALS(lda.dtrees[1].np[0], 2); - } - - void test_calc_dtree_probs() { - lda.initialize(); - lda.dz[0] = 0; - lda.dz[1] = 1; - - // fixed sampling - for(int i = 0; i < 4; ++i) { - lda.resample_post(0, 0, 1); - lda.resample_post(0, 0, 0); - lda.resample_post(0, 1, 1); - lda.resample_post(0, 2, 0); - } - - double beta = lda.beta; - double eta = lda.eta; - int num_np, num_nonp, num_ep; - - vector true_probs(2); - vector probs(2); - - // dtree probs when z = 0 - lda.calc_dtree_probs(0, probs); - - // true prob when z = 0 and dnf = ";0,1" - num_np = 2; - num_nonp = 1; - num_ep = 0; - true_probs[0] = num_nonp - // from root node - * tgamma(beta * eta * num_nonp + beta * num_np) / tgamma(8 + beta * eta * num_nonp + beta * num_np) - * tgamma(4 + beta * eta * num_nonp) / tgamma(beta * eta * num_nonp) // non-np node - * tgamma(4 + beta) / tgamma(beta) // Np(0) - * tgamma(0 + beta) / tgamma(beta) // Np(1) - // from non-np node - * tgamma(beta * num_nonp) / tgamma(4 + beta * num_nonp) - * tgamma(4 + beta) / tgamma(beta); // normal leaf 2 - - // true prob when z = 0 and dnf = "0,1;2" - num_np = 1; - num_nonp = 2; - num_ep = 2; - true_probs[1] = num_nonp - // from root node - * tgamma(beta * eta * num_nonp + beta * num_np) / tgamma(8 + beta * eta * num_nonp + beta * num_np) - * tgamma(4 + beta * eta * num_nonp) / tgamma(beta * eta * num_nonp) // non-np node - * tgamma(4 + beta) / tgamma(beta) // Np(2) - // from non-np node - * tgamma(beta * num_nonp) / tgamma(4 + beta * num_nonp) - * tgamma(4 + beta * num_ep) / tgamma(beta * num_ep) // eps node - // from eps node - * tgamma(beta * eta * num_ep) / tgamma(4 + beta * eta * num_ep) - * tgamma(4 + beta * eta) / tgamma(beta * eta) // 0 of Ep(0,1) - * tgamma(0 + beta * eta) / tgamma(beta * eta); // 1 of Ep(0,1) - norm(true_probs); - - TS_ASSERT_DELTA(probs[0], true_probs[0], delta); - TS_ASSERT_DELTA(probs[1], true_probs[1], delta); - - // dtree probs when z = 1 - lda.calc_dtree_probs(1, probs); - - // true prob when z = 1 and dnf = ";0,1" - num_np = 2; - num_nonp = 1; - num_ep = 0; - true_probs[0] = 1 - // for root node - * tgamma(beta * eta * num_nonp + beta * num_np) / tgamma(8 + beta * eta * num_nonp + beta * num_np) - * tgamma(0 + beta * eta * num_nonp) / tgamma(beta * eta * num_nonp) // non-np node - * tgamma(4 + beta) / tgamma(beta) // Np(0) - * tgamma(4 + beta) / tgamma(beta) // Np(1) - // for non-np node - * tgamma(beta * num_nonp) / tgamma(0 + beta * num_nonp) - * tgamma(0 + beta) / tgamma(beta); // normal leaf 2 - - // true prob when z = 1 and dnf = "0,1;2" - num_np = 1; - num_nonp = 2; - num_ep = 2; - true_probs[1] = 2 - // for root node - * tgamma(beta * eta * num_nonp + beta * num_np) / tgamma(8 + beta * eta * num_nonp + beta * num_np) - * tgamma(8 + beta * eta * num_nonp) / tgamma(beta * eta * num_nonp) // non-np node - * tgamma(0 + beta) / tgamma(beta) // Np(2) - // for non-np node - * tgamma(beta * num_nonp) / tgamma(8 + beta * num_nonp) - * tgamma(8 + beta * num_ep) / tgamma(beta * num_ep) // eps node - // for eps node - * tgamma(beta * eta * num_ep) / tgamma(8 + beta * eta * num_ep) - * tgamma(4 + beta * eta) / tgamma(beta * eta) // 0 of Ep(0,1) - * tgamma(4 + beta * eta) / tgamma(beta * eta); // 1 of Ep(0,1) - norm(true_probs); - - TS_ASSERT_DELTA(probs[0], true_probs[0], delta); - TS_ASSERT_DELTA(probs[1], true_probs[1], delta); - } - - void test_calc_dtree_probs_real() { - // make dnf file - ofstream file(tmp_file.c_str()); - file << ";1,0\n0,1;2\n0,2;1\n;2,0"; // (ML(0,1) | ML(0,2)) & CL(1,2) - file.close(); - - lda.dnf_file = tmp_file; - lda.initialize(); - - // 1st sample of seed=92 - int sample_hz[4][4] = { - {1, 0, 1, 1}, - {0, 1, 0, 0}, - {1, 1, 1, 1}, - {0, 1, 1, 1} - }; - - // preprocess by sample_hz - for(int d = 0; d < lda.num_docs; ++d) { - for(int i = 0; i < lda.nd[d]; ++i) { - int w = lda.docs[d][i]; - int z = sample_hz[d][i]; - lda.resample_post(d, w, z); - } - } - - // true probs with eta=10, alpha=0.01, beta=0.01, - double true_probs[2][4] = { - {0.0764325, 0.0770787, 0.839471, 0.0070179}, - {0.0410388, 0.47415, 0.440653, 0.0441584} - }; - - vector probs(lda.num_dtrees); - for(int z = 0; z < lda.num_topics; ++z) { - lda.calc_dtree_probs(z, probs); - for(int t = 0; t < lda.num_dtrees; ++t) { - TS_ASSERT_DELTA(probs[t], true_probs[z][t], delta); - } - } - } - - void test_calc_prob_weight() { - lda.initialize(); - lda.dz[0] = 0; - lda.dz[1] = 1; - - // fixed sampling - for(int i = 0; i < 4; ++i) { - lda.resample_post(0, 0, 1); - lda.resample_post(0, 0, 0); - lda.resample_post(0, 1, 1); - lda.resample_post(0, 2, 0); - } - - double beta = lda.beta; - double eta = lda.eta; - int num_words = lda.num_words; - int num_topics = lda.num_topics; - int num_np, num_nonp, num_ep; - - // z = 0 -> dnf = ";0,1" - num_np = 2; - num_nonp = 1; - num_ep = 0; - TS_ASSERT_EQUALS(lda.calc_prob_weight(0, 0), // Np(0) - (4 + beta) / (8 + beta * eta * num_nonp + beta * num_np)); - TS_ASSERT_EQUALS(lda.calc_prob_weight(1, 0), // Np(1) - (0 + beta) / (8 + beta * eta * num_nonp + beta * num_np)); - TS_ASSERT_EQUALS(lda.calc_prob_weight(2, 0), // free - (4 + beta) / (4 + beta * num_nonp) - * (4 + beta * eta * num_nonp) / (8 + beta * eta * num_nonp + beta * num_np)); - - // z = 1 -> dnf = "0,1;2" - num_np = 1; - num_nonp = 2; - num_ep = 2; - TS_ASSERT_EQUALS(lda.calc_prob_weight(0, 1), // Ep(0,1) - (4 + beta * eta) / (8 + beta * eta * num_ep) - * (8 + beta * num_ep) / (8 + beta * num_nonp) - * (8 + beta * eta * num_nonp) / (8 + beta * eta * num_nonp + beta * num_np)); - TS_ASSERT_EQUALS(lda.calc_prob_weight(1, 1), // Ep(0,1) - (4 + beta * eta) / (8 + beta * eta * num_ep) - * (8 + beta * num_ep) / (8 + beta * num_nonp) - * (8 + beta * eta * num_nonp) / (8 + beta * eta * num_nonp + beta * num_np)); - TS_ASSERT_EQUALS(lda.calc_prob_weight(2, 1), // Np(2) - (0 + beta) / (8 + beta * eta * num_nonp + beta * num_np)); - } -}; diff --git a/src/test_utils.h b/src/test_utils.h deleted file mode 100644 index e1af8d9..0000000 --- a/src/test_utils.h +++ /dev/null @@ -1,188 +0,0 @@ -#include - -#include -using namespace std; - -#include "utils.h" -using namespace ldautils; - -class TestUtils : public CxxTest::TestSuite { - double delta; - string tmp_file; - - public: - - void setUp() { - delta = 0.00001; - tmp_file = "./test.tmp"; - } - - void tearDown() { - } - - /* string */ - - void test_split() { - vector list, list2; - split("1:2 3:4 ", ' ', list); - //TS_ASSERT_EQUALS((int)list.size(), 3); - TS_ASSERT_EQUALS((int)list.size(), 4); - TS_ASSERT_EQUALS(list[0], string("1:2")); - TS_ASSERT_EQUALS(list[1], string("")); - TS_ASSERT_EQUALS(list[2], string("3:4")); - split(list[0], ':', list2); // "1:2" - TS_ASSERT_EQUALS((int)list2.size(), 2); - TS_ASSERT_EQUALS(list2[0], string("1")); - TS_ASSERT_EQUALS(list2[1], string("2")); - split(list[1], ':', list2); // "" - //TS_ASSERT_EQUALS((int)list2.size(), 0); - TS_ASSERT_EQUALS((int)list2.size(), 1); - split(list[2], ':', list2); // "3:4" - TS_ASSERT_EQUALS((int)list2.size(), 2); - TS_ASSERT_EQUALS(list2[0], string("3")); - TS_ASSERT_EQUALS(list2[1], string("4")); - split(list[3], ':', list2); // "" - TS_ASSERT_EQUALS((int)list2.size(), 1); - - split("1;", ';', list); - TS_ASSERT_EQUALS((int)list.size(), 2); - TS_ASSERT_EQUALS(list[0], "1"); - split(";2", ';', list); - TS_ASSERT_EQUALS((int)list.size(), 2); - TS_ASSERT_EQUALS(list[0], ""); - TS_ASSERT_EQUALS(list[1], "2"); - } - - /* math */ - - void test_math() { - double a[] = {2.0, 1.0, 3.0}; - vector vec(a, a+3); - TS_ASSERT_EQUALS(max(vec), 3.0); - TS_ASSERT_EQUALS(min(vec), 1.0); - TS_ASSERT_EQUALS(sum(vec), 6.0); - TS_ASSERT_EQUALS(argmax(vec), 2); - } - - void test_digamma() { - TS_ASSERT_DELTA(digamma(1), -0.57721566, delta); - TS_ASSERT_DELTA(digamma(5), 1.50611766, delta); - TS_ASSERT_DELTA(digamma(10), 2.25175258, delta); - } - - /* prob */ - - void test_norm() { - vector vec(2, 1.0); - norm(vec); - TS_ASSERT_EQUALS(vec[0], 0.5); - TS_ASSERT_EQUALS(vec[1], 0.5); - vector vec2(2, 0.0); - norm(vec2); - TS_ASSERT_EQUALS(vec2[0], 0.5); - TS_ASSERT_EQUALS(vec2[1], 0.5); - } - - void test_multi() { - vector vec(2, 0.0); - vec[0] = 1.0; - TS_ASSERT_EQUALS(multi(vec), 0); - vector vec2(10, 0.0); - vec2[9] = 1.0; - TS_ASSERT_EQUALS(multi(vec2), 9); - } - - /* matrix */ - - void test_transpose() { - double matval[2][3] = { - {1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0} - }; - vector > mat; - for(int i = 0; i < 2; ++i) { - mat.push_back(vector(matval[i], matval[i] + 3)); - } - vector > tmat; - transpose(mat, tmat); - - double true_mat[3][2] = { - {1.0, 4.0}, - {2.0, 5.0}, - {3.0, 6.0} - }; - TS_ASSERT_EQUALS(static_cast(tmat.size()), 3); - for(int i = 0; i < 3; ++i) { - TS_ASSERT_EQUALS(static_cast(tmat[i].size()), 2); - for(int j = 0; j < 2; ++j) { - TS_ASSERT_EQUALS(tmat[i][j], true_mat[i][j]); - } - } - } - - void test_save_matrix() { - double matval[2][3] = { - {1.1, 1.2, 1.3}, - {2.1, 2.2, 2.3}, - }; - vector > mat; - for(int i = 0; i < 2; ++i) { - mat.push_back(vector(matval[i], matval[i] + 3)); - } - save_matrix(tmp_file, mat); - - string line; - ifstream file(tmp_file.c_str()); - const char *true_mat[] = { - "1.1 1.2 1.3 ", - "2.1 2.2 2.3 " - }; - for(int i = 0; getline(file, line); ++i) { - TS_ASSERT_EQUALS(line, true_mat[i]); - } - } - - void test_save_matrix_t() { - double matval[2][3] = { - {1.1, 1.2, 1.3}, - {2.1, 2.2, 2.3}, - }; - vector > mat; - for(int i = 0; i < 2; ++i) { - mat.push_back(vector(matval[i], matval[i] + 3)); - } - save_matrix_t(tmp_file, mat); - - string line; - ifstream file(tmp_file.c_str()); - const char *true_mat[] = { - "1.1 2.1 ", - "1.2 2.2 ", - "1.3 2.3 " - }; - for(int i = 0; getline(file, line); ++i) { - TS_ASSERT_EQUALS(line, true_mat[i]); - } - } - - void test_load_matrix() { - ofstream file(tmp_file.c_str()); - file << "1.1 1.2 1.3 " << endl; - file << "2.1 2.2 2.3 " << endl; - file.close(); - vector > mat; - load_matrix(tmp_file, mat); - - double true_mat[2][3] = { - {1.1, 1.2, 1.3}, - {2.1, 2.2, 2.3} - }; - for(int i = 0; i < mat.size(); ++i) { - for(int j = 0; j < mat[0].size(); ++j) { - TS_ASSERT_DELTA(mat[i][j], true_mat[i][j], delta); - } - } - } - -}; -