Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hyperparameters estimation for LDA #193

Open
wants to merge 36 commits into
base: develop
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
1aa7032
+ added optimization.h and .gitignore updated
alex2304 Nov 18, 2017
5e3e23c
[opt] dirichlet_optimizer class, digamma function
alex2304 Nov 19, 2017
b8dbc7d
[opt] minka_fpi method draft
alex2304 Nov 19, 2017
3dc03a8
[opt] optimization.h errors fixed, test without MeTa
alex2304 Nov 20, 2017
eeb9168
[opt] debug output
alex2304 Nov 20, 2017
766754f
Adding optimization.cpp
MakKolts Nov 20, 2017
98c3e7d
Merge branch 'develop' of https://github.com/alex2304/meta into develop
Nov 20, 2017
e9c99df
[opt] classes for methods in dirichlet_prior
alex2304 Nov 29, 2017
b11e704
Merge branch 'develop' of https://github.com/alex2304/meta into develop
Nov 29, 2017
54d7272
Deletion of previous stuff
MakKolts Nov 29, 2017
6585189
Test for dirichlet optimizations
MakKolts Nov 29, 2017
c0a357c
Private/public methods
MakKolts Nov 29, 2017
76d32ae
[opt] test indexes
alex2304 Nov 29, 2017
4ccda58
Interface for methods
MakKolts Nov 29, 2017
248c151
Refactoring of optimization interface
MakKolts Nov 29, 2017
61ece78
[opt] tmp for merge
alex2304 Nov 29, 2017
f979264
Tests for all functions at same time
MakKolts Nov 29, 2017
ba00c86
[opt] + term_ids()
alex2304 Nov 29, 2017
1f13f95
[opt] merged dirichlet_prior
alex2304 Nov 29, 2017
ed475b5
[opt] + first method without testing
alex2304 Nov 30, 2017
4528ec6
[opt] *first method builds
alex2304 Nov 30, 2017
312a485
[opt] * method works
alex2304 Nov 30, 2017
b60cc54
[opt] *first method debugged
alex2304 Nov 30, 2017
0a0851c
[opt] method refactored
alex2304 Nov 30, 2017
d726f70
[opt] + method2
alex2304 Nov 30, 2017
4a6a240
Adding constructors and register for new ranker classes
MakKolts Nov 30, 2017
f55e0de
Merge branch 'develop' of https://github.com/alex2304/meta into develop
MakKolts Nov 30, 2017
bc948ce
Add rankers to factory
MakKolts Nov 30, 2017
78d6d5c
[opt] + benchmark
alex2304 Nov 30, 2017
25d89d1
Merge branch 'develop' of https://github.com/alex2304/meta into develop
alex2304 Nov 30, 2017
5bc6ee6
Minor fix for output
MakKolts Nov 30, 2017
4f8fa1d
[opt] + dirichlet_opt files
alex2304 Nov 30, 2017
c8ddfbf
[opt] + dirichlet_prior_opt
alex2304 Nov 30, 2017
f7b634a
[opt] + MacKay and Peto method
alex2304 Dec 1, 2017
d4b0a8d
[opt] + comments and docs
alex2304 Dec 3, 2017
001fac6
[opt] - test files
alex2304 Dec 4, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Test for dirichlet optimizations
MakKolts authored and M committed Nov 29, 2017
commit 65851899819645fa5cd33f4f560ef192e1b87441
21 changes: 14 additions & 7 deletions include/meta/index/ranker/dirichlet_prior.h
Original file line number Diff line number Diff line change
@@ -72,26 +72,33 @@ class dirichlet_prior : public language_model_ranker
};

class dirichlet_prior_opt : public dirichlet_prior{
void rank(ranker_context &ctx, uint64_t num_results, const filter_function_type &filter) const override{
// void rank(ranker_context &ctx, uint64_t num_results, const filter_function_type &filter) const override{
// ranking_function::rank(ctx, num_results, filter);
// }
template <class ForwardIterator>
std::vector<search_result> score(inverted_index& idx, ForwardIterator begin,
ForwardIterator end,
uint64_t num_results = 10)
{
// optimize mu according to ranker_context before ranking
this->optimize_mu(ctx);
this->optimize_mu(idx);

ranking_function::rank(ctx, num_results, filter);
return ranker::score(idx, begin, end, num_results);
}

virtual void optimize_mu(const ranker_context &ctx) = 0;
virtual void optimize_mu(const inverted_index& idx) = 0;
};

class digamma_rec: public dirichlet_prior_opt{
void optimize_mu(const ranker_context &ctx) override;
void optimize_mu(const inverted_index& idx) override;
};

class log_approx: public dirichlet_prior_opt{
void optimize_mu(const ranker_context &ctx) override;
void optimize_mu(const inverted_index& idx) override;
};

class mackay_peto: public dirichlet_prior_opt{
void optimize_mu(const ranker_context &ctx) override;
void optimize_mu(const inverted_index& idx) override;
};

/**
2 changes: 2 additions & 0 deletions src/index/ranker/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
project(meta-ranker)

add_subdirectory(test_opt)

add_library(meta-ranker absolute_discount.cpp
dirichlet_prior.cpp
jelinek_mercer.cpp
2 changes: 1 addition & 1 deletion src/index/ranker/dirichlet_prior.cpp
Original file line number Diff line number Diff line change
@@ -33,7 +33,7 @@ void dirichlet_prior::save(std::ostream& out) const
io::packed::write(out, mu_);
}

float dirichlet_prior::smoothed_prob(score_data& sd)
float dirichlet_prior::smoothed_prob(const score_data& sd) const
{
float pc = static_cast<float>(sd.corpus_term_count) / sd.total_terms;
float numerator = sd.doc_term_count + mu_ * pc;
8 changes: 8 additions & 0 deletions src/index/ranker/test_opt/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
project(meta-dirichlet-test)

include_directories(../../../../include)

add_executable(test_opt test.cpp)

target_link_libraries(test_opt meta-ranker)

12 changes: 12 additions & 0 deletions src/index/ranker/test_opt/test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#include "meta/corpus/document.h"
#include "meta/index/ranker/all.h"
#include "meta/index/forward_index.h"

#include <iostream>


// Minimal driver for the test_opt executable: writes a marker line to
// stdout and then default-constructs a dirichlet_prior ranker.
// NOTE(review): presumably this only exists to confirm that the ranker
// library links into the test target — confirm with the authors.
int main()
{
    // Emit the marker text first, then the newline + flush separately.
    std::cout << "Quaia!";
    std::cout << std::endl;

    // The object is intentionally unused beyond its construction.
    meta::index::dirichlet_prior ranker;

    return 0;
}
2 changes: 1 addition & 1 deletion src/stats/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
project(meta-stats)

add_library(meta-stats running_stats.cpp optimization.cpp)
add_library(meta-stats running_stats.cpp)
target_link_libraries(meta-stats meta-definitions)

install(TARGETS meta-stats