Skip to content

Commit

Permalink
narrow feature space for full study
Browse files Browse the repository at this point in the history
  • Loading branch information
timovdk committed Jan 23, 2025
1 parent e722d81 commit 3e7476c
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 8 deletions.
2 changes: 1 addition & 1 deletion asreview2-optuna/classifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

def naive_bayes_params(trial: optuna.trial.FrozenTrial):
    """Suggest the naive Bayes hyperparameters for one Optuna trial.

    Args:
        trial: Trial object used to request a value for ``alpha`` through
            its ``suggest_float`` method.

    Returns:
        A dict with the single key ``"alpha"`` holding the suggested value.
    """
    # Search alpha on a log scale over [0.5, 50] (alpha effect is non-linear).
    # Only one suggest call is made for "alpha": the wider 0.1-100 range was
    # replaced by this narrower one rather than kept alongside it.
    alpha = trial.suggest_float("alpha", 0.5, 50, log=True)
    # alpha = trial.suggest_float("nb__alpha", 1.0, 15.0)
    return {"alpha": alpha}

Expand Down
10 changes: 5 additions & 5 deletions asreview2-optuna/feature_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
def tfidf_params(trial: optuna.trial.FrozenTrial):
#max_features = trial.suggest_int("tfidf__max_features", 15_000, 50_000)

max_df = trial.suggest_float("tfidf__max_df", 0.5, 1.0)
max_df = trial.suggest_float("tfidf__max_df", 0.6, 0.9)

min_df = trial.suggest_int("tfidf__min_df", 1, 10)
min_df = trial.suggest_int("tfidf__min_df", 1, 5)

max_ngram_range = trial.suggest_int("tfidf__max_ngram_range", 1, 3)
ngram_range = (1, max_ngram_range)
#max_ngram_range = trial.suggest_int("tfidf__max_ngram_range", 1, 3)
ngram_range = (1, 2)

sublinear_tf = trial.suggest_categorical("tfidf__sublinear_tf", [True, False])
sublinear_tf = True#trial.suggest_categorical("tfidf__sublinear_tf", [True, False])

return {
#"max_features": max_features,
Expand Down
4 changes: 2 additions & 2 deletions asreview2-optuna/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

# Study variables
VERSION = 1
# Study set to run; this was "demo" before being switched to the full study.
STUDY_SET = "full"
PICKLE_FOLDER_PATH = Path("synergy-dataset", "pickles")
# Key used to look up the classifier parameter function.
CLASSIFIER_TYPE = "nb"  # Options: "nb", "log", "svm", "rf"
# Key used to look up the feature extractor parameter function.
FEATURE_EXTRACTOR_TYPE = "tfidf"  # Options: "tfidf", "onehot"
Expand Down Expand Up @@ -162,7 +162,7 @@ def process_row(row, clf_params, fe_params, ratio):
def objective_report(report_order):
def objective(trial):
# Search ratio on a linear scale, no log transform (ratio effect is linear)
ratio = trial.suggest_float("ratio", 1.0, 5.0)
ratio = trial.suggest_float("ratio", 1.0, 3.0)
# ratio = 1.5
clf_params = classifier_params[CLASSIFIER_TYPE](trial)
fe_params = feature_extractor_params[FEATURE_EXTRACTOR_TYPE](trial)
Expand Down

0 comments on commit 3e7476c

Please sign in to comment.