Skip to content

Commit

Permalink
setup old-defaults study
Browse files Browse the repository at this point in the history
  • Loading branch information
timovdk committed Jan 29, 2025
1 parent 2465ff7 commit dfe529d
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 13 deletions.
2 changes: 1 addition & 1 deletion asreview2-optuna/classifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

def naive_bayes_params(trial: optuna.trial.FrozenTrial):
# Sample alpha on a log scale (log-uniform), since alpha's effect is non-linear
alpha = trial.suggest_float("alpha", 0.5, 50, log=True)
alpha = 3.822 #trial.suggest_float("alpha", 0.5, 50, log=True)
#alpha = trial.suggest_float("nb__alpha", 1.0, 15.0)
return {"alpha": alpha}

Expand Down
17 changes: 9 additions & 8 deletions asreview2-optuna/feature_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,22 @@
def tfidf_params(trial: optuna.trial.FrozenTrial):
#max_features = trial.suggest_int("tfidf__max_features", 15_000, 50_000)

max_df = trial.suggest_float("tfidf__max_df", 0.7, 1.0)
#max_df = trial.suggest_float("tfidf__max_df", 0.7, 1.0)

min_df = trial.suggest_int("tfidf__min_df", 2, 4)
#min_df = trial.suggest_int("tfidf__min_df", 2, 4)

#max_ngram_range = trial.suggest_int("tfidf__max_ngram_range", 1, 3)
ngram_range = (1, 2)
#ngram_range = (1, 2)

sublinear_tf = True#trial.suggest_categorical("tfidf__sublinear_tf", [True, False])
#sublinear_tf = True#trial.suggest_categorical("tfidf__sublinear_tf", [True, False])

return {
#"max_features": max_features,
"max_df": max_df,
"min_df": min_df,
"ngram_range": ngram_range,
"sublinear_tf": sublinear_tf,
#"max_df": max_df,
#"min_df": min_df,
#"ngram_range": ngram_range,
#"sublinear_tf": sublinear_tf,
#"ngram_range": (1, 1),
}


Expand Down
8 changes: 4 additions & 4 deletions asreview2-optuna/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@
from feature_extractors import feature_extractor_params, feature_extractors

# Study variables
VERSION = 1
VERSION = 2
STUDY_SET = "full"
CLASSIFIER_TYPE = "nb" # Options: "nb", "log", "svm", "rf"
FEATURE_EXTRACTOR_TYPE = "labse" # Options: "tfidf", "onehot", "labse", "bge-m3"
FEATURE_EXTRACTOR_TYPE = "tfidf" # Options: "tfidf", "onehot", "labse", "bge-m3"
PICKLE_FOLDER_PATH = Path("synergy-dataset", f"pickles_{FEATURE_EXTRACTOR_TYPE}")
PRE_PROCESSED_FMS = True # False = on the fly
PRE_PROCESSED_FMS = False # False = on the fly
PARALLELIZE_OBJECTIVE = True

# Optuna variables
Expand Down Expand Up @@ -164,7 +164,7 @@ def process_row(row, clf_params, fe_params, ratio):
def objective_report(report_order):
def objective(trial):
# Sample ratio uniformly on a linear scale (ratio effect is linear)
ratio = trial.suggest_float("ratio", 1.0, 2.0)
ratio = trial.suggest_float("ratio", 1.0, 10.0)
# ratio = 1.5
clf_params = classifier_params[CLASSIFIER_TYPE](trial)
fe_params = (
Expand Down

0 comments on commit dfe529d

Please sign in to comment.