setup full study

asreview · Jan 24, 2025 · c1be834
1 parent 38c3550
commit c1be834
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 7 deletions.
diff --git a/asreview2-optuna/classifiers.py b/asreview2-optuna/classifiers.py
@@ -23,9 +23,9 @@ def logistic_params(trial: optuna.trial.FrozenTrial):
 
 def svm_params(trial: optuna.trial.FrozenTrial):
     # Sample C log-uniformly, i.e. uniformly in the log domain (the effect of C is non-linear)
-    C = trial.suggest_float("svm__C", 0.01, 10, log=True)
+    C = trial.suggest_float("svm__C", 0.01, 1, log=True)
 
-    loss = trial.suggest_categorical("svm__loss", ["hinge", "squared_hinge"])
+    loss = "hinge"
 
     return {"C": C, "loss": loss}
 

diff --git a/asreview2-optuna/feature_extractors.py b/asreview2-optuna/feature_extractors.py
@@ -7,15 +7,15 @@
 def tfidf_params(trial: optuna.trial.FrozenTrial):
     #max_features = trial.suggest_int("tfidf__max_features", 200, 20_000)
 
-    max_df = trial.suggest_float("tfidf__max_df", 0.5, 1.0)
+    max_df = trial.suggest_float("tfidf__max_df", 0.65, 1.0)
 
     min_df = trial.suggest_int("tfidf__min_df", 1, 10)
 
     # trial.suggest_categorical does not support tuples, so choose max_ngram_range first, then create a tuple.
-    max_ngram_range = trial.suggest_int("tfidf__max_ngram_range", 1, 3)
+    max_ngram_range = trial.suggest_int("tfidf__max_ngram_range", 2, 3)
     ngram_range = (1, max_ngram_range)
 
-    sublinear_tf = trial.suggest_categorical("tfidf__sublinear_tf", [True, False])
+    sublinear_tf = True
 
     return {
         #"max_features": max_features,

diff --git a/asreview2-optuna/main.py b/asreview2-optuna/main.py
@@ -19,8 +19,8 @@
 from feature_extractors import feature_extractor_params, feature_extractors
 
 # Study variables
-VERSION = 2
-STUDY_SET = "demo"
+VERSION = 1
+STUDY_SET = "full"
 PICKLE_FOLDER_PATH = Path("synergy-dataset", "pickles")
 CLASSIFIER_TYPE = "svm"  # Options: "nb", "log", "svm", "rf"
 FEATURE_EXTRACTOR_TYPE = "tfidf"  # Options: "tfidf", "onehot"