asreview · jteijema · Jan 7, 2025 · Jan 7, 2025 · Jan 7, 2025 · Jan 7, 2025
diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml
@@ -5,7 +5,7 @@ jobs:
     strategy:
       matrix:
         os: [windows-latest, ubuntu-latest]
-        python-version: ['3.8', '3.12'] 
+        python-version: ['3.10', '3.12', '3.13'] 
     runs-on: ${{ matrix.os }}
     steps:
     - uses: actions/checkout@v4
@@ -23,13 +23,13 @@ jobs:
           ${{ runner.os }}-pip-
     - name: Install dependencies
       run: |
-        pip install . ruff scitree asreview-datatools asreview-insights synergy-dataset
+        pip install . ruff scitree synergy-dataset
     - name: Lint python with ruff
       run: |
         ruff check .
     - name: Render makita templates
       run: |
-        asreview makita template basic -p basic -d .github/workflows/test_data/ --classifier nb --feature_extractor tfidf --query_strategy max --n_runs 1 --init_seed 1 --model_seed 2 --skip_wordclouds --overwrite --instances_per_query 2 --stop_if min --balance_strategy double | tee output_basic.txt
+        asreview makita template basic -p basic -d .github/workflows/test_data/ --classifier nb --feature_extractor tfidf --query_strategy max --n_runs 1 --prior_seed 1 --model_seed 2 --skip_wordclouds --overwrite --n_query 2 --n_stop min --balance_strategy double | tee output_basic.txt
         grep -q "ERROR" output_basic.txt && exit 1 || true
         asreview makita template arfi -p arfi -d .github/workflows/test_data/ | tee output_arfi.txt
         grep -q "ERROR" output_arfi.txt && exit 1 || true
@@ -49,9 +49,20 @@ jobs:
     - name: Execute basic template jobs file
       if: ${{ matrix.os != 'windows-latest' }}
       run: |
+        pip install asreview-datatools asreview-insights
+
+        CURRENT_ASREVIEW_VERSION=$(python -c "import asreview; print(asreview.__version__)")
+        # Compare the major version as an integer: '>' inside [[ ]] sorts strings, so "10.0" > "2" would be false.
+        if [[ "${CURRENT_ASREVIEW_VERSION%%.*}" -ge 2 ]]; then
+          echo "Using asreview version $CURRENT_ASREVIEW_VERSION"
+        else
+          echo "Skipping step: incompatible asreview version $CURRENT_ASREVIEW_VERSION"
+          exit 0
+        fi
+
         mkdir basic_run
         cd basic_run
-        asreview makita template basic -d ../.github/workflows/test_data/ --instances_per_query 100 --skip_wordclouds --overwrite --n_runs 1
+        asreview makita template basic -d ../.github/workflows/test_data/ --n_query 100 --skip_wordclouds --overwrite --n_runs 1
         scitree
         sh jobs.sh
         scitree

diff --git a/README.md b/README.md
@@ -63,7 +63,7 @@ Simulation Mode](https://asreview.github.io/asreview-academy/simulation.html).
 ### Prerequisites:
 
 ```
-Requires Python 3.8 or higher.
+Requires Python 3.10 or higher.
 ```
 
 ### Install the Makita extension with pip:
@@ -156,6 +156,14 @@ with
 asreview makita template basic --job_file my_jobs_file.my_ext
 ```
 
+### ASReview Makita Helper Tool
+For users of ASReview Makita, there's a handy helper tool available. This tool
+simplifies the process of formulating your Makita commands, ensuring
+compatibility and ease of use. It's designed to work with ASReview Makita
+version 1.0 and above.
+
+[Makita Helper Tool](https://asreview.github.io/asreview-makita/)
+
 ## Templates
 
 The following table gives an overview of the available templates. See [Getting
@@ -181,7 +189,7 @@ optional arguments:
   -p, --project_folder PROJECT_FOLDER       The folder the project will be rendered too     Default: The current working directory
   -d, --data_folder DATA_FOLDER             The dataset source folder                       Default: `Data` folder in working directory
   -j, --job_file JOB_FILE                   The name of the file with jobs                  Default: jobs.bat for Windows, otherwise jobs.sh.
-  --init_seed INIT_SEED                     Seed of the priors                              Seed is set to 535 by default.
+  --prior_seed PRIOR_SEED                   Seed of the priors                              Seed is set to 535 by default.
   --model_seed MODEL_SEED                   Seed of the models                              Seed is set to 165 by default.
   --template TEMPLATE                       Overwrite template with template file path 
   --platform PLATFORM                       Platform to run jobs: Windows, Darwin, Linux.   Default: the system of rendering templates.
@@ -192,8 +200,8 @@ optional arguments:
   --feature_extractor FEATURE_EXTRACTOR     Feature_extractor to use.                       Default: tfidf.
   --query_strategy QUERY_STRATEGY           Query strategy to use.                          Default: max.
   --balance_strategy BALANCE_STRATEGY       Balance strategy to use.                        Default: double.
-  --instances_per_query INSTANCES_PER_QUERY Number of instances per query.                  Default: 1.
-  --stop_if STOP_IF                         The number of label actions to simulate.        Default 'min' will stop simulating when all relevant records are found.
+  --n_query N_QUERY                         Number of instances per query.                  Default: 1.
+  --n_stop N_STOP                           The number of label actions to simulate.        Default 'min' will stop simulating when all relevant records are found.
 ```
 
 ### ARFI template
@@ -213,7 +221,7 @@ optional arguments:
   -p, --project_folder PROJECT_FOLDER       The folder the project will be rendered too     Default: The current working directory
   -d, --data_folder DATA_FOLDER             The dataset source folder                       Default: `Data` folder in working directory
   -j, --job_file JOB_FILE                   The name of the file with jobs                  Default: jobs.bat for Windows, otherwise jobs.sh.
-  --init_seed INIT_SEED                     Seed of the priors.                             Seed is set to 535 by default.
+  --prior_seed PRIOR_SEED                   Seed of the priors.                             Seed is set to 535 by default.
   --model_seed MODEL_SEED                   Seed of the models.                             Seed is set to 165 by default.
   --template TEMPLATE                       Overwrite template with template file path.
   --platform PLATFORM                       Platform to run jobs: Windows, Darwin, Linux.   Default: the system of rendering templates.
@@ -224,8 +232,8 @@ optional arguments:
   --feature_extractor FEATURE_EXTRACTOR     Feature_extractor to use.                       Default: tfidf.
   --query_strategy QUERY_STRATEGY           Query strategy to use.                          Default: max.
   --balance_strategy BALANCE_STRATEGY       Balance strategy to use.                        Default: double.
-  --instances_per_query INSTANCES_PER_QUERY Number of instances per query.                  Default: 1.
-  --stop_if STOP_IF                         The number of label actions to simulate.        Default 'min' will stop simulating when all relevant records are found.
+  --n_query N_QUERY                         Number of instances per query.                  Default: 1.
+  --n_stop N_STOP                           The number of label actions to simulate.        Default 'min' will stop simulating when all relevant records are found.
 ```
 
 ### Multimodel template
@@ -243,15 +251,15 @@ optional arguments:
   -p, --project_folder PROJECT_FOLDER       The folder the project will be rendered too     Default: The current working directory
   -d, --data_folder DATA_FOLDER             The dataset source folder                       Default: `Data` folder in working directory
   -j, --job_file JOB_FILE                   The name of the file with jobs                  Default: jobs.bat for Windows, otherwise jobs.sh.
-  --init_seed INIT_SEED                     Seed of the priors.                             Seed is set to 535 by default.
+  --prior_seed PRIOR_SEED                   Seed of the priors.                             Seed is set to 535 by default.
   --model_seed MODEL_SEED                   Seed of the models.                             Seed is set to 165 by default.
   --template TEMPLATE                       Overwrite template with template file path.
   --platform PLATFORM                       Platform to run jobs: Windows, Darwin, Linux.   Default: the system of rendering templates.
   --n_runs N_RUNS                           Number of runs.                                 Default: 1.
   --skip_wordclouds                         Disables the generation of wordclouds.
   --overwrite                               Automatically accepts all overwrite requests.
-  --instances_per_query INSTANCES_PER_QUERY Number of instances per query.                  Default: 1.
-  --stop_if STOP_IF                         The number of label actions to simulate.        Default 'min' will stop simulating when all relevant records are found.
+  --n_query N_QUERY                         Number of instances per query.                  Default: 1.
+  --n_stop N_STOP                           The number of label actions to simulate.        Default 'min' will stop simulating when all relevant records are found.
   --classifiers CLASSIFIERS                 Classifiers to use                              Default: ['logistic', 'nb', 'rf', 'svm']
   --feature_extractors FEATURE_EXTRACTOR    Feature extractors to use                       Default: ['doc2vec', 'sbert', 'tfidf']
   --query_strategies QUERY_STRATEGY         Query strategies to use                         Default: ['max']
@@ -300,7 +308,7 @@ optional arguments:
   -p, --project_folder PROJECT_FOLDER       The folder the project will be rendered too     Default: The current working directory
   -d, --data_folder DATA_FOLDER             The dataset source folder                       Default: `Data` folder in working directory
   -j, --job_file JOB_FILE                   The name of the file with jobs                  Default: jobs.bat for Windows, otherwise jobs.sh.
-  --init_seed INIT_SEED                     Seed of the priors.                             Seed is set to 535 by default.
+  --prior_seed PRIOR_SEED                   Seed of the priors.                             Seed is set to 535 by default.
   --model_seed MODEL_SEED                   Seed of the models.                             Seed is set to 165 by default.
   --template TEMPLATE                       Overwrite template with template file path.
   --platform PLATFORM                       Platform to run jobs: Windows, Darwin, Linux.   Default: the system of rendering templates.
@@ -311,8 +319,8 @@ optional arguments:
   --feature_extractor FEATURE_EXTRACTOR     Feature_extractor to use.                       Default: tfidf.
   --query_strategy QUERY_STRATEGY           Query strategy to use.                          Default: max.
   --balance_strategy BALANCE_STRATEGY       Balance strategy to use.                        Default: double.
-  --instances_per_query INSTANCES_PER_QUERY Number of instances per query.                  Default: 1.
-  --stop_if STOP_IF                         The number of label actions to simulate.        Default 'min' will stop simulating when all relevant records are found.
+  --n_query N_QUERY                         Number of instances per query.                  Default: 1.
+  --n_stop N_STOP                           The number of label actions to simulate.        Default 'min' will stop simulating when all relevant records are found.
 ```
 
 #### Example usage
@@ -381,8 +389,8 @@ use it, use `-s` (source) and `-o` (output) to tweak paths.
 
 Adding a legend to the plot can be done with the `-l` or `--show_legend` flag,
 with the labels clustered on any of the following: `'filename', 'model',
-'query_strategy', 'balance_strategy', 'feature_extraction', 'n_instances',
-'stop_if', 'n_prior_included', 'n_prior_excluded', 'model_param', 'query_param',
+'query_strategy', 'balance_strategy', 'feature_extraction', 'n_query',
+'n_stop', 'n_prior_included', 'n_prior_excluded', 'model_param', 'query_param',
 'feature_param', 'balance_param'`
 
 #### Available scripts

diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py
@@ -5,16 +5,14 @@
 from pathlib import Path
 from typing import Optional
 
-from asreview import config as ASREVIEW_CONFIG
-from asreview.entry_points import BaseEntryPoint
-from asreview.utils import _entry_points
+from asreview.extensions import extensions
 
 from asreviewcontrib.makita import __version__
 from asreviewcontrib.makita.config import TEMPLATES_FP
 from asreviewcontrib.makita.utils import FileHandler
 
 
-class MakitaEntryPoint(BaseEntryPoint):
+class MakitaEntryPoint:
     description = "Makita functionality for ASReview datasets."
     extension_name = "asreview-makita"
 
@@ -55,7 +53,7 @@ def execute(self, argv):  # noqa: C901
             help="Set project folder path." "Default will use current directory.",
         )
         parser_template.add_argument(
-            "--init_seed",
+            "--prior_seed",
             type=int,
             default=535,
             help="Seed of the priors. 535 by default.",
@@ -76,13 +74,13 @@ def execute(self, argv):  # noqa: C901
             "Default: the system of rendering templates.",
         )
         parser_template.add_argument(
-            "--instances_per_query",
+            "--n_query",
             type=int,
-            default=ASREVIEW_CONFIG.DEFAULT_N_INSTANCES,
+            default=1,
             help="Number of instances per query.",
         )
         parser_template.add_argument(
-            "--stop_if",
+            "--n_stop",
             type=str,
             default="min",
             help="The number of label actions to simulate.",
@@ -152,6 +150,11 @@ def execute(self, argv):  # noqa: C901
             nargs="+",
             help="Model combinations to exclude.",
         )
+        parser_template.add_argument(
+            "--no-balance-strategy",
+            action="store_true",  # boolean switch: takes no value, defaults to False
+            help="Do not use a balance strategy.",
+        )
 
         parser_template.set_defaults(func=self._template_cli)
 
@@ -244,7 +247,7 @@ def _convert_job_for_platform(self, job):
 
     def _get_template_class(self, template_name):
         """Validate and load the template."""
-        entry_points = _entry_points(group="asreview.makita.templates")
+        entry_points = extensions(group="makita.templates")
         if template_name not in entry_points.names:
             raise ValueError(f"Template {template_name} not found.")
         return entry_points[template_name].load()
@@ -294,7 +297,7 @@ def _load_datasets(self):
         copied_files = []
         for dataset in datasets:
             target_path = self.paths.data_folder_path / dataset.name
-            if source_path != self.paths.data_folder_path:
+            if source_path.resolve() != self.paths.data_folder_path.resolve():
                 shutil.copyfile(dataset, target_path)
             copied_files.append(target_path)
 
@@ -306,7 +309,7 @@ def _get_template_args(self):
             "skip_wordclouds",
             "n_runs",
             "n_priors",
-            "init_seed",
+            "prior_seed",
             "model_seed",
             "classifier",
             "feature_extractor",
@@ -317,8 +320,8 @@ def _get_template_args(self):
             "query_strategies",
             "balance_strategies",
             "impossible_models",
-            "instances_per_query",
-            "stop_if",
+            "n_query",
+            "n_stop",
         ]
         return {
             key: vars(self.args).get(key)

diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py
@@ -1,8 +1,11 @@
 """Render ARFI template."""
 
+from pathlib import Path
+
 import numpy as np
-from asreview import config as ASREVIEW_CONFIG
-from asreview.data import ASReviewData
+from asreview import load_dataset
+from asreview.data import DataStore
+from asreview.models import default_model
 
 from asreviewcontrib.makita.template_base import TemplateBase
 
@@ -31,7 +34,7 @@ def get_dataset_specific_params(self, index, fp_dataset):
         n_priors = self.n_priors if self.n_priors is not None else 10
 
         priors = _get_priors(
-            fp_dataset, init_seed=self.init_seed + index, n_priors=n_priors
+            fp_dataset, prior_seed=self.prior_seed + index, n_priors=n_priors
         )
         return {
             "input_file": f"{fp_dataset.parent.name}/{fp_dataset.name}",
@@ -44,11 +47,28 @@ def get_template_specific_params(self, params):
         """Prepare template-specific parameters. These parameters are provided to the
         template only once."""
 
-        # set default values if not provided
-        classifier = self.classifier if self.classifier is not None else ASREVIEW_CONFIG.DEFAULT_MODEL # noqa: E501
-        feature_extractor = self.feature_extractor if self.feature_extractor is not None else ASREVIEW_CONFIG.DEFAULT_FEATURE_EXTRACTION # noqa: E501
-        query_strategy = self.query_strategy if self.query_strategy is not None else ASREVIEW_CONFIG.DEFAULT_QUERY_STRATEGY # noqa: E501
-        balance_strategy = self.balance_strategy if self.balance_strategy is not None else ASREVIEW_CONFIG.DEFAULT_BALANCE_STRATEGY # noqa: E501
+        ASREVIEW_CONFIG = default_model()
+
+        classifier = (
+            self.classifier
+            if self.classifier is not None
+            else ASREVIEW_CONFIG["classifier"]
+        )
+        feature_extractor = (
+            self.feature_extractor
+            if self.feature_extractor is not None
+            else ASREVIEW_CONFIG['feature_extraction']
+        )
+        query_strategy = (
+            self.query_strategy
+            if self.query_strategy is not None
+            else ASREVIEW_CONFIG['query_strategy']
+        )
+        balance_strategy = (
+            self.balance_strategy
+            if self.balance_strategy is not None
+            else ASREVIEW_CONFIG['balance_strategy']
+        )
 
         return {
             "datasets": params,
@@ -57,29 +77,34 @@ def get_template_specific_params(self, params):
             "feature_extractor": feature_extractor,
             "query_strategy": query_strategy,
             "balance_strategy": balance_strategy,
-            "instances_per_query": self.instances_per_query,
-            "stop_if": self.stop_if,
-            "init_seed": self.init_seed,
+            "n_query": self.n_query,
+            "n_stop": self.n_stop,
+            "prior_seed": self.prior_seed,
             "output_folder": self.paths.output_folder,
             "scripts_folder": self.paths.scripts_folder,
             "version": self.__version__,
         }
 
 
-def _get_priors(dataset, init_seed, n_priors):
+def _get_priors(dataset, prior_seed, n_priors):
     """Sample priors."""
-    asdata = ASReviewData.from_file(dataset)
-    relevant_record_ids = asdata.record_ids[asdata.labels == 1]
-    relevant_irrecord_ids = asdata.record_ids[asdata.labels == 0]
+
+    records = load_dataset(dataset, dataset_id=Path(dataset).name)
+    data_store = DataStore(":memory:")
+    data_store.create_tables()
+    data_store.add_records(records)
+    df = data_store.get_df()
+
+    relevant_record_ids = df.record_id[df.included == 1]
+    relevant_irrecord_ids = df.record_id[df.included == 0]
 
     if len(relevant_record_ids) == 0:
-        raise ValueError("Not enough relevant records found.")
+        raise ValueError("No relevant records found.")
     if len(relevant_irrecord_ids) == 0:
-        raise ValueError("Not enough irrelevant records found.")
+        raise ValueError("No irrelevant records found.")
 
-    np.random.seed(init_seed)
+    np.random.seed(prior_seed)
 
-    # sample n_priors irrelevant records
     prior_irrelevant = list(
         np.random.choice(relevant_irrecord_ids, n_priors, replace=False)
     )

diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py
@@ -20,21 +20,21 @@ def __init__(
         file_handler: FileHandler,
         paths: ProjectPaths,
         skip_wordclouds,
-        init_seed,
+        prior_seed,
         model_seed,
         balance_strategy,
-        instances_per_query,
-        stop_if,
+        n_query,
+        n_stop,
         **kwargs,
     ):
         self.datasets = datasets
         self.paths = paths
         self.skip_wordclouds = skip_wordclouds
-        self.init_seed = init_seed
+        self.prior_seed = prior_seed
         self.model_seed = model_seed
         self.balance_strategy = balance_strategy
-        self.instances_per_query = instances_per_query
-        self.stop_if = stop_if
+        self.n_query = n_query
+        self.n_stop = n_stop
         self.file_handler = file_handler
         self.__version__ = '.'.join(
             str(part)