Skip to content

Commit

Permalink
change init seed to prior seed
Browse files Browse the repository at this point in the history
  • Loading branch information
jteijema committed Jan 9, 2025
1 parent 6fd271f commit cec5968
Show file tree
Hide file tree
Showing 11 changed files with 19 additions and 19 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
ruff check .
- name: Render makita templates
run: |
asreview makita template basic -p basic -d .github/workflows/test_data/ --classifier nb --feature_extractor tfidf --query_strategy max --n_runs 1 --init_seed 1 --model_seed 2 --skip_wordclouds --overwrite --instances_per_query 2 --stop_if min --balance_strategy double | tee output_basic.txt
asreview makita template basic -p basic -d .github/workflows/test_data/ --classifier nb --feature_extractor tfidf --query_strategy max --n_runs 1 --prior_seed 1 --model_seed 2 --skip_wordclouds --overwrite --instances_per_query 2 --stop_if min --balance_strategy double | tee output_basic.txt
grep -q "ERROR" output_basic.txt && exit 1 || true
asreview makita template arfi -p arfi -d .github/workflows/test_data/ | tee output_arfi.txt
grep -q "ERROR" output_arfi.txt && exit 1 || true
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ optional arguments:
-p, --project_folder PROJECT_FOLDER The folder the project will be rendered to Default: The current working directory
-d, --data_folder DATA_FOLDER The dataset source folder Default: `Data` folder in working directory
-j, --job_file JOB_FILE The name of the file with jobs Default: jobs.bat for Windows, otherwise jobs.sh.
--init_seed INIT_SEED Seed of the priors Seed is set to 535 by default.
--prior_seed PRIOR_SEED Seed of the priors. Seed is set to 535 by default.
--model_seed MODEL_SEED Seed of the models Seed is set to 165 by default.
--template TEMPLATE Overwrite template with template file path
--platform PLATFORM Platform to run jobs: Windows, Darwin, Linux. Default: the system of rendering templates.
Expand Down Expand Up @@ -213,7 +213,7 @@ optional arguments:
-p, --project_folder PROJECT_FOLDER The folder the project will be rendered to Default: The current working directory
-d, --data_folder DATA_FOLDER The dataset source folder Default: `Data` folder in working directory
-j, --job_file JOB_FILE The name of the file with jobs Default: jobs.bat for Windows, otherwise jobs.sh.
--init_seed INIT_SEED Seed of the priors. Seed is set to 535 by default.
--prior_seed PRIOR_SEED Seed of the priors. Seed is set to 535 by default.
--model_seed MODEL_SEED Seed of the models. Seed is set to 165 by default.
--template TEMPLATE Overwrite template with template file path.
--platform PLATFORM Platform to run jobs: Windows, Darwin, Linux. Default: the system of rendering templates.
Expand Down Expand Up @@ -243,7 +243,7 @@ optional arguments:
-p, --project_folder PROJECT_FOLDER The folder the project will be rendered to Default: The current working directory
-d, --data_folder DATA_FOLDER The dataset source folder Default: `Data` folder in working directory
-j, --job_file JOB_FILE The name of the file with jobs Default: jobs.bat for Windows, otherwise jobs.sh.
--init_seed INIT_SEED Seed of the priors. Seed is set to 535 by default.
--prior_seed PRIOR_SEED Seed of the priors. Seed is set to 535 by default.
--model_seed MODEL_SEED Seed of the models. Seed is set to 165 by default.
--template TEMPLATE Overwrite template with template file path.
--platform PLATFORM Platform to run jobs: Windows, Darwin, Linux. Default: the system of rendering templates.
Expand Down Expand Up @@ -300,7 +300,7 @@ optional arguments:
-p, --project_folder PROJECT_FOLDER The folder the project will be rendered to Default: The current working directory
-d, --data_folder DATA_FOLDER The dataset source folder Default: `Data` folder in working directory
-j, --job_file JOB_FILE The name of the file with jobs Default: jobs.bat for Windows, otherwise jobs.sh.
--init_seed INIT_SEED Seed of the priors. Seed is set to 535 by default.
--prior_seed PRIOR_SEED Seed of the priors. Seed is set to 535 by default.
--model_seed MODEL_SEED Seed of the models. Seed is set to 165 by default.
--template TEMPLATE Overwrite template with template file path.
--platform PLATFORM Platform to run jobs: Windows, Darwin, Linux. Default: the system of rendering templates.
Expand Down
4 changes: 2 additions & 2 deletions asreviewcontrib/makita/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def execute(self, argv): # noqa: C901
help="Set project folder path." "Default will use current directory.",
)
parser_template.add_argument(
"--init_seed",
"--prior_seed",
type=int,
default=535,
help="Seed of the priors. 535 by default.",
Expand Down Expand Up @@ -304,7 +304,7 @@ def _get_template_args(self):
"skip_wordclouds",
"n_runs",
"n_priors",
"init_seed",
"prior_seed",
"model_seed",
"classifier",
"feature_extractor",
Expand Down
8 changes: 4 additions & 4 deletions asreviewcontrib/makita/template_arfi.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def get_dataset_specific_params(self, index, fp_dataset):
n_priors = self.n_priors if self.n_priors is not None else 10

priors = _get_priors(
fp_dataset, init_seed=self.init_seed + index, n_priors=n_priors
fp_dataset, prior_seed=self.prior_seed + index, n_priors=n_priors
)
return {
"input_file": f"{fp_dataset.parent.name}/{fp_dataset.name}",
Expand Down Expand Up @@ -62,14 +62,14 @@ def get_template_specific_params(self, params):
"balance_strategy": balance_strategy,
"instances_per_query": self.instances_per_query,
"stop_if": self.stop_if,
"init_seed": self.init_seed,
"prior_seed": self.prior_seed,
"output_folder": self.paths.output_folder,
"scripts_folder": self.paths.scripts_folder,
"version": self.__version__,
}


def _get_priors(dataset, init_seed, n_priors):
def _get_priors(dataset, prior_seed, n_priors):
"""Sample priors."""

records = load_dataset(dataset, dataset_id=Path(dataset).name)
Expand All @@ -86,7 +86,7 @@ def _get_priors(dataset, init_seed, n_priors):
if len(relevant_irrecord_ids) == 0:
raise ValueError("No irrelevant records found.")

np.random.seed(init_seed)
np.random.seed(prior_seed)

prior_irrelevant = list(
np.random.choice(relevant_irrecord_ids, n_priors, replace=False)
Expand Down
4 changes: 2 additions & 2 deletions asreviewcontrib/makita/template_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def __init__(
file_handler: FileHandler,
paths: ProjectPaths,
skip_wordclouds,
init_seed,
prior_seed,
model_seed,
balance_strategy,
instances_per_query,
Expand All @@ -30,7 +30,7 @@ def __init__(
self.datasets = datasets
self.paths = paths
self.skip_wordclouds = skip_wordclouds
self.init_seed = init_seed
self.prior_seed = prior_seed
self.model_seed = model_seed
self.balance_strategy = balance_strategy
self.instances_per_query = instances_per_query
Expand Down
2 changes: 1 addition & 1 deletion asreviewcontrib/makita/template_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def get_dataset_specific_params(self, index, fp_dataset):
"input_file": f"{fp_dataset.parent.name}/{fp_dataset.name}",
"input_file_stem": fp_dataset.stem,
"model_seed": self.model_seed + index,
"init_seed": self.init_seed,
"prior_seed": self.prior_seed,
}

def get_template_specific_params(self, params):
Expand Down
2 changes: 1 addition & 1 deletion asreviewcontrib/makita/template_multimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def get_dataset_specific_params(self, index, fp_dataset):
"input_file": f"{fp_dataset.parent.name}/{fp_dataset.name}",
"input_file_stem": fp_dataset.stem,
"model_seed": self.model_seed + index,
"init_seed": self.init_seed,
"prior_seed": self.prior_seed,
}

def get_template_specific_params(self, params):
Expand Down
2 changes: 1 addition & 1 deletion asreviewcontrib/makita/template_prior.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def get_template_specific_params(self, params):
"scripts_folder": self.paths.scripts_folder,
"version": self.__version__,
"model_seed": self.model_seed,
"init_seed": self.init_seed,
"prior_seed": self.prior_seed,
"filepath_with_priors":
f"{filepath_with_priors.parent.name}/{filepath_with_priors.name}",
"filepath_with_priors_stem": filepath_with_priors.stem,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ python -m asreview wordcloud {{ dataset.input_file }} -o {{ output_folder }}/fig
# Simulate runs
mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files
{% for run in range(n_runs) %}
python -m asreview simulate {{ dataset.input_file }} -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview --init_seed {{ dataset.init_seed + run }} --seed {{ dataset.model_seed + run }} -m {{ classifier }} -e {{ feature_extractor }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }}
python -m asreview simulate {{ dataset.input_file }} -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview --prior-seed {{ dataset.prior_seed + run }} --seed {{ dataset.model_seed + run }} -m {{ classifier }} -e {{ feature_extractor }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }}
python -m asreview metrics {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics/metrics_sim_{{ dataset.input_file_stem }}{{ "_{}".format(run) if n_runs > 1 else "" }}.json
{% endfor %}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files
# Skipped {{ classifier }} + {{ feature_extraction }} + {{ query_strategy}} model
{% else %}# Classifier = {{ classifier }}, Feature extractor = {{ feature_extraction }}, Query strategy = {{ query_strategy }}, Balance strategy = {{balance_strategy}}
{% for run in range(n_runs) %}
python -m asreview simulate {{ dataset.input_file }} -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ balance_strategy }}{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview --model {{ classifier }} --query_strategy {{query_strategy}} --feature_extraction {{ feature_extraction }} --init_seed {{ dataset.init_seed + run }} --seed {{ dataset.model_seed }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }}
python -m asreview simulate {{ dataset.input_file }} -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ balance_strategy }}{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview --model {{ classifier }} --query_strategy {{query_strategy}} --feature_extraction {{ feature_extraction }} --prior_seed {{ dataset.prior_seed + run }} --seed {{ dataset.model_seed }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }}
python -m asreview metrics {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ balance_strategy }}{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics/metrics_sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ balance_strategy }}{{ "_{}".format(run) if n_runs > 1 else "" }}.json
{% endfor %}{% endif %}
{% endfor %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ python -m asreview wordcloud {{ filepath_without_priors }} -o {{ output_folder }
python -m asreview simulate {{ filepath_with_priors }} -s {{ output_folder }}/simulation/state_files/sim_custom_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview --seed {{ model_seed + run }} -m {{ classifier }} -e {{ feature_extractor }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }} --prior_idx {{ prior_idx }}
python -m asreview metrics {{ output_folder }}/simulation/state_files/sim_custom_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview -o {{ output_folder }}/simulation/metrics/metrics_sim_custom_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.json

python -m asreview simulate {{ filepath_without_priors }} -s {{ output_folder }}/simulation/state_files/sim_minimal_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview --init_seed {{ init_seed + run }} --seed {{ model_seed + run }} -m {{ classifier }} -e {{ feature_extractor }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }}
python -m asreview simulate {{ filepath_without_priors }} -s {{ output_folder }}/simulation/state_files/sim_minimal_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview --prior_seed {{ prior_seed + run }} --seed {{ model_seed + run }} -m {{ classifier }} -e {{ feature_extractor }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }}
python -m asreview metrics {{ output_folder }}/simulation/state_files/sim_minimal_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview -o {{ output_folder }}/simulation/metrics/metrics_sim_minimal_priors{{ "_{}".format(run) if n_runs > 1 else "" }}.json

{% endfor %}
Expand Down

0 comments on commit cec5968

Please sign in to comment.