diff --git a/examples/README.md b/examples/README.md index 8f151abf..606e4b5c 100644 --- a/examples/README.md +++ b/examples/README.md @@ -3,16 +3,20 @@ Examples are generated with the following code (from the root of the repo): ``` +mkdir examples +mkdir examples/arfi_example synergy_dataset get -d van_de_Schoot_2018 Smid_2020 -o examples/arfi_example/data -l cd examples/arfi_example -asreview makita template arfi +asreview makita template arfi --platform linux cd ../.. +mkdir examples/basic_example synergy_dataset get -d van_de_Schoot_2018 Smid_2020 -o examples/basic_example/data -l cd examples/basic_example -asreview makita template basic +asreview makita template basic --platform linux cd ../.. +mkdir examples/multimodel_example synergy_dataset get -d van_de_Schoot_2018 Smid_2020 -o examples/multimodel_example/data -l cd examples/multimodel_example -asreview makita template multimodel +asreview makita template multimodel --platform linux cd ../.. ``` diff --git a/examples/arfi_example/README.md b/examples/arfi_example/README.md index 73fbeaf8..8b31fc94 100644 --- a/examples/arfi_example/README.md +++ b/examples/arfi_example/README.md @@ -1,6 +1,6 @@ # Simulation study -*This project was rendered with ASReview-Makita version 0.0.0.* +*This project was rendered with ASReview-Makita version 0.9.2.* This project was rendered from the Makita-ARFI template. See [asreview/asreview-makita#templates](https://github.com/asreview/asreview-makita#templates) for template rules and formats. @@ -14,7 +14,7 @@ This project depends on Python 3.7 or later (python.org/download), and [ASReview pip install asreview>=1.0 asreview-insights>=1.1.2 asreview-datatools ``` -If wordcloud images are required, install the following dependencies. +For generating wordclouds, install the following dependencies. 
```sh pip install asreview-wordcloud @@ -24,8 +24,8 @@ pip install asreview-wordcloud The performance on the following datasets is evaluated: -- data/van_de_Schoot_2018.csv -- data/Smid_2020.csv +- data/Smid_2020.csv +- data/van_de_Schoot_2018.csv ## Run simulation @@ -43,8 +43,8 @@ The following files are found in this project: ├── 📜README.md ├── 📜jobs.sh ├── 📂data - │ ├── 📜van_de_Schoot_2018.csv │ ├── 📜Smid_2020.csv + │ ├── 📜van_de_Schoot_2018.csv ├── 📂scripts │ ├── 📜get_plot.py │ ├── 📜merge_descriptives.py @@ -53,45 +53,45 @@ The following files are found in this project: │ └── 📜... └── 📂output ├── 📂simulation - | └── 📂van_de_Schoot_2018 + | └── 📂Smid_2020 | ├── 📂descriptives - | | └── 📜data_stats_van_de_Schoot_2018.json + | | └── 📜data_stats_Smid_2020.json | ├── 📂state_files - | | ├── 📜sim_van_de_Schoot_2018_`x`.asreview + | | ├── 📜sim_Smid_2020_`x`.asreview | | └── 📜... | └── 📂metrics - | ├── 📜metrics_sim_van_de_Schoot_2018_`x`.json + | ├── 📜metrics_sim_Smid_2020_`x`.json | └── 📜... - | └── 📂Smid_2020 + | └── 📂van_de_Schoot_2018 | ├── 📂descriptives - | | └── 📜data_stats_Smid_2020.json + | | └── 📜data_stats_van_de_Schoot_2018.json | ├── 📂state_files - | | ├── 📜sim_Smid_2020_`x`.asreview + | | ├── 📜sim_van_de_Schoot_2018_`x`.asreview | | └── 📜... | └── 📂metrics - | ├── 📜metrics_sim_Smid_2020_`x`.json + | ├── 📜metrics_sim_van_de_Schoot_2018_`x`.json | └── 📜... 
├── 📂tables | ├── 📜data_descriptives.csv | ├── 📜data_descriptives.xlsx - | ├── 📜tds_sim_van_de_Schoot_2018.csv - | ├── 📜tds_sim_van_de_Schoot_2018.xlsx | ├── 📜tds_sim_Smid_2020.csv | ├── 📜tds_sim_Smid_2020.xlsx + | ├── 📜tds_sim_van_de_Schoot_2018.csv + | ├── 📜tds_sim_van_de_Schoot_2018.xlsx | ├── 📜tds_summary.csv | ├── 📜tds_summary.xlsx - | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.csv - | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.xlsx | ├── 📜metrics_sim_Smid_2020_metrics.csv | ├── 📜metrics_sim_Smid_2020_metrics.xlsx + | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.csv + | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.xlsx | ├── 📜metrics_summary.csv | └── 📜metrics_summary.xlsx └── 📂figures - ├── 📈plot_recall_van_de_Schoot_2018.png ├── 📈plot_recall_Smid_2020.png - ├── 📈wordcloud_van_de_Schoot_2018.png - ├── 📈wordcloud_relevant_van_de_Schoot_2018.png - └── 📈wordcloud_irrelevant_van_de_Schoot_2018.png + ├── 📈plot_recall_van_de_Schoot_2018.png ├── 📈wordcloud_Smid_2020.png ├── 📈wordcloud_relevant_Smid_2020.png └── 📈wordcloud_irrelevant_Smid_2020.png + ├── 📈wordcloud_van_de_Schoot_2018.png + ├── 📈wordcloud_relevant_van_de_Schoot_2018.png + └── 📈wordcloud_irrelevant_van_de_Schoot_2018.png diff --git a/examples/arfi_example/jobs.sh b/examples/arfi_example/jobs.sh index cbb07843..b6c181aa 100644 --- a/examples/arfi_example/jobs.sh +++ b/examples/arfi_example/jobs.sh @@ -1,6 +1,6 @@ -# version 0.0.0 +# version 0.9.2 # Create folder structure. By default, the folder 'output' is used to store output. 
mkdir output @@ -19,69 +19,69 @@ mkdir output/simulation/Smid_2020/metrics # Collect descriptives about the dataset mkdir output/simulation/Smid_2020/descriptives -asreview data describe data/Smid_2020.csv -o output/simulation/Smid_2020/descriptives/data_stats_Smid_2020.json +python -m asreview data describe data/Smid_2020.csv -o output/simulation/Smid_2020/descriptives/data_stats_Smid_2020.json # Generate wordcloud visualizations of all datasets -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_Smid_2020.png --width 800 --height 500 -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant +python -m asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_Smid_2020.png --width 800 --height 500 +python -m asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant +python -m asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant # Simulate runs, collect metrics and create plots mkdir output/simulation/Smid_2020/state_files -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_31.asreview --prior_record_id 31 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_31.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_31.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_121.asreview --prior_record_id 121 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_121.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_121.json -asreview 
simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_122.asreview --prior_record_id 122 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_122.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_122.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_216.asreview --prior_record_id 216 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_216.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_216.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_520.asreview --prior_record_id 520 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_520.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_520.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_526.asreview --prior_record_id 526 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_526.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_526.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_672.asreview --prior_record_id 672 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_672.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_672.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_763.asreview --prior_record_id 763 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_763.asreview -o 
output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_763.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_810.asreview --prior_record_id 810 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_810.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_810.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1000.asreview --prior_record_id 1000 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1000.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1000.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1063.asreview --prior_record_id 1063 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1063.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1063.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1195.asreview --prior_record_id 1195 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1195.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1195.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1203.asreview --prior_record_id 1203 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1203.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1203.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1257.asreview --prior_record_id 1257 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 
-asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1257.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1257.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1429.asreview --prior_record_id 1429 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1429.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1429.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1534.asreview --prior_record_id 1534 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1534.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1534.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1809.asreview --prior_record_id 1809 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1809.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1809.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1820.asreview --prior_record_id 1820 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1820.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1820.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1876.asreview --prior_record_id 1876 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1876.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1876.json -asreview simulate data/Smid_2020.csv -s 
output/simulation/Smid_2020/state_files/sim_Smid_2020_1877.asreview --prior_record_id 1877 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1877.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1877.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2067.asreview --prior_record_id 2067 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2067.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2067.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2070.asreview --prior_record_id 2070 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2070.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2070.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2241.asreview --prior_record_id 2241 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2241.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2241.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2276.asreview --prior_record_id 2276 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2276.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2276.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2279.asreview --prior_record_id 2279 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2279.asreview -o 
output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2279.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2307.asreview --prior_record_id 2307 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2307.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2307.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2452.asreview --prior_record_id 2452 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2452.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2452.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_31.asreview --prior_record_id 31 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_31.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_31.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_121.asreview --prior_record_id 121 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_121.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_121.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_122.asreview --prior_record_id 122 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_122.asreview -o 
output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_122.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_216.asreview --prior_record_id 216 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_216.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_216.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_520.asreview --prior_record_id 520 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_520.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_520.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_526.asreview --prior_record_id 526 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_526.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_526.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_672.asreview --prior_record_id 672 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_672.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_672.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_763.asreview --prior_record_id 763 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b 
double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_763.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_763.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_810.asreview --prior_record_id 810 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_810.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_810.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1000.asreview --prior_record_id 1000 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1000.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1000.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1063.asreview --prior_record_id 1063 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1063.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1063.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1195.asreview --prior_record_id 1195 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1195.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1195.json +python -m asreview simulate data/Smid_2020.csv -s 
output/simulation/Smid_2020/state_files/sim_Smid_2020_1203.asreview --prior_record_id 1203 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1203.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1203.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1257.asreview --prior_record_id 1257 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1257.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1257.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1429.asreview --prior_record_id 1429 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1429.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1429.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1534.asreview --prior_record_id 1534 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1534.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1534.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1809.asreview --prior_record_id 1809 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics 
output/simulation/Smid_2020/state_files/sim_Smid_2020_1809.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1809.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1820.asreview --prior_record_id 1820 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1820.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1820.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1876.asreview --prior_record_id 1876 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1876.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1876.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1877.asreview --prior_record_id 1877 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1877.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1877.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2067.asreview --prior_record_id 2067 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2067.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2067.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2070.asreview --prior_record_id 
2070 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2070.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2070.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2241.asreview --prior_record_id 2241 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2241.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2241.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2276.asreview --prior_record_id 2276 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2276.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2276.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2279.asreview --prior_record_id 2279 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2279.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2279.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2307.asreview --prior_record_id 2307 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2307.asreview -o 
output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2307.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2452.asreview --prior_record_id 2452 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2452.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2452.json # Generate plot and tables for dataset python scripts/get_plot.py -s output/simulation/Smid_2020/state_files/ -o output/figures/plot_recall_sim_Smid_2020.png --show_legend model @@ -97,91 +97,91 @@ mkdir output/simulation/van_de_Schoot_2018/metrics # Collect descriptives about the dataset mkdir output/simulation/van_de_Schoot_2018/descriptives -asreview data describe data/van_de_Schoot_2018.csv -o output/simulation/van_de_Schoot_2018/descriptives/data_stats_van_de_Schoot_2018.json +python -m asreview data describe data/van_de_Schoot_2018.csv -o output/simulation/van_de_Schoot_2018/descriptives/data_stats_van_de_Schoot_2018.json # Generate wordcloud visualizations of all datasets -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_van_de_Schoot_2018.png --width 800 --height 500 -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant +python -m asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_van_de_Schoot_2018.png --width 800 --height 500 +python -m asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant +python -m asreview wordcloud data/van_de_Schoot_2018.csv -o 
output/figures/wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant # Simulate runs, collect metrics and create plots mkdir output/simulation/van_de_Schoot_2018/state_files -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_51.asreview --prior_record_id 51 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_51.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_51.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_116.asreview --prior_record_id 116 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_116.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_116.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_462.asreview --prior_record_id 462 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_462.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_462.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_730.asreview --prior_record_id 730 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_730.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_730.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_767.asreview --prior_record_id 767 4382 1972 
3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_767.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_767.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_831.asreview --prior_record_id 831 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_831.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_831.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_902.asreview --prior_record_id 902 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_902.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_902.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_953.asreview --prior_record_id 953 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_953.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_953.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1033.asreview --prior_record_id 1033 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1033.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1033.json -asreview simulate data/van_de_Schoot_2018.csv -s 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1180.asreview --prior_record_id 1180 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1180.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1180.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1248.asreview --prior_record_id 1248 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1248.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1248.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1356.asreview --prior_record_id 1356 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1356.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1356.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1429.asreview --prior_record_id 1429 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1429.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1429.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1514.asreview --prior_record_id 1514 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1514.asreview -o 
output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1514.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1554.asreview --prior_record_id 1554 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1554.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1554.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1565.asreview --prior_record_id 1565 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1565.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1565.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1746.asreview --prior_record_id 1746 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1746.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1746.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1881.asreview --prior_record_id 1881 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1881.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1881.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1994.asreview --prior_record_id 1994 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1994.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1994.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2279.asreview --prior_record_id 2279 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2279.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2279.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2496.asreview --prior_record_id 2496 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2496.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2496.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2545.asreview --prior_record_id 2545 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2545.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2545.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2624.asreview --prior_record_id 2624 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2624.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2624.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2740.asreview --prior_record_id 2740 4382 1972 3329 
4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2740.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2740.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2803.asreview --prior_record_id 2803 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2803.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2803.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3076.asreview --prior_record_id 3076 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3076.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3076.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3314.asreview --prior_record_id 3314 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3314.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3314.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3442.asreview --prior_record_id 3442 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3442.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3442.json -asreview simulate data/van_de_Schoot_2018.csv -s 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3608.asreview --prior_record_id 3608 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3608.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3608.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3680.asreview --prior_record_id 3680 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3680.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3680.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3769.asreview --prior_record_id 3769 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3769.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3769.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3842.asreview --prior_record_id 3842 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3842.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3842.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4154.asreview --prior_record_id 4154 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4154.asreview -o 
output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4154.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4201.asreview --prior_record_id 4201 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4201.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4201.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4269.asreview --prior_record_id 4269 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4269.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4269.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4327.asreview --prior_record_id 4327 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4327.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4327.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4377.asreview --prior_record_id 4377 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4377.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4377.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4461.asreview --prior_record_id 4461 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4461.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4461.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_51.asreview --prior_record_id 51 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_51.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_51.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_116.asreview --prior_record_id 116 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_116.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_116.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_462.asreview --prior_record_id 462 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_462.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_462.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_730.asreview --prior_record_id 730 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_730.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_730.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_767.asreview --prior_record_id 767 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_767.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_767.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_831.asreview --prior_record_id 831 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_831.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_831.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_902.asreview --prior_record_id 902 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_902.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_902.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_953.asreview --prior_record_id 953 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_953.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_953.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1033.asreview --prior_record_id 1033 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1033.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1033.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1180.asreview --prior_record_id 1180 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1180.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1180.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1248.asreview --prior_record_id 1248 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1248.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1248.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1356.asreview --prior_record_id 1356 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1356.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1356.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1429.asreview --prior_record_id 1429 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1429.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1429.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1514.asreview --prior_record_id 1514 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1514.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1514.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1554.asreview --prior_record_id 1554 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1554.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1554.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1565.asreview --prior_record_id 1565 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1565.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1565.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1746.asreview --prior_record_id 1746 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1746.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1746.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1881.asreview --prior_record_id 1881 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1881.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1881.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1994.asreview --prior_record_id 1994 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1994.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1994.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2279.asreview --prior_record_id 2279 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2279.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2279.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2496.asreview --prior_record_id 2496 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2496.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2496.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2545.asreview --prior_record_id 2545 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2545.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2545.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2624.asreview --prior_record_id 2624 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2624.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2624.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2740.asreview --prior_record_id 2740 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2740.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2740.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2803.asreview --prior_record_id 2803 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2803.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2803.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3076.asreview --prior_record_id 3076 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3076.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3076.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3314.asreview --prior_record_id 3314 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3314.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3314.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3442.asreview --prior_record_id 3442 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3442.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3442.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3608.asreview --prior_record_id 3608 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3608.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3608.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3680.asreview --prior_record_id 3680 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3680.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3680.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3769.asreview --prior_record_id 3769 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3769.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3769.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3842.asreview --prior_record_id 3842 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3842.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3842.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4154.asreview --prior_record_id 4154 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4154.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4154.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4201.asreview --prior_record_id 4201 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4201.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4201.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4269.asreview --prior_record_id 4269 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4269.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4269.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4327.asreview --prior_record_id 4327 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4327.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4327.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4377.asreview --prior_record_id 4377 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4377.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4377.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4461.asreview --prior_record_id 4461 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4461.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4461.json # Generate plot and tables for dataset python scripts/get_plot.py -s output/simulation/van_de_Schoot_2018/state_files/ -o output/figures/plot_recall_sim_van_de_Schoot_2018.png --show_legend model @@ -189,5 +189,5 @@ python scripts/merge_metrics.py -s output/simulation/van_de_Schoot_2018/metrics/ python scripts/merge_tds.py -s output/simulation/van_de_Schoot_2018/metrics/ -o output/tables/time_to_discovery/tds_sim_van_de_Schoot_2018.csv # Merge descriptives and metrics -python scripts/merge_descriptives.py -s output/simulation/*/descriptives/ -o output/tables/data_descriptives_all.csv -python scripts/merge_metrics.py -s output/simulation/*/metrics/ -o output/tables/metrics_sim_all.csv +python scripts/merge_descriptives.py +python scripts/merge_metrics.py diff --git a/examples/arfi_example/scripts/get_plot.py 
b/examples/arfi_example/scripts/get_plot.py index 7d29468d..02f9f883 100644 --- a/examples/arfi_example/scripts/get_plot.py +++ b/examples/arfi_example/scripts/get_plot.py @@ -15,63 +15,77 @@ - Teijema, Jelle """ -# version 0.0.0 +# version 0.9.2 import argparse from pathlib import Path -import matplotlib.colors as mcolors import matplotlib.pyplot as plt from asreview import open_state from asreviewcontrib.insights.plot import plot_recall -def get_plot_from_states(states, filename, legend=None): - """Generate an ASReview plot from state files.""" +def _set_legend(ax, state, legend_option, label_to_line, state_file): + metadata = state.settings_metadata + label = None + + if legend_option == "filename": + label = state_file.stem + elif legend_option == "model": + label = " - ".join( + [ + metadata["settings"]["model"], + metadata["settings"]["feature_extraction"], + metadata["settings"]["balance_strategy"], + metadata["settings"]["query_strategy"], + ] + ) + elif legend_option == "classifier": + label = metadata["settings"]["model"] + else: + try: + label = metadata["settings"][legend_option] + except KeyError as err: + raise ValueError(f"Invalid legend setting: '{legend_option}'") from err # noqa: E501 + + if label: + # add label to line + if label not in label_to_line: + ax.lines[-2].set_label(label) + label_to_line[label] = ax.lines[-2] + # set color of line to the color of the first line with the same label + else: + ax.lines[-2].set_color(label_to_line[label].get_color()) + ax.lines[-2].set_label("_no_legend_") - fig, ax = plt.subplots() - labels = [] - colors = list(mcolors.TABLEAU_COLORS.values()) +def get_plot_from_states(states, filename, legend=None): + """Generate an ASReview plot from state files. + + Arguments + --------- + states: list + List of state files. + filename: str + Filename of the plot. + legend: str + Add a legend to the plot, based on the given parameter. 
+ Possible values: "filename", "model", "feature_extraction", + "balance_strategy", "query_strategy", "classifier". + """ + states = sorted(states) + fig, ax = plt.subplots() + label_to_line = {} for state_file in states: with open_state(state_file) as state: - # draw the plot plot_recall(ax, state) + if legend: + _set_legend(ax, state, legend, label_to_line, state_file) - # set the label - if legend == "filename": - ax.lines[-2].set_label(state_file.stem) - ax.legend(loc=4, prop={"size": 8}) - elif legend: - metadata = state.settings_metadata - - if legend == "model": - label = " - ".join( - [ - metadata["settings"]["model"], - metadata["settings"]["feature_extraction"], - metadata["settings"]["balance_strategy"], - metadata["settings"]["query_strategy"], - ] - ) - elif legend == "classifier": - label = metadata["settings"]["model"] - else: - try: - label = metadata["settings"][legend] - except KeyError as exc: - raise ValueError( - f"Legend setting '{legend}' " - "not found in state file settings." 
- ) from exc - if label not in labels: - ax.lines[-2].set_label(label) - labels.append(label) - ax.lines[-2].set_color(colors[labels.index(label) % len(colors)]) - ax.legend(loc=4, prop={"size": 8}) - + if legend: + ax.legend(loc=4, prop={"size": 8}) fig.savefig(str(filename)) @@ -90,10 +104,10 @@ def get_plot_from_states(states, filename, legend=None): args = parser.parse_args() # load states - states = Path(args.s).glob("*.asreview") + states = list(Path(args.s).glob("*.asreview")) # check if states are found - if len(list(states)) == 0: + if len(states) == 0: raise FileNotFoundError(f"No state files found in {args.s}") # generate plot and save results diff --git a/examples/arfi_example/scripts/merge_descriptives.py b/examples/arfi_example/scripts/merge_descriptives.py index 72e1390c..3a121646 100644 --- a/examples/arfi_example/scripts/merge_descriptives.py +++ b/examples/arfi_example/scripts/merge_descriptives.py @@ -18,7 +18,7 @@ - De Bruin, Jonathan """ -# version 0.0.0 +# version 0.9.2 import argparse import glob diff --git a/examples/arfi_example/scripts/merge_metrics.py b/examples/arfi_example/scripts/merge_metrics.py index aa031461..5022167a 100644 --- a/examples/arfi_example/scripts/merge_metrics.py +++ b/examples/arfi_example/scripts/merge_metrics.py @@ -18,7 +18,7 @@ - De Bruin, Jonathan """ -# version 0.0.0 +# version 0.9.2 import argparse import glob @@ -55,7 +55,10 @@ def create_table_state_metrics(metric_files): description="Merge metrics of multiple states into single table." 
) parser.add_argument( - "-s", type=str, default="output/simulation/*/metrics/", help="states location" + "-s", + type=str, + default="output/simulation/*/metrics/", + help="states location", ) parser.add_argument( "-o", diff --git a/examples/arfi_example/scripts/merge_tds.py b/examples/arfi_example/scripts/merge_tds.py index 1beb52c6..3fc153d7 100644 --- a/examples/arfi_example/scripts/merge_tds.py +++ b/examples/arfi_example/scripts/merge_tds.py @@ -19,11 +19,12 @@ - De Bruin, Jonathan """ -# version 0.0.0 +# version 0.9.2 import argparse import glob import json +from math import nan from pathlib import Path import pandas as pd @@ -37,7 +38,7 @@ def create_table_state_tds(metrics): with open(metric) as f: i = next(filter(lambda x: x["id"] == "td", json.load(f)["data"]["items"]))[ "value" - ] # noqa + ] values.extend((item[0], item[1], file_counter) for item in i) file_counter += 1 @@ -47,25 +48,26 @@ def create_table_state_tds(metrics): columns="metric_file", values="td", aggfunc="first", - fill_value=0, + fill_value=nan, ) pivoted.columns = [f"td_sim_{col}" for col in pivoted.columns] return pivoted +def get_atd_values(df): + df["record_atd"] = df.mean(axis=1) + + df.loc["average_simulation_TD"] = df.iloc[:, :-1].mean(axis=0) + + return df + + if __name__ == "__main__": parser = argparse.ArgumentParser( description="Merge tds of multiple metrics into single table." 
) - parser.add_argument( - "-s", type=str, default="output/simulation/*/metrics/", help="metrics location" - ) - parser.add_argument( - "-o", - type=str, - default="output/tables/tds_sim_all.csv", - help="Output table location", - ) + parser.add_argument("-s", type=str, required=True, help="metrics location") + parser.add_argument("-o", type=str, required=True, help="Output table location") args = parser.parse_args() # load metric files @@ -75,9 +77,14 @@ def create_table_state_tds(metrics): if len(metric_files) == 0: raise FileNotFoundError("No metrics found in " + args.s) - states_table = create_table_state_tds(metric_files) + # check if output file has .csv extension + if Path(args.o).suffix != ".csv": + raise ValueError("Output file should have .csv extension") + + td_table = create_table_state_tds(metric_files) + atd_table = get_atd_values(td_table) # store table Path(args.o).parent.mkdir(parents=True, exist_ok=True) - states_table.to_csv(Path(args.o)) - states_table.to_excel(Path(args.o).with_suffix(".xlsx")) + atd_table.to_csv(Path(args.o)) + atd_table.to_excel(Path(args.o).with_suffix(".xlsx")) diff --git a/examples/basic_example/README.md b/examples/basic_example/README.md index 1f2f52fc..75bd751a 100644 --- a/examples/basic_example/README.md +++ b/examples/basic_example/README.md @@ -1,6 +1,6 @@ # Simulation study -*This project was rendered with ASReview-Makita version 0.0.0.* +*This project was rendered with ASReview-Makita version 0.9.2.* This project was rendered from the Makita-basic template. See [asreview/asreview-makita#templates](https://github.com/asreview/asreview-makita#templates) for template rules and formats. @@ -14,7 +14,7 @@ This project depends on Python 3.7 or later (python.org/download), and [ASReview pip install asreview>=1.0 asreview-insights>=1.1.2 asreview-datatools ``` -If wordcloud images are required, install the following dependencies. +For generating wordclouds, install the following dependencies. 
```sh pip install asreview-wordcloud @@ -24,8 +24,8 @@ pip install asreview-wordcloud The performance on the following datasets is evaluated: -- data/van_de_Schoot_2018.csv -- data/Smid_2020.csv +- data\Smid_2020.csv +- data\van_de_Schoot_2018.csv ## Run simulation @@ -43,8 +43,8 @@ The following files are found in this project: โ”œโ”€โ”€ ๐Ÿ“œREADME.md โ”œโ”€โ”€ ๐Ÿ“œjobs.sh โ”œโ”€โ”€ ๐Ÿ“‚data - โ”‚ โ”œโ”€โ”€ ๐Ÿ“œvan_de_Schoot_2018.csv โ”‚ โ”œโ”€โ”€ ๐Ÿ“œSmid_2020.csv + โ”‚ โ”œโ”€โ”€ ๐Ÿ“œvan_de_Schoot_2018.csv โ”œโ”€โ”€ ๐Ÿ“‚scripts โ”‚ โ”œโ”€โ”€ ๐Ÿ“œget_plot.py โ”‚ โ”œโ”€โ”€ ๐Ÿ“œmerge_descriptives.py @@ -53,45 +53,45 @@ The following files are found in this project: โ”‚ โ””โ”€โ”€ ๐Ÿ“œ... โ””โ”€โ”€ ๐Ÿ“‚output โ”œโ”€โ”€ ๐Ÿ“‚simulation - | โ””โ”€โ”€ ๐Ÿ“‚van_de_Schoot_2018 + | โ””โ”€โ”€ ๐Ÿ“‚Smid_2020 | โ”œโ”€โ”€ ๐Ÿ“‚descriptives - | | โ””โ”€โ”€ ๐Ÿ“œdata_stats_van_de_Schoot_2018.json + | | โ””โ”€โ”€ ๐Ÿ“œdata_stats_Smid_2020.json | โ”œโ”€โ”€ ๐Ÿ“‚state_files - | | โ”œโ”€โ”€ ๐Ÿ“œsim_van_de_Schoot_2018_`x`.asreview + | | โ”œโ”€โ”€ ๐Ÿ“œsim_Smid_2020_`x`.asreview | | โ””โ”€โ”€ ๐Ÿ“œ... | โ””โ”€โ”€ ๐Ÿ“‚metrics - | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_van_de_Schoot_2018_`x`.json + | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_Smid_2020_`x`.json | โ””โ”€โ”€ ๐Ÿ“œ... - | โ””โ”€โ”€ ๐Ÿ“‚Smid_2020 + | โ””โ”€โ”€ ๐Ÿ“‚van_de_Schoot_2018 | โ”œโ”€โ”€ ๐Ÿ“‚descriptives - | | โ””โ”€โ”€ ๐Ÿ“œdata_stats_Smid_2020.json + | | โ””โ”€โ”€ ๐Ÿ“œdata_stats_van_de_Schoot_2018.json | โ”œโ”€โ”€ ๐Ÿ“‚state_files - | | โ”œโ”€โ”€ ๐Ÿ“œsim_Smid_2020_`x`.asreview + | | โ”œโ”€โ”€ ๐Ÿ“œsim_van_de_Schoot_2018_`x`.asreview | | โ””โ”€โ”€ ๐Ÿ“œ... | โ””โ”€โ”€ ๐Ÿ“‚metrics - | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_Smid_2020_`x`.json + | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_van_de_Schoot_2018_`x`.json | โ””โ”€โ”€ ๐Ÿ“œ... 
โ”œโ”€โ”€ ๐Ÿ“‚tables | โ”œโ”€โ”€ ๐Ÿ“œdata_descriptives.csv | โ”œโ”€โ”€ ๐Ÿ“œdata_descriptives.xlsx - | โ”œโ”€โ”€ ๐Ÿ“œtds_sim_van_de_Schoot_2018.csv - | โ”œโ”€โ”€ ๐Ÿ“œtds_sim_van_de_Schoot_2018.xlsx | โ”œโ”€โ”€ ๐Ÿ“œtds_sim_Smid_2020.csv | โ”œโ”€โ”€ ๐Ÿ“œtds_sim_Smid_2020.xlsx + | โ”œโ”€โ”€ ๐Ÿ“œtds_sim_van_de_Schoot_2018.csv + | โ”œโ”€โ”€ ๐Ÿ“œtds_sim_van_de_Schoot_2018.xlsx | โ”œโ”€โ”€ ๐Ÿ“œtds_summary.csv | โ”œโ”€โ”€ ๐Ÿ“œtds_summary.xlsx - | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_van_de_Schoot_2018_metrics.csv - | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_van_de_Schoot_2018_metrics.xlsx | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_Smid_2020_metrics.csv | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_Smid_2020_metrics.xlsx + | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_van_de_Schoot_2018_metrics.csv + | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_van_de_Schoot_2018_metrics.xlsx | โ”œโ”€โ”€ ๐Ÿ“œmetrics_summary.csv | โ””โ”€โ”€ ๐Ÿ“œmetrics_summary.xlsx โ””โ”€โ”€ ๐Ÿ“‚figures - โ”œโ”€โ”€ ๐Ÿ“ˆplot_recall_van_de_Schoot_2018.png โ”œโ”€โ”€ ๐Ÿ“ˆplot_recall_Smid_2020.png - โ”œโ”€โ”€ ๐Ÿ“ˆwordcloud_van_de_Schoot_2018.png - โ”œโ”€โ”€ ๐Ÿ“ˆwordcloud_relevant_van_de_Schoot_2018.png - โ””โ”€โ”€ ๐Ÿ“ˆwordcloud_irrelevant_van_de_Schoot_2018.png + โ”œโ”€โ”€ ๐Ÿ“ˆplot_recall_van_de_Schoot_2018.png โ”œโ”€โ”€ ๐Ÿ“ˆwordcloud_Smid_2020.png โ”œโ”€โ”€ ๐Ÿ“ˆwordcloud_relevant_Smid_2020.png โ””โ”€โ”€ ๐Ÿ“ˆwordcloud_irrelevant_Smid_2020.png + โ”œโ”€โ”€ ๐Ÿ“ˆwordcloud_van_de_Schoot_2018.png + โ”œโ”€โ”€ ๐Ÿ“ˆwordcloud_relevant_van_de_Schoot_2018.png + โ””โ”€โ”€ ๐Ÿ“ˆwordcloud_irrelevant_van_de_Schoot_2018.png diff --git a/examples/basic_example/jobs.sh b/examples/basic_example/jobs.sh index e5327cd9..27dfbb65 100644 --- a/examples/basic_example/jobs.sh +++ b/examples/basic_example/jobs.sh @@ -1,6 +1,6 @@ -# version 0.0.0 +# version 0.9.2 # Create folder structure. By default, the folder 'output' is used to store output. 
mkdir output @@ -21,17 +21,17 @@ mkdir output/simulation/Smid_2020/metrics # Collect descriptives about the dataset Smid_2020 mkdir output/simulation/Smid_2020/descriptives -asreview data describe data/Smid_2020.csv -o output/simulation/Smid_2020/descriptives/data_stats_Smid_2020.json +python -m asreview data describe data/Smid_2020.csv -o output/simulation/Smid_2020/descriptives/data_stats_Smid_2020.json # Generate wordcloud visualizations of all datasets -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_Smid_2020.png --width 800 --height 500 -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant +python -m asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_Smid_2020.png --width 800 --height 500 +python -m asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant +python -m asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant # Simulate runs mkdir output/simulation/Smid_2020/state_files -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_0.asreview --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_0.json +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020.asreview --init_seed 535 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020.json # Generate plot and tables for dataset python scripts/get_plot.py -s 
output/simulation/Smid_2020/state_files/ -o output/figures/plot_recall_sim_Smid_2020.png @@ -48,17 +48,17 @@ mkdir output/simulation/van_de_Schoot_2018/metrics # Collect descriptives about the dataset van_de_Schoot_2018 mkdir output/simulation/van_de_Schoot_2018/descriptives -asreview data describe data/van_de_Schoot_2018.csv -o output/simulation/van_de_Schoot_2018/descriptives/data_stats_van_de_Schoot_2018.json +python -m asreview data describe data/van_de_Schoot_2018.csv -o output/simulation/van_de_Schoot_2018/descriptives/data_stats_van_de_Schoot_2018.json # Generate wordcloud visualizations of all datasets -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_van_de_Schoot_2018.png --width 800 --height 500 -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant +python -m asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_van_de_Schoot_2018.png --width 800 --height 500 +python -m asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant +python -m asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant # Simulate runs mkdir output/simulation/van_de_Schoot_2018/state_files -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_0.asreview --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_0.json +python -m asreview simulate data/van_de_Schoot_2018.csv -s 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018.asreview --init_seed 535 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018.json # Generate plot and tables for dataset python scripts/get_plot.py -s output/simulation/van_de_Schoot_2018/state_files/ -o output/figures/plot_recall_sim_van_de_Schoot_2018.png @@ -66,5 +66,5 @@ python scripts/merge_metrics.py -s output/simulation/van_de_Schoot_2018/metrics/ python scripts/merge_tds.py -s output/simulation/van_de_Schoot_2018/metrics/ -o output/tables/time_to_discovery/tds_sim_van_de_Schoot_2018.csv # Merge descriptives and metrics -python scripts/merge_descriptives.py -s output/simulation/*/descriptives/ -o output/tables/data_descriptives_all.csv -python scripts/merge_metrics.py -s output/simulation/*/metrics/ -o output/tables/metrics_sim_all.csv +python scripts/merge_descriptives.py +python scripts/merge_metrics.py diff --git a/examples/basic_example/scripts/get_plot.py b/examples/basic_example/scripts/get_plot.py index 7d29468d..02f9f883 100644 --- a/examples/basic_example/scripts/get_plot.py +++ b/examples/basic_example/scripts/get_plot.py @@ -15,63 +15,77 @@ - Teijema, Jelle """ -# version 0.0.0 +# version 0.9.2 import argparse from pathlib import Path -import matplotlib.colors as mcolors import matplotlib.pyplot as plt from asreview import open_state from asreviewcontrib.insights.plot import plot_recall -def get_plot_from_states(states, filename, legend=None): - """Generate an ASReview plot from state files.""" +def _set_legend(ax, state, legend_option, label_to_line, state_file): + metadata = state.settings_metadata + label = None + + if legend_option == "filename": + label = state_file.stem + elif legend_option == "model": + label = " - ".join( + [ + metadata["settings"]["model"], + 
metadata["settings"]["feature_extraction"], + metadata["settings"]["balance_strategy"], + metadata["settings"]["query_strategy"], + ] + ) + elif legend_option == "classifier": + label = metadata["settings"]["model"] + else: + try: + label = metadata["settings"][legend_option] + except KeyError as err: + raise ValueError(f"Invalid legend setting: '{legend_option}'") from err # noqa: E501 + + if label: + # add label to line + if label not in label_to_line: + ax.lines[-2].set_label(label) + label_to_line[label] = ax.lines[-2] + # set color of line to the color of the first line with the same label + else: + ax.lines[-2].set_color(label_to_line[label].get_color()) + ax.lines[-2].set_label("_no_legend_") - fig, ax = plt.subplots() - labels = [] - colors = list(mcolors.TABLEAU_COLORS.values()) +def get_plot_from_states(states, filename, legend=None): + """Generate an ASReview plot from state files. + + Arguments + --------- + states: list + List of state files. + filename: str + Filename of the plot. + legend: str + Add a legend to the plot, based on the given parameter. + Possible values: "filename", "model", "feature_extraction", + "balance_strategy", "query_strategy", "classifier". 
+ """ + states = sorted(states) + fig, ax = plt.subplots() + label_to_line = {} for state_file in states: with open_state(state_file) as state: - # draw the plot plot_recall(ax, state) + if legend: + _set_legend(ax, state, legend, label_to_line, state_file) - # set the label - if legend == "filename": - ax.lines[-2].set_label(state_file.stem) - ax.legend(loc=4, prop={"size": 8}) - elif legend: - metadata = state.settings_metadata - - if legend == "model": - label = " - ".join( - [ - metadata["settings"]["model"], - metadata["settings"]["feature_extraction"], - metadata["settings"]["balance_strategy"], - metadata["settings"]["query_strategy"], - ] - ) - elif legend == "classifier": - label = metadata["settings"]["model"] - else: - try: - label = metadata["settings"][legend] - except KeyError as exc: - raise ValueError( - f"Legend setting '{legend}' " - "not found in state file settings." - ) from exc - if label not in labels: - ax.lines[-2].set_label(label) - labels.append(label) - ax.lines[-2].set_color(colors[labels.index(label) % len(colors)]) - ax.legend(loc=4, prop={"size": 8}) - + if legend: + ax.legend(loc=4, prop={"size": 8}) fig.savefig(str(filename)) @@ -90,10 +104,10 @@ def get_plot_from_states(states, filename, legend=None): args = parser.parse_args() # load states - states = Path(args.s).glob("*.asreview") + states = list(Path(args.s).glob("*.asreview")) # check if states are found - if len(list(states)) == 0: + if len(states) == 0: raise FileNotFoundError(f"No state files found in {args.s}") # generate plot and save results diff --git a/examples/basic_example/scripts/merge_descriptives.py b/examples/basic_example/scripts/merge_descriptives.py index 72e1390c..3a121646 100644 --- a/examples/basic_example/scripts/merge_descriptives.py +++ b/examples/basic_example/scripts/merge_descriptives.py @@ -18,7 +18,7 @@ - De Bruin, Jonathan """ -# version 0.0.0 +# version 0.9.2 import argparse import glob diff --git a/examples/basic_example/scripts/merge_metrics.py 
b/examples/basic_example/scripts/merge_metrics.py index aa031461..5022167a 100644 --- a/examples/basic_example/scripts/merge_metrics.py +++ b/examples/basic_example/scripts/merge_metrics.py @@ -18,7 +18,7 @@ - De Bruin, Jonathan """ -# version 0.0.0 +# version 0.9.2 import argparse import glob @@ -55,7 +55,10 @@ def create_table_state_metrics(metric_files): description="Merge metrics of multiple states into single table." ) parser.add_argument( - "-s", type=str, default="output/simulation/*/metrics/", help="states location" + "-s", + type=str, + default="output/simulation/*/metrics/", + help="states location", ) parser.add_argument( "-o", diff --git a/examples/basic_example/scripts/merge_tds.py b/examples/basic_example/scripts/merge_tds.py index 1beb52c6..3fc153d7 100644 --- a/examples/basic_example/scripts/merge_tds.py +++ b/examples/basic_example/scripts/merge_tds.py @@ -19,11 +19,12 @@ - De Bruin, Jonathan """ -# version 0.0.0 +# version 0.9.2 import argparse import glob import json +from math import nan from pathlib import Path import pandas as pd @@ -37,7 +38,7 @@ def create_table_state_tds(metrics): with open(metric) as f: i = next(filter(lambda x: x["id"] == "td", json.load(f)["data"]["items"]))[ "value" - ] # noqa + ] values.extend((item[0], item[1], file_counter) for item in i) file_counter += 1 @@ -47,25 +48,26 @@ def create_table_state_tds(metrics): columns="metric_file", values="td", aggfunc="first", - fill_value=0, + fill_value=nan, ) pivoted.columns = [f"td_sim_{col}" for col in pivoted.columns] return pivoted +def get_atd_values(df): + df["record_atd"] = df.mean(axis=1) + + df.loc["average_simulation_TD"] = df.iloc[:, :-1].mean(axis=0) + + return df + + if __name__ == "__main__": parser = argparse.ArgumentParser( description="Merge tds of multiple metrics into single table." 
) - parser.add_argument( - "-s", type=str, default="output/simulation/*/metrics/", help="metrics location" - ) - parser.add_argument( - "-o", - type=str, - default="output/tables/tds_sim_all.csv", - help="Output table location", - ) + parser.add_argument("-s", type=str, required=True, help="metrics location") + parser.add_argument("-o", type=str, required=True, help="Output table location") args = parser.parse_args() # load metric files @@ -75,9 +77,14 @@ def create_table_state_tds(metrics): if len(metric_files) == 0: raise FileNotFoundError("No metrics found in " + args.s) - states_table = create_table_state_tds(metric_files) + # check if output file has .csv extension + if Path(args.o).suffix != ".csv": + raise ValueError("Output file should have .csv extension") + + td_table = create_table_state_tds(metric_files) + atd_table = get_atd_values(td_table) # store table Path(args.o).parent.mkdir(parents=True, exist_ok=True) - states_table.to_csv(Path(args.o)) - states_table.to_excel(Path(args.o).with_suffix(".xlsx")) + atd_table.to_csv(Path(args.o)) + atd_table.to_excel(Path(args.o).with_suffix(".xlsx")) diff --git a/examples/multimodel_example/README.md b/examples/multimodel_example/README.md index e0bf8f57..5d4864f4 100644 --- a/examples/multimodel_example/README.md +++ b/examples/multimodel_example/README.md @@ -1,6 +1,6 @@ # Simulation study -*This project was rendered with ASReview-Makita version 0.0.0.* +*This project was rendered with ASReview-Makita version 0.9.2.* This project was rendered from the Makita-multimodel template. See [asreview/asreview-makita#templates](https://github.com/asreview/asreview-makita#templates) for template rules and formats. @@ -14,7 +14,7 @@ This project depends on Python 3.7 or later (python.org/download), and [ASReview pip install asreview>=1.0 asreview-insights>=1.1.2 asreview-datatools ``` -If wordcloud images are required, install the following dependencies. +For generating wordclouds, install the following dependencies. 
```sh pip install asreview-wordcloud @@ -24,8 +24,8 @@ pip install asreview-wordcloud The performance on the following datasets is evaluated: -- data/van_de_Schoot_2018.csv -- data/Smid_2020.csv +- data\Smid_2020.csv +- data\van_de_Schoot_2018.csv ## Run simulation @@ -43,8 +43,8 @@ The following files are found in this project: โ”œโ”€โ”€ ๐Ÿ“œREADME.md โ”œโ”€โ”€ ๐Ÿ“œjobs.sh โ”œโ”€โ”€ ๐Ÿ“‚data - โ”‚ โ”œโ”€โ”€ ๐Ÿ“œvan_de_Schoot_2018.csv โ”‚ โ”œโ”€โ”€ ๐Ÿ“œSmid_2020.csv + โ”‚ โ”œโ”€โ”€ ๐Ÿ“œvan_de_Schoot_2018.csv โ”œโ”€โ”€ ๐Ÿ“‚scripts โ”‚ โ”œโ”€โ”€ ๐Ÿ“œget_plot.py โ”‚ โ”œโ”€โ”€ ๐Ÿ“œmerge_descriptives.py @@ -53,45 +53,45 @@ The following files are found in this project: โ”‚ โ””โ”€โ”€ ๐Ÿ“œ... โ””โ”€โ”€ ๐Ÿ“‚output โ”œโ”€โ”€ ๐Ÿ“‚simulation - | โ””โ”€โ”€ ๐Ÿ“‚van_de_Schoot_2018 + | โ””โ”€โ”€ ๐Ÿ“‚Smid_2020 | โ”œโ”€โ”€ ๐Ÿ“‚descriptives - | | โ””โ”€โ”€ ๐Ÿ“œdata_stats_van_de_Schoot_2018.json + | | โ””โ”€โ”€ ๐Ÿ“œdata_stats_Smid_2020.json | โ”œโ”€โ”€ ๐Ÿ“‚state_files - | | โ”œโ”€โ”€ ๐Ÿ“œsim_van_de_Schoot_2018_`x`.asreview + | | โ”œโ”€โ”€ ๐Ÿ“œsim_Smid_2020_`x`.asreview | | โ””โ”€โ”€ ๐Ÿ“œ... | โ””โ”€โ”€ ๐Ÿ“‚metrics - | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_van_de_Schoot_2018_`x`.json + | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_Smid_2020_`x`.json | โ””โ”€โ”€ ๐Ÿ“œ... - | โ””โ”€โ”€ ๐Ÿ“‚Smid_2020 + | โ””โ”€โ”€ ๐Ÿ“‚van_de_Schoot_2018 | โ”œโ”€โ”€ ๐Ÿ“‚descriptives - | | โ””โ”€โ”€ ๐Ÿ“œdata_stats_Smid_2020.json + | | โ””โ”€โ”€ ๐Ÿ“œdata_stats_van_de_Schoot_2018.json | โ”œโ”€โ”€ ๐Ÿ“‚state_files - | | โ”œโ”€โ”€ ๐Ÿ“œsim_Smid_2020_`x`.asreview + | | โ”œโ”€โ”€ ๐Ÿ“œsim_van_de_Schoot_2018_`x`.asreview | | โ””โ”€โ”€ ๐Ÿ“œ... | โ””โ”€โ”€ ๐Ÿ“‚metrics - | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_Smid_2020_`x`.json + | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_van_de_Schoot_2018_`x`.json | โ””โ”€โ”€ ๐Ÿ“œ... 
โ”œโ”€โ”€ ๐Ÿ“‚tables | โ”œโ”€โ”€ ๐Ÿ“œdata_descriptives.csv | โ”œโ”€โ”€ ๐Ÿ“œdata_descriptives.xlsx - | โ”œโ”€โ”€ ๐Ÿ“œtds_sim_van_de_Schoot_2018.csv - | โ”œโ”€โ”€ ๐Ÿ“œtds_sim_van_de_Schoot_2018.xlsx | โ”œโ”€โ”€ ๐Ÿ“œtds_sim_Smid_2020.csv | โ”œโ”€โ”€ ๐Ÿ“œtds_sim_Smid_2020.xlsx + | โ”œโ”€โ”€ ๐Ÿ“œtds_sim_van_de_Schoot_2018.csv + | โ”œโ”€โ”€ ๐Ÿ“œtds_sim_van_de_Schoot_2018.xlsx | โ”œโ”€โ”€ ๐Ÿ“œtds_summary.csv | โ”œโ”€โ”€ ๐Ÿ“œtds_summary.xlsx - | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_van_de_Schoot_2018_metrics.csv - | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_van_de_Schoot_2018_metrics.xlsx | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_Smid_2020_metrics.csv | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_Smid_2020_metrics.xlsx + | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_van_de_Schoot_2018_metrics.csv + | โ”œโ”€โ”€ ๐Ÿ“œmetrics_sim_van_de_Schoot_2018_metrics.xlsx | โ”œโ”€โ”€ ๐Ÿ“œmetrics_summary.csv | โ””โ”€โ”€ ๐Ÿ“œmetrics_summary.xlsx โ””โ”€โ”€ ๐Ÿ“‚figures - โ”œโ”€โ”€ ๐Ÿ“ˆplot_recall_van_de_Schoot_2018.png โ”œโ”€โ”€ ๐Ÿ“ˆplot_recall_Smid_2020.png - โ”œโ”€โ”€ ๐Ÿ“ˆwordcloud_van_de_Schoot_2018.png - โ”œโ”€โ”€ ๐Ÿ“ˆwordcloud_relevant_van_de_Schoot_2018.png - โ””โ”€โ”€ ๐Ÿ“ˆwordcloud_irrelevant_van_de_Schoot_2018.png + โ”œโ”€โ”€ ๐Ÿ“ˆplot_recall_van_de_Schoot_2018.png โ”œโ”€โ”€ ๐Ÿ“ˆwordcloud_Smid_2020.png โ”œโ”€โ”€ ๐Ÿ“ˆwordcloud_relevant_Smid_2020.png โ””โ”€โ”€ ๐Ÿ“ˆwordcloud_irrelevant_Smid_2020.png + โ”œโ”€โ”€ ๐Ÿ“ˆwordcloud_van_de_Schoot_2018.png + โ”œโ”€โ”€ ๐Ÿ“ˆwordcloud_relevant_van_de_Schoot_2018.png + โ””โ”€โ”€ ๐Ÿ“ˆwordcloud_irrelevant_van_de_Schoot_2018.png diff --git a/examples/multimodel_example/jobs.sh b/examples/multimodel_example/jobs.sh index 9e6c1894..c9a2fb34 100644 --- a/examples/multimodel_example/jobs.sh +++ b/examples/multimodel_example/jobs.sh @@ -1,5 +1,5 @@ -# version 0.0.0 +# version 0.9.2 # Create folder structure. By default, the folder 'output' is used to store output. 
mkdir output @@ -19,61 +19,49 @@ mkdir output/simulation/Smid_2020/metrics # Collect descriptives about the dataset Smid_2020 mkdir output/simulation/Smid_2020/descriptives -asreview data describe data/Smid_2020.csv -o output/simulation/Smid_2020/descriptives/data_stats_Smid_2020.json +python -m asreview data describe data/Smid_2020.csv -o output/simulation/Smid_2020/descriptives/data_stats_Smid_2020.json # Generate wordcloud visualizations of all datasets -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_Smid_2020.png --width 800 --height 500 -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant +python -m asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_Smid_2020.png --width 800 --height 500 +python -m asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant +python -m asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant # Simulate runs mkdir output/simulation/Smid_2020/state_files -# Classifier = logistic, Feature extractor = doc2vec , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_doc2vec_0.asreview --model logistic --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_doc2vec_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_logistic_doc2vec_0.json +# Classifier = logistic, Feature extractor = doc2vec, Query strategy = max, Balance strategy = double +python -m asreview simulate data/Smid_2020.csv -s 
output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_doc2vec_max_double.asreview --model logistic --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_doc2vec_max_double.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_logistic_doc2vec_max_double.json -# Classifier = logistic, Feature extractor = sbert , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_sbert_0.asreview --model logistic --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_sbert_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_logistic_sbert_0.json +# Classifier = logistic, Feature extractor = sbert, Query strategy = max, Balance strategy = double +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_sbert_max_double.asreview --model logistic --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_sbert_max_double.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_logistic_sbert_max_double.json -# Classifier = logistic, Feature extractor = tfidf , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_tfidf_0.asreview --model logistic --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_tfidf_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_logistic_tfidf_0.json +# Classifier = logistic, 
Feature extractor = tfidf, Query strategy = max, Balance strategy = double +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_tfidf_max_double.asreview --model logistic --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_tfidf_max_double.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_logistic_tfidf_max_double.json -# Skipped nb + doc2vec model +# Skipped nb + doc2vec + max model -# Skipped nb + sbert model +# Skipped nb + sbert + max model -# Classifier = nb, Feature extractor = tfidf , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_nb_tfidf_0.asreview --model nb --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_nb_tfidf_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_nb_tfidf_0.json +# Classifier = nb, Feature extractor = tfidf, Query strategy = max, Balance strategy = double +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_nb_tfidf_max_double.asreview --model nb --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_nb_tfidf_max_double.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_nb_tfidf_max_double.json -# Classifier = rf, Feature extractor = doc2vec , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_doc2vec_0.asreview --model rf --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -asreview metrics 
output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_doc2vec_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_rf_doc2vec_0.json +# Classifier = rf, Feature extractor = doc2vec, Query strategy = max, Balance strategy = double +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_doc2vec_max_double.asreview --model rf --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_doc2vec_max_double.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_rf_doc2vec_max_double.json -# Classifier = rf, Feature extractor = sbert , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_sbert_0.asreview --model rf --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_sbert_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_rf_sbert_0.json +# Classifier = rf, Feature extractor = sbert, Query strategy = max, Balance strategy = double +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_sbert_max_double.asreview --model rf --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_sbert_max_double.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_rf_sbert_max_double.json -# Classifier = rf, Feature extractor = tfidf , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_tfidf_0.asreview --model rf --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 
-asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_tfidf_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_rf_tfidf_0.json - -# Classifier = svm, Feature extractor = doc2vec , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_doc2vec_0.asreview --model svm --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_doc2vec_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_svm_doc2vec_0.json - -# Classifier = svm, Feature extractor = sbert , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_sbert_0.asreview --model svm --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_sbert_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_svm_sbert_0.json - -# Classifier = svm, Feature extractor = tfidf , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_tfidf_0.asreview --model svm --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_tfidf_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_svm_tfidf_0.json +# Classifier = rf, Feature extractor = tfidf, Query strategy = max, Balance strategy = double +python -m asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_tfidf_max_double.asreview --model rf --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_tfidf_max_double.asreview -o 
output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_rf_tfidf_max_double.json # Generate plot and tables for dataset python scripts/get_plot.py -s output/simulation/Smid_2020/state_files/ -o output/figures/plot_recall_sim_Smid_2020.png --show_legend model @@ -90,61 +78,49 @@ mkdir output/simulation/van_de_Schoot_2018/metrics # Collect descriptives about the dataset van_de_Schoot_2018 mkdir output/simulation/van_de_Schoot_2018/descriptives -asreview data describe data/van_de_Schoot_2018.csv -o output/simulation/van_de_Schoot_2018/descriptives/data_stats_van_de_Schoot_2018.json +python -m asreview data describe data/van_de_Schoot_2018.csv -o output/simulation/van_de_Schoot_2018/descriptives/data_stats_van_de_Schoot_2018.json # Generate wordcloud visualizations of all datasets -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_van_de_Schoot_2018.png --width 800 --height 500 -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant +python -m asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_van_de_Schoot_2018.png --width 800 --height 500 +python -m asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant +python -m asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant # Simulate runs mkdir output/simulation/van_de_Schoot_2018/state_files -# Classifier = logistic, Feature extractor = doc2vec , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_doc2vec_0.asreview --model logistic --query_strategy max 
--feature_extraction doc2vec --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_doc2vec_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_logistic_doc2vec_0.json - -# Classifier = logistic, Feature extractor = sbert , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_sbert_0.asreview --model logistic --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_sbert_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_logistic_sbert_0.json - -# Classifier = logistic, Feature extractor = tfidf , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_tfidf_0.asreview --model logistic --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_tfidf_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_logistic_tfidf_0.json - +# Classifier = logistic, Feature extractor = doc2vec, Query strategy = max, Balance strategy = double +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_doc2vec_max_double.asreview --model logistic --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_doc2vec_max_double.asreview -o 
output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_logistic_doc2vec_max_double.json -# Skipped nb + doc2vec model +# Classifier = logistic, Feature extractor = sbert, Query strategy = max, Balance strategy = double +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_sbert_max_double.asreview --model logistic --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_sbert_max_double.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_logistic_sbert_max_double.json +# Classifier = logistic, Feature extractor = tfidf, Query strategy = max, Balance strategy = double +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_tfidf_max_double.asreview --model logistic --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_tfidf_max_double.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_logistic_tfidf_max_double.json -# Skipped nb + sbert model -# Classifier = nb, Feature extractor = tfidf , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_nb_tfidf_0.asreview --model nb --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_nb_tfidf_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_nb_tfidf_0.json +# Skipped nb + doc2vec + 
max model -# Classifier = rf, Feature extractor = doc2vec , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_doc2vec_0.asreview --model rf --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_doc2vec_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_rf_doc2vec_0.json -# Classifier = rf, Feature extractor = sbert , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_sbert_0.asreview --model rf --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_sbert_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_rf_sbert_0.json +# Skipped nb + sbert + max model -# Classifier = rf, Feature extractor = tfidf , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_tfidf_0.asreview --model rf --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_tfidf_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_rf_tfidf_0.json +# Classifier = nb, Feature extractor = tfidf, Query strategy = max, Balance strategy = double +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_nb_tfidf_max_double.asreview --model nb --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_nb_tfidf_max_double.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_nb_tfidf_max_double.json -# Classifier = svm, Feature extractor = doc2vec , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_doc2vec_0.asreview --model svm --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_doc2vec_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_svm_doc2vec_0.json +# Classifier = rf, Feature extractor = doc2vec, Query strategy = max, Balance strategy = double +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_doc2vec_max_double.asreview --model rf --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_doc2vec_max_double.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_rf_doc2vec_max_double.json -# Classifier = svm, Feature extractor = sbert , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_sbert_0.asreview --model svm --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_sbert_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_svm_sbert_0.json +# Classifier = rf, Feature extractor = sbert, Query strategy = max, Balance strategy = double +python -m asreview simulate 
data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_sbert_max_double.asreview --model rf --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_sbert_max_double.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_rf_sbert_max_double.json -# Classifier = svm, Feature extractor = tfidf , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_tfidf_0.asreview --model svm --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_tfidf_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_svm_tfidf_0.json +# Classifier = rf, Feature extractor = tfidf, Query strategy = max, Balance strategy = double +python -m asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_tfidf_max_double.asreview --model rf --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_tfidf_max_double.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_rf_tfidf_max_double.json # Generate plot and tables for dataset python scripts/get_plot.py -s output/simulation/van_de_Schoot_2018/state_files/ -o output/figures/plot_recall_sim_van_de_Schoot_2018.png --show_legend model @@ -152,5 +128,5 @@ python scripts/merge_metrics.py -s output/simulation/van_de_Schoot_2018/metrics/ python scripts/merge_tds.py -s 
output/simulation/van_de_Schoot_2018/metrics/ -o output/tables/time_to_discovery/tds_sim_van_de_Schoot_2018.csv # Merge descriptives and metrics -python scripts/merge_descriptives.py -s output/simulation/*/descriptives/ -o output/tables/data_descriptives_all.csv -python scripts/merge_metrics.py -s output/simulation/*/metrics/ -o output/tables/metrics_sim_all.csv +python scripts/merge_descriptives.py +python scripts/merge_metrics.py diff --git a/examples/multimodel_example/scripts/get_plot.py b/examples/multimodel_example/scripts/get_plot.py index 7d29468d..02f9f883 100644 --- a/examples/multimodel_example/scripts/get_plot.py +++ b/examples/multimodel_example/scripts/get_plot.py @@ -15,63 +15,77 @@ - Teijema, Jelle """ -# version 0.0.0 +# version 0.9.2 import argparse from pathlib import Path -import matplotlib.colors as mcolors import matplotlib.pyplot as plt from asreview import open_state from asreviewcontrib.insights.plot import plot_recall -def get_plot_from_states(states, filename, legend=None): - """Generate an ASReview plot from state files.""" +def _set_legend(ax, state, legend_option, label_to_line, state_file): + metadata = state.settings_metadata + label = None + + if legend_option == "filename": + label = state_file.stem + elif legend_option == "model": + label = " - ".join( + [ + metadata["settings"]["model"], + metadata["settings"]["feature_extraction"], + metadata["settings"]["balance_strategy"], + metadata["settings"]["query_strategy"], + ] + ) + elif legend_option == "classifier": + label = metadata["settings"]["model"] + else: + try: + label = metadata["settings"][legend_option] + except KeyError as err: + raise ValueError(f"Invalid legend setting: '{legend_option}'") from err # noqa: E501 + + if label: + # add label to line + if label not in label_to_line: + ax.lines[-2].set_label(label) + label_to_line[label] = ax.lines[-2] + # set color of line to the color of the first line with the same label + else: + 
ax.lines[-2].set_color(label_to_line[label].get_color()) + ax.lines[-2].set_label("_no_legend_") - fig, ax = plt.subplots() - labels = [] - colors = list(mcolors.TABLEAU_COLORS.values()) +def get_plot_from_states(states, filename, legend=None): + """Generate an ASReview plot from state files. + + Arguments + --------- + states: list + List of state files. + filename: str + Filename of the plot. + legend: str + Add a legend to the plot, based on the given parameter. + Possible values: "filename", "model", "feature_extraction", + "balance_strategy", "query_strategy", "classifier". + """ + states = sorted(states) + fig, ax = plt.subplots() + label_to_line = {} for state_file in states: with open_state(state_file) as state: - # draw the plot plot_recall(ax, state) + if legend: + _set_legend(ax, state, legend, label_to_line, state_file) - # set the label - if legend == "filename": - ax.lines[-2].set_label(state_file.stem) - ax.legend(loc=4, prop={"size": 8}) - elif legend: - metadata = state.settings_metadata - - if legend == "model": - label = " - ".join( - [ - metadata["settings"]["model"], - metadata["settings"]["feature_extraction"], - metadata["settings"]["balance_strategy"], - metadata["settings"]["query_strategy"], - ] - ) - elif legend == "classifier": - label = metadata["settings"]["model"] - else: - try: - label = metadata["settings"][legend] - except KeyError as exc: - raise ValueError( - f"Legend setting '{legend}' " - "not found in state file settings." 
- ) from exc - if label not in labels: - ax.lines[-2].set_label(label) - labels.append(label) - ax.lines[-2].set_color(colors[labels.index(label) % len(colors)]) - ax.legend(loc=4, prop={"size": 8}) - + if legend: + ax.legend(loc=4, prop={"size": 8}) fig.savefig(str(filename)) @@ -90,10 +104,10 @@ def get_plot_from_states(states, filename, legend=None): args = parser.parse_args() # load states - states = Path(args.s).glob("*.asreview") + states = list(Path(args.s).glob("*.asreview")) # check if states are found - if len(list(states)) == 0: + if len(states) == 0: raise FileNotFoundError(f"No state files found in {args.s}") # generate plot and save results diff --git a/examples/multimodel_example/scripts/merge_descriptives.py b/examples/multimodel_example/scripts/merge_descriptives.py index 72e1390c..3a121646 100644 --- a/examples/multimodel_example/scripts/merge_descriptives.py +++ b/examples/multimodel_example/scripts/merge_descriptives.py @@ -18,7 +18,7 @@ - De Bruin, Jonathan """ -# version 0.0.0 +# version 0.9.2 import argparse import glob diff --git a/examples/multimodel_example/scripts/merge_metrics.py b/examples/multimodel_example/scripts/merge_metrics.py index aa031461..5022167a 100644 --- a/examples/multimodel_example/scripts/merge_metrics.py +++ b/examples/multimodel_example/scripts/merge_metrics.py @@ -18,7 +18,7 @@ - De Bruin, Jonathan """ -# version 0.0.0 +# version 0.9.2 import argparse import glob @@ -55,7 +55,10 @@ def create_table_state_metrics(metric_files): description="Merge metrics of multiple states into single table." 
) parser.add_argument( - "-s", type=str, default="output/simulation/*/metrics/", help="states location" + "-s", + type=str, + default="output/simulation/*/metrics/", + help="states location", ) parser.add_argument( "-o", diff --git a/examples/multimodel_example/scripts/merge_tds.py b/examples/multimodel_example/scripts/merge_tds.py index 1beb52c6..3fc153d7 100644 --- a/examples/multimodel_example/scripts/merge_tds.py +++ b/examples/multimodel_example/scripts/merge_tds.py @@ -19,11 +19,12 @@ - De Bruin, Jonathan """ -# version 0.0.0 +# version 0.9.2 import argparse import glob import json +from math import nan from pathlib import Path import pandas as pd @@ -37,7 +38,7 @@ def create_table_state_tds(metrics): with open(metric) as f: i = next(filter(lambda x: x["id"] == "td", json.load(f)["data"]["items"]))[ "value" - ] # noqa + ] values.extend((item[0], item[1], file_counter) for item in i) file_counter += 1 @@ -47,25 +48,26 @@ def create_table_state_tds(metrics): columns="metric_file", values="td", aggfunc="first", - fill_value=0, + fill_value=nan, ) pivoted.columns = [f"td_sim_{col}" for col in pivoted.columns] return pivoted +def get_atd_values(df): + df["record_atd"] = df.mean(axis=1) + + df.loc["average_simulation_TD"] = df.iloc[:, :-1].mean(axis=0) + + return df + + if __name__ == "__main__": parser = argparse.ArgumentParser( description="Merge tds of multiple metrics into single table." 
) - parser.add_argument( - "-s", type=str, default="output/simulation/*/metrics/", help="metrics location" - ) - parser.add_argument( - "-o", - type=str, - default="output/tables/tds_sim_all.csv", - help="Output table location", - ) + parser.add_argument("-s", type=str, required=True, help="metrics location") + parser.add_argument("-o", type=str, required=True, help="Output table location") args = parser.parse_args() # load metric files @@ -75,9 +77,14 @@ def create_table_state_tds(metrics): if len(metric_files) == 0: raise FileNotFoundError("No metrics found in " + args.s) - states_table = create_table_state_tds(metric_files) + # check if output file has .csv extension + if Path(args.o).suffix != ".csv": + raise ValueError("Output file should have .csv extension") + + td_table = create_table_state_tds(metric_files) + atd_table = get_atd_values(td_table) # store table Path(args.o).parent.mkdir(parents=True, exist_ok=True) - states_table.to_csv(Path(args.o)) - states_table.to_excel(Path(args.o).with_suffix(".xlsx")) + atd_table.to_csv(Path(args.o)) + atd_table.to_excel(Path(args.o).with_suffix(".xlsx"))