-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpublications.bib
1277 lines (1167 loc) · 120 KB
/
publications.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
@article{botvinik-nezer_reproducibility_2023,
title = {Reproducibility in {Neuroimaging} {Analysis}: {Challenges} and {Solutions}},
volume = {8},
issn = {2451-9022},
shorttitle = {Reproducibility in {Neuroimaging} {Analysis}},
url = {https://linkinghub.elsevier.com/retrieve/pii/S245190222200341X},
doi = {10.1016/j.bpsc.2022.12.006},
abstract = {Recent years have marked a renaissance in efforts to increase research reproducibility in psychology, neuroscience, and related fields. Reproducibility is the cornerstone of a solid foundation of fundamental research—one that will support new theories built on valid findings and technological innovation that works. The increased focus on reproducibility has made the barriers to it increasingly apparent, along with the development of new tools and practices to overcome these barriers. Here, we review challenges, solutions, and emerging best practices with a particular emphasis on neuroimaging studies. We distinguish 3 main types of reproducibility, discussing each in turn. Analytical reproducibility is the ability to reproduce findings using the same data and methods. Replicability is the ability to find an effect in new datasets, using the same or similar methods. Finally, robustness to analytical variability refers to the ability to identify a finding consistently across variation in methods. The incorporation of these tools and practices will result in more reproducible, replicable, and robust psychological and brain research and a stronger scientific foundation across fields of inquiry.},
language = {en},
number = {8},
urldate = {2024-01-19},
journal = {Biological Psychiatry: Cognitive Neuroscience and Neuroimaging},
author = {Botvinik-Nezer, Rotem and Wager, Tor D.},
month = aug,
year = {2023},
keywords = {Important, Review},
pages = {780--788},
file = {Botvinik-Nezer and Wager - 2023 - Reproducibility in Neuroimaging Analysis Challeng.pdf:/home/alpron/Zotero/storage/HE2F52IP/Botvinik-Nezer and Wager - 2023 - Reproducibility in Neuroimaging Analysis Challeng.pdf:application/pdf},
}
@article{wagner_fairly_2022,
title = {{FAIRly} big: {A} framework for computationally reproducible processing of large-scale data},
volume = {9},
issn = {2052-4463},
shorttitle = {{FAIRly} big},
url = {https://www.nature.com/articles/s41597-022-01163-2},
doi = {10.1038/s41597-022-01163-2},
abstract = {Large-scale datasets present unique opportunities to perform scientific investigations with unprecedented breadth. However, they also pose considerable challenges for the findability, accessibility, interoperability, and reusability (FAIR) of research outcomes due to infrastructure limitations, data usage constraints, or software license restrictions. Here we introduce a DataLad-based, domain-agnostic framework suitable for reproducible data processing in compliance with open science mandates. The framework attempts to minimize platform idiosyncrasies and performance-related complexities. It affords the capture of machine-actionable computational provenance records that can be used to retrace and verify the origins of research outcomes, as well as be re-executed independent of the original computing infrastructure. We demonstrate the framework’s performance using two showcases: one highlighting data sharing and transparency (using the studyforrest.org dataset) and another highlighting scalability (using the largest public brain imaging dataset available: the UK Biobank dataset).},
language = {en},
number = {1},
urldate = {2024-01-03},
journal = {Scientific Data},
author = {Wagner, Adina S. and Waite, Laura K. and Wierzba, Małgorzata and Hoffstaedter, Felix and Waite, Alexander Q. and Poldrack, Benjamin and Eickhoff, Simon B. and Hanke, Michael},
month = mar,
year = {2022},
note = {Number: 1},
keywords = {datalad, Important},
pages = {80},
file = {Wagner et al. - 2022 - FAIRly big A framework for computationally reprod.pdf:/home/alpron/Zotero/storage/JQW82RZ9/Wagner et al. - 2022 - FAIRly big A framework for computationally reprod.pdf:application/pdf},
}
@article{halchenko_datalad_2021,
title = {{DataLad}: distributed system for joint management of code, data, and their relationship},
volume = {6},
issn = {2475-9066},
shorttitle = {{DataLad}},
url = {https://joss.theoj.org/papers/10.21105/joss.03262},
doi = {10.21105/joss.03262},
number = {63},
urldate = {2024-01-03},
journal = {Journal of Open Source Software},
author = {Halchenko, Yaroslav and Meyer, Kyle and Poldrack, Benjamin and Solanky, Debanjum and Wagner, Adina and Gors, Jason and MacFarlane, Dave and Pustina, Dorian and Sochat, Vanessa and Ghosh, Satrajit and Mönch, Christian and Markiewicz, Christopher and Waite, Laura and Shlyakhter, Ilya and De La Vega, Alejandro and Hayashi, Soichi and Häusler, Christian and Poline, Jean-Baptiste and Kadelka, Tobias and Skytén, Kusti and Jarecka, Dorota and Kennedy, David and Strauss, Ted and Cieslak, Matt and Vavra, Peter and Ioanas, Horea-Ioan and Schneider, Robin and Pflüger, Mika and Haxby, James and Eickhoff, Simon and Hanke, Michael},
month = jul,
year = {2021},
note = {Number: 63},
keywords = {datalad},
pages = {3262},
file = {Halchenko et al. - 2021 - DataLad distributed system for joint management o.pdf:/home/alpron/Zotero/storage/JWK96IST/Halchenko et al. - 2021 - DataLad distributed system for joint management o.pdf:application/pdf},
}
@article{colliot_reproducibility_2024,
title = {Reproducibility in medical image computing: what is it and how is it assessed?},
abstract = {Medical image computing (MIC) is devoted to computational methods for analysis of medical imaging data and their assessment through experiments. It is thus an experimental science. Reproducibility is a cornerstone of progress in all experimental sciences. As in many other fields, there are major concerns that reproducibility is unsatisfactory in MIC. However, reproducibility is not a single concept but a spectrum, which is often misunderstood by researchers. Moreover, even though some measures have been put in place to promote reproducibility in the MIC community, it is unclear if they have been effective so far.},
language = {en},
journal = {Open Review 3fIXW9mFfn},
author = {Colliot, Olivier and Thibeau-Sutre, Elina and Brianceau, Camille and Burgos, Ninon},
year = {2024},
file = {Colliot et al. - Reproducibility in medical image computing what i.pdf:/home/alpron/Zotero/storage/XCGVIGEI/Colliot et al. - Reproducibility in medical image computing what i.pdf:application/pdf},
}
@article{chekroud_illusory_2024,
title = {Illusory generalizability of clinical prediction models},
volume = {383},
issn = {0036-8075, 1095-9203},
url = {https://www.science.org/doi/10.1126/science.adg8538},
doi = {10.1126/science.adg8538},
abstract = {It is widely hoped that statistical models can improve decision-making related to medical treatments. Because of the cost and scarcity of medical outcomes data, this hope is typically based on investigators observing a model’s success in one or two datasets or clinical contexts. We scrutinized this optimism by examining how well a machine learning model performed across several independent clinical trials of antipsychotic medication for schizophrenia. Models predicted patient outcomes with high accuracy within the trial in which the model was developed but performed no better than chance when applied out-of-sample. Pooling data across trials to predict outcomes in the trial left out did not improve predictions. These results suggest that models predicting treatment outcomes in schizophrenia are highly context-dependent and may have limited generalizability. Editor’s summary: A central promise of artificial intelligence (AI) in healthcare is that large datasets can be mined to predict and identify the best course of care for future patients. Unfortunately, we do not know how these models would perform on new patients because they are rarely tested prospectively on truly independent patient samples. Chekroud et al. showed that machine learning models routinely achieve perfect performance in one dataset even when that dataset is a large international multisite clinical trial (see the Perspective by Petzschner). However, when that exact model was tested in truly independent clinical trials, performance fell to chance levels. Even when building what should be a more robust model by aggregating across a group of similar multisite trials, subsequent predictive performance remained poor. —Peter Stern. Clinical prediction models that work in one trial do not work in future trials of the same condition and same treatments.},
language = {en},
number = {6679},
urldate = {2024-01-15},
journal = {Science},
author = {Chekroud, Adam M. and Hawrilenko, Matt and Loho, Hieronimus and Bondar, Julia and Gueorguieva, Ralitza and Hasan, Alkomiet and Kambeitz, Joseph and Corlett, Philip R. and Koutsouleris, Nikolaos and Krumholz, Harlan M. and Krystal, John H. and Paulus, Martin},
month = jan,
year = {2024},
note = {Number: 6679},
keywords = {Important},
pages = {164--167},
file = {Chekroud et al. - 2024 - Illusory generalizability of clinical prediction m.pdf:/home/alpron/Zotero/storage/WRRD7K3B/Chekroud et al. - 2024 - Illusory generalizability of clinical prediction m.pdf:application/pdf},
}
@book{elisadecastroguerra_vers_2024,
title = {Vers une recherche reproductible},
url = {https://bookdown.org/alegrand/bookdown/},
abstract = {Livre d’introduction à la recherche reproductible rédigé lors d’un booksprint.},
urldate = {2024-01-16},
author = {de Castro Guerra, Elisa and Granger, Sabrina and Hejblum, Boris and Legrand, Arnaud and Pernot, Pascal and Rougier, Nicolas and Desquilbet, Loïc},
month = jan,
year = {2024},
file = {Snapshot:/home/alpron/Zotero/storage/7VJEDL93/bookrr.html:text/html},
}
@article{noauthor_containers_2023,
title = {Containers for computational reproducibility},
volume = {3},
issn = {2662-8449},
url = {https://doi.org/10.1038/s43586-023-00244-9},
doi = {10.1038/s43586-023-00244-9},
abstract = {This PrimeView highlights the range of applications benefiting from the use of containers for reproducibility of computational data analysis.},
number = {1},
journal = {Nature Reviews Methods Primers},
month = jul,
year = {2023},
pages = {51},
}
@article{vogt_reproducibility_2023,
title = {Reproducibility in {MRI}},
volume = {20},
issn = {1548-7091, 1548-7105},
url = {https://www.nature.com/articles/s41592-022-01737-3},
doi = {10.1038/s41592-022-01737-3},
language = {en},
number = {1},
urldate = {2024-01-19},
journal = {Nature Methods},
author = {Vogt, Nina},
month = jan,
year = {2023},
pages = {34},
}
@article{botvinik-nezer_fmri_2019,
title = {{fMRI} data of mixed gambles from the {Neuroimaging} {Analysis} {Replication} and {Prediction} {Study}},
volume = {6},
issn = {2052-4463},
url = {https://www.nature.com/articles/s41597-019-0113-7},
doi = {10.1038/s41597-019-0113-7},
abstract = {There is an ongoing debate about the replicability of neuroimaging research. It was suggested that one of the main reasons for the high rate of false positive results is the many degrees of freedom researchers have during data analysis. In the Neuroimaging Analysis Replication and Prediction Study (NARPS), we aim to provide the first scientific evidence on the variability of results across analysis teams in neuroscience. We collected fMRI data from 108 participants during two versions of the mixed gambles task, which is often used to study decision-making under risk. For each participant, the dataset includes an anatomical (T1 weighted) scan and fMRI as well as behavioral data from four runs of the task. The dataset is shared through OpenNeuro and is formatted according to the Brain Imaging Data Structure (BIDS) standard. Data pre-processed with fMRIprep and quality control reports are also publicly shared. This dataset can be used to study decision-making under risk and to test replicability and interpretability of previous results in the field.},
language = {en},
number = {1},
urldate = {2024-01-22},
journal = {Scientific Data},
author = {Botvinik-Nezer, Rotem and Iwanir, Roni and Holzmeister, Felix and Huber, Jürgen and Johannesson, Magnus and Kirchler, Michael and Dreber, Anna and Camerer, Colin F. and Poldrack, Russell A. and Schonberg, Tom},
month = jul,
year = {2019},
keywords = {NARPS},
pages = {106},
file = {Botvinik-Nezer et al. - 2019 - fMRI data of mixed gambles from the Neuroimaging A.pdf:/home/alpron/Zotero/storage/7P9AT6CT/Botvinik-Nezer et al. - 2019 - fMRI data of mixed gambles from the Neuroimaging A.pdf:application/pdf},
}
@article{zhao_reproducible_2024,
title = {A reproducible and generalizable software workflow for analysis of large-scale neuroimaging data collections using {BIDS} {Apps}},
volume = {2},
issn = {2837-6056},
url = {https://direct.mit.edu/imag/article/doi/10.1162/imag_a_00074/119046/A-reproducible-and-generalizable-software-workflow},
doi = {10.1162/imag_a_00074},
abstract = {Neuroimaging research faces a crisis of reproducibility. With massive sample sizes and greater data complexity, this problem becomes more acute. Software that operates on imaging data defined using the Brain Imaging Data Structure (BIDS)—the BIDS App—has provided a substantial advance. However, even using BIDS Apps, a full audit trail of data processing is a necessary prerequisite for fully reproducible research. Obtaining a faithful record of the audit trail is challenging—especially for large datasets. Recently, the FAIRly big framework was introduced as a way to facilitate reproducible processing of large-scale data by leveraging DataLad—a version control system for data management. However, the current implementation of this framework was more of a proof of concept, and could not be immediately reused by other investigators for different use cases. Here, we introduce the BIDS App Bootstrap (BABS), a user-friendly and generalizable Python package for reproducible image processing at scale. BABS facilitates the reproducible application of BIDS Apps to large-scale datasets. Leveraging DataLad and the FAIRly big framework, BABS tracks the full audit trail of data processing in a scalable way by automatically preparing all scripts necessary for data processing and version tracking on high performance computing (HPC) systems. Currently, BABS supports jobs submissions and audits on Sun Grid Engine (SGE) and Slurm HPCs with a parsimonious set of programs. To demonstrate its scalability, we applied BABS to data from the Healthy Brain Network (HBN; n = 2,565). Taken together, BABS allows reproducible and scalable image processing and is broadly extensible via an open-source development model.},
language = {en},
urldate = {2024-02-09},
journal = {Imaging Neuroscience},
author = {Zhao, Chenying and Jarecka, Dorota and Covitz, Sydney and Chen, Yibei and Eickhoff, Simon B. and Fair, Damien A. and Franco, Alexandre R. and Halchenko, Yaroslav O. and Hendrickson, Timothy J. and Hoffstaedter, Felix and Houghton, Audrey and Kiar, Gregory and Macdonald, Austin and Mehta, Kahini and Milham, Michael P. and Salo, Taylor and Hanke, Michael and Ghosh, Satrajit S. and Cieslak, Matthew and Satterthwaite, Theodore D.},
month = jan,
year = {2024},
keywords = {Important},
pages = {1--19},
file = {Zhao et al. - 2024 - A reproducible and generalizable software workflow.pdf:/home/alpron/Zotero/storage/SLA62NAB/Zhao et al. - 2024 - A reproducible and generalizable software workflow.pdf:application/pdf},
}
@article{niso_open_2022,
title = {Open and reproducible neuroimaging: {From} study inception to publication},
volume = {263},
issn = {10538119},
shorttitle = {Open and reproducible neuroimaging},
url = {https://linkinghub.elsevier.com/retrieve/pii/S1053811922007388},
doi = {10.1016/j.neuroimage.2022.119623},
language = {en},
urldate = {2024-02-09},
journal = {NeuroImage},
author = {Niso, Guiomar and Botvinik-Nezer, Rotem and Appelhoff, Stefan and De La Vega, Alejandro and Esteban, Oscar and Etzel, Joset A. and Finc, Karolina and Ganz, Melanie and Gau, Rémi and Halchenko, Yaroslav O. and Herholz, Peer and Karakuzu, Agah and Keator, David B. and Markiewicz, Christopher J. and Maumet, Camille and Pernet, Cyril R. and Pestilli, Franco and Queder, Nazek and Schmitt, Tina and Sójka, Weronika and Wagner, Adina S. and Whitaker, Kirstie J. and Rieger, Jochem W.},
month = nov,
year = {2022},
pages = {119623},
file = {Niso et al. - 2022 - Open and reproducible neuroimaging From study inc.pdf:/home/alpron/Zotero/storage/6IM7E6YQ/Niso et al. - 2022 - Open and reproducible neuroimaging From study inc.pdf:application/pdf},
}
@article{bao_integrating_2022,
title = {Integrating the {BIDS} {Neuroimaging} {Data} {Format} and {Workflow} {Optimization} for {Large}-{Scale} {Medical} {Image} {Analysis}},
volume = {35},
issn = {0897-1889, 1618-727X},
url = {https://link.springer.com/10.1007/s10278-022-00679-8},
doi = {10.1007/s10278-022-00679-8},
abstract = {A robust medical image computing infrastructure must host massive multimodal archives, perform extensive analysis pipelines, and execute scalable job management. An emerging data format standard, the Brain Imaging Data Structure (BIDS), introduces complexities for interfacing with XNAT archives. Moreover, workflow integration is combinatorically problematic when matching large amount of processing to large datasets. Historically, workflow engines have been focused on refining workflows themselves instead of actual job generation. However, such an approach is incompatible with data centric architecture that hosts heterogeneous medical image computing. Distributed automation for XNAT toolkit (DAX) provides large-scale image storage and analysis pipelines with an optimized job management tool. Herein, we describe developments for DAX that allows for integration of XNAT and BIDS standards. We also improve DAX’s efficiencies of diverse containerized workflows in a high-performance computing (HPC) environment. Briefly, we integrate YAML configuration processor scripts to abstract workflow data inputs, data outputs, commands, and job attributes. Finally, we propose an online database–driven mechanism for DAX to efficiently identify the most recent updated sessions, thereby improving job building efficiency on large projects. We refer the proposed overall DAX development in this work as DAX-1 (DAX version 1). To validate the effectiveness of the new features, we verified (1) the efficiency of converting XNAT data to BIDS format and the correctness of the conversion using a collection of BIDS standard containerized neuroimaging workflows, (2) how YAML-based processor simplified configuration setup via a sequence of application pipelines, and (3) the productivity of DAX-1 on generating actual HPC processing jobs compared with earlier DAX baseline method. 
The empirical results show that (1) DAX-1 converting XNAT data to BIDS has similar speed as accessing XNAT data only; (2) YAML can integrate to the DAX-1 with shallow learning curve for users, and (3) DAX-1 reduced the job/assessor generation latency by finding recent modified sessions. Herein, we present approaches for efficiently integrating XNAT and modern image formats with a scalable workflow engine for the large-scale dataset access and processing.},
language = {en},
number = {6},
urldate = {2024-02-09},
journal = {Journal of Digital Imaging},
author = {Bao, Shunxing and Boyd, Brian D. and Kanakaraj, Praitayini and Ramadass, Karthik and Meyer, Francisco A. C. and Liu, Yuqian and Duett, William E. and Huo, Yuankai and Lyu, Ilwoo and Zald, David H. and Smith, Seth A. and Rogers, Baxter P. and Landman, Bennett A.},
month = dec,
year = {2022},
pages = {1576--1589},
file = {Bao et al. - 2022 - Integrating the BIDS Neuroimaging Data Format and .pdf:/home/alpron/Zotero/storage/HIE7CU7H/Bao et al. - 2022 - Integrating the BIDS Neuroimaging Data Format and .pdf:application/pdf},
}
@article{vallet_toward_2022,
title = {Toward practical transparent verifiable and long-term reproducible research using {Guix}},
volume = {9},
issn = {2052-4463},
url = {https://www.nature.com/articles/s41597-022-01720-9},
doi = {10.1038/s41597-022-01720-9},
abstract = {Reproducibility crisis urge scientists to promote transparency which allows peers to draw same conclusions after performing identical steps from hypothesis to results. Growing resources are developed to open the access to methods, data and source codes. Still, the computational environment, an interface between data and source code running analyses, is not addressed. Environments are usually described with software and library names associated with version labels or provided as an opaque container image. This is not enough to describe the complexity of the dependencies on which they rely to operate on. We describe this issue and illustrate how open tools like Guix can be used by any scientist to share their environment and allow peers to reproduce it. Some steps of research might not be fully reproducible, but at least, transparency for computation is technically addressable. These tools should be considered by scientists willing to promote transparency and open science.},
language = {en},
number = {1},
urldate = {2024-02-27},
journal = {Scientific Data},
author = {Vallet, Nicolas and Michonneau, David and Tournier, Simon},
month = oct,
year = {2022},
pages = {597},
file = {Vallet et al. - 2022 - Toward practical transparent verifiable and long-t.pdf:/home/alpron/Zotero/storage/KT6J79AS/Vallet et al. - 2022 - Toward practical transparent verifiable and long-t.pdf:application/pdf},
}
@article{goble_fair_2020,
title = {{FAIR} {Computational} {Workflows}},
volume = {2},
issn = {2641-435X},
url = {https://direct.mit.edu/dint/article/2/1-2/108-121/10003},
doi = {10.1162/dint_a_00033},
abstract = {Computational workflows describe the complex multi-step methods that are used for data collection, data preparation, analytics, predictive modelling, and simulation that lead to new data products. They can inherently contribute to the FAIR data principles: by processing data according to established metadata; by creating metadata themselves during the processing of data; and by tracking and recording data provenance. These properties aid data quality assessment and contribute to secondary data usage. Moreover, workflows are digital objects in their own right. This paper argues that FAIR principles for workflows need to address their specific nature in terms of their composition of executable software steps, their provenance, and their development.},
language = {en},
number = {1-2},
urldate = {2024-02-28},
journal = {Data Intelligence},
author = {Goble, Carole and Cohen-Boulakia, Sarah and Soiland-Reyes, Stian and Garijo, Daniel and Gil, Yolanda and Crusoe, Michael R. and Peters, Kristian and Schober, Daniel},
month = jan,
year = {2020},
pages = {108--121},
file = {Goble et al. - 2020 - FAIR Computational Workflows.pdf:/home/alpron/Zotero/storage/B723BQ83/Goble et al. - 2020 - FAIR Computational Workflows.pdf:application/pdf},
}
@article{chen_reproducing_2022,
title = {Reproducing {FSL}'s {fMRI} data analysis via {Nipype}: {Relevance}, challenges, and solutions},
volume = {1},
issn = {2813-1193},
shorttitle = {Reproducing {FSL}'s {fMRI} data analysis via {Nipype}},
url = {https://www.frontiersin.org/articles/10.3389/fnimg.2022.953215/full},
doi = {10.3389/fnimg.2022.953215},
abstract = {The “replication crisis” in neuroscientific research has led to calls for improving reproducibility. In traditional neuroscience analyses, irreproducibility may occur as a result of issues across various stages of the methodological process. For example, different operating systems, different software packages, and even different versions of the same package can lead to variable results. Nipype, an open-source Python project, integrates different neuroimaging software packages uniformly to improve the reproducibility of neuroimaging analyses. Nipype has the advantage over traditional software packages (e.g., FSL, ANFI, SPM, etc.) by (1) providing comprehensive software development frameworks and usage information, (2) improving computational efficiency, (3) facilitating reproducibility through sufficient details, and (4) easing the steep learning curve. Despite the rich tutorials it has provided, the Nipype community lacks a standard three-level GLM tutorial for FSL. Using the classical Flanker task dataset, we first precisely reproduce a three-level GLM analysis with FSL via Nipype. Next, we point out some undocumented discrepancies between Nipype and FSL functions that led to substantial differences in results. Finally, we provide revised Nipype code in re-executable notebooks that assure result invariability between FSL and Nipype. Our analyses, notebooks, and operating software specifications (e.g., docker build files) are available on the Open Science Framework platform.},
urldate = {2024-02-28},
journal = {Frontiers in Neuroimaging},
author = {Chen, Yibei and Hopp, Frederic R. and Malik, Musa and Wang, Paula T. and Woodman, Kylie and Youk, Sungbin and Weber, René},
month = jul,
year = {2022},
pages = {953215},
file = {Chen et al. - 2022 - Reproducing FSL's fMRI data analysis via Nipype R.pdf:/home/alpron/Zotero/storage/SA5LQY4U/Chen et al. - 2022 - Reproducing FSL's fMRI data analysis via Nipype R.pdf:application/pdf},
}
@misc{noauthor_bids-standardbep028_bidsprov_2024,
title = {bids-standard/{BEP028}\_BIDSprov},
copyright = {CC-BY-4.0},
url = {https://github.com/bids-standard/BEP028_BIDSprov},
abstract = {Organizing and coordinating BIDS extension proposal 28 : BIDS Provenance},
urldate = {2024-02-29},
publisher = {Brain Imaging Data Structure},
month = jan,
year = {2024},
note = {original-date: 2020-03-05T14:14:58Z},
keywords = {BIDS, provenance},
}
@misc{noauthor_bids_nodate,
title = {{BIDS} {Extension} {Proposal} ({BEP028}): {Provenance}},
shorttitle = {{BIDS} {Extension} {Proposal} ({BEP028})},
url = {https://docs.google.com/document/d/1vw3VNDof5cecv2PkFp7Lw_pNUTUo8-m8V4SIdtGJVKs/edit?usp=sharing&usp=embed_facebook},
language = {fr},
urldate = {2024-02-29},
journal = {Google Docs},
keywords = {BIDS, provenance},
file = {Snapshot:/home/alpron/Zotero/storage/84FRG2PA/edit.html:text/html},
}
@misc{noauthor_computational_nodate,
title = {Computational basis and {ReproIn}/{DataLad}: {A} complete portable and reproducible {fMRI} study from scratch},
url = {http://www.repronim.org/ohbm2018-training/03-01-reproin/},
urldate = {2024-02-29},
keywords = {datalad},
file = {Computational basis and ReproIn/DataLad\: ReproIn/DataLad\: A complete portable and reproducible fMRI study from scratch:/home/alpron/Zotero/storage/C7QV89WR/03-01-reproin.html:text/html},
}
@article{maumet_best_nodate,
title = {The best of both worlds: using semantic web with {JSON}-{LD}. {An} example with {NIDM}-{Results} \& {Datalad}},
language = {en},
author = {Maumet, Camille and Ghosh, Satrajit and Halchenko, Yaroslav O and Jarecka, Dorota and Nichols, Nolan B and Poline, Jean-Baptiste and Hanke, Michael},
file = {Maumet et al. - The best of both worlds using semantic web with J.pdf:/home/alpron/Zotero/storage/MW5U3VID/Maumet et al. - The best of both worlds using semantic web with J.pdf:application/pdf},
}
@article{nichols_best_2017,
title = {Best practices in data analysis and sharing in neuroimaging using {MRI}},
volume = {20},
language = {en},
number = {3},
journal = {Nature Neuroscience},
author = {Nichols, Thomas E and Das, Samir and Eickhoff, Simon B and Evans, Alan C and Glatard, Tristan and Hanke, Michael and Kriegeskorte, Nikolaus and Milham, Michael P and Poldrack, Russell A and Poline, Jean-Baptiste and Proal, Erika and Thirion, Bertrand},
year = {2017},
file = {Nichols et al. - 2017 - Best practices in data analysis and sharing in neu.pdf:/home/alpron/Zotero/storage/H88I739N/Nichols et al. - 2017 - Best practices in data analysis and sharing in neu.pdf:application/pdf},
}
@misc{noauthor_datalad_nodate,
  title    = {{DataLad} extension module for neuroimaging studies — {Datalad} for {Hirnis} 0.0.8 documentation},
  url      = {http://docs.datalad.org/projects/hirni/en/latest/index.html},
  urldate  = {2024-03-19},
  keywords = {datalad},
  file     = {DataLad extension module for neuroimaging studies — Datalad for Hirnis 0.0.8 documentation:/home/alpron/Zotero/storage/Z3HMSFLD/index.html:text/html},
}
@misc{halchenko_nipyheudiconv_2023,
  title      = {nipy/heudiconv: v1.0.0},
  shorttitle = {nipy/heudiconv},
  copyright  = {Open Access},
  url        = {https://zenodo.org/record/1012598},
  abstract   = {💥 Breaking Change gh-actions: Bump actions/checkout from 3 to 4 \#703 (@dependabot[bot]) 🚀 Enhancement Fix inconsistent behavior of existing session when using -d compared to --files option: raise an AssertionError instead of just a warning \#682 (@neurorepro) 🐛 Bug Fix Various tiny enhancements flake etc demanded \#702 (@yarikoptic) Boost claimed BIDS version to 1.8.0 from 1.4.1 \#699 (@yarikoptic) Point to Courtois-neuromod heuristic \#702 (@yarikoptic) 🏠 Internal Add codespell to lint tox env \#706 (@yarikoptic) test-compare-two-versions.sh: also ignore differences in HeudiconvVersion field in jsons since we have it there now \#685 (@yarikoptic) 📝 Documentation Add description of placeholders which could be used in the produced templates \#681 (@yarikoptic) Authors: 3 @dependabot[bot] Michael (@neurorepro) Yaroslav Halchenko (@yarikoptic)},
  urldate    = {2024-03-19},
  publisher  = {Zenodo},
  author     = {Halchenko, Yaroslav and Goncalves, Mathias and Velasco, Pablo and Di Oleggio Castello, Matteo Visconti and Ghosh, Satrajit and Salo, Taylor and Wodder, John T. and Hanke, Michael and Sadil, Patrick and Christian, Horea and Michael and Dae and Tilley, Steven and Kent, James and To, Isaac and Brett, Matthew and Amlien, Inge and Gorgolewski, Chris and Markiewicz, Chris and Lukas, Darren Christopher and Callenberg, Keith and {Aksoo} and Kahn, Ari and Macdonald, Austin and Poldrack, Benjamin and Melo, Bruno and Braun, Henry and Lee, John and Pellman, John and Michael},
  month      = sep,
  year       = {2023},
  doi        = {10.5281/zenodo.1012598},
  keywords   = {datalad, BIDS},
}
@article{dafflon_guided_2022,
  title    = {A guided multiverse study of neuroimaging analyses},
  author   = {Dafflon, Jessica and Da Costa, Pedro F. and Váša, František and Monti, Ricardo Pio and Bzdok, Danilo and Hellyer, Peter J. and Turkheimer, Federico and Smallwood, Jonathan and Jones, Emily and Leech, Robert},
  journal  = {Nature Communications},
  volume   = {13},
  number   = {1},
  pages    = {3758},
  month    = jun,
  year     = {2022},
  issn     = {2041-1723},
  doi      = {10.1038/s41467-022-31347-8},
  url      = {https://www.nature.com/articles/s41467-022-31347-8},
  urldate  = {2024-03-06},
  abstract = {For most neuroimaging questions the range of possible analytic choices makes it unclear how to evaluate conclusions from any single analytic method. One possible way to address this issue is to evaluate all possible analyses using a multiverse approach, however, this can be computationally challenging and sequential analyses on the same data can compromise predictive power. Here, we establish how active learning on a low-dimensional space capturing the inter-relationships between pipelines can efficiently approximate the full spectrum of analyses. This approach balances the benefits of a multiverse analysis without incurring the cost on computational and predictive power. We illustrate this approach with two functional MRI datasets (predicting brain age and autism diagnosis) demonstrating how a multiverse of analyses can be efficiently navigated and mapped out using active learning. Furthermore, our presented approach not only identifies the subset of analysis techniques that are best able to predict age or classify individuals with autism spectrum disorder and healthy controls, but it also allows the relationships between analyses to be quantified.},
  language = {en},
  file     = {Dafflon et al. - 2022 - A guided multiverse study of neuroimaging analyses.pdf:/home/alpron/Zotero/storage/8S87S654/Dafflon et al. - 2022 - A guided multiverse study of neuroimaging analyses.pdf:application/pdf},
}
@unpublished{vila_impact_2024,
  title      = {The {Impact} of {Hardware} {Variability} on {Applications} {Packaged} with {Docker} and {Guix}: a {Case} {Study} in {Neuroimaging}},
  shorttitle = {The {Impact} of {Hardware} {Variability} on {Applications} {Packaged} with {Docker} and {Guix}},
  author     = {Vila, Gaël and Medernach, Emmanuel and Gonzalez, Inés and Bonnet, Axel and Chatelain, Yohan and Sdika, Michaël and Glatard, Tristan and Camarasu-Pop, Sorina},
  url        = {https://hal.science/hal-04480308},
  urldate    = {2024-03-21},
  month      = feb,
  year       = {2024},
  note       = {Preprint, HAL hal-04480308},
  abstract   = {The reproducibility of neuroimaging analyses across computational environments has gained significant attention over the last few years. While software containerization solutions such as Docker and Singularity have been deployed to mask the effects of softwareinduced variability, variations in hardware architectures still impact neuroimaging results in an unclear way. We study the effect of hardware variability on linear registration results produced by the FSL FLIRT application, a widely-used software component in neuroimaging data analyses. Using the Grid'5000 infrastructure, we study the effect of nine different CPU models using two software packaging systems (Docker and Guix), and we compare the resulting hardware variability to numerical variability measured with random rounding. Results show that hardware, software, and numerical variability lead to perturbations of similar magnitudes — albeit uncorrelated — suggesting that these three types of variability act as independent sources of numerical noise with similar magnitude. Therefore, random rounding is as a practical solution to measure the effect of numerical noise induced by hardware variability in this application. The effect of hardware perturbations on linear registration remains moderate, with average translation errors of 0.1 mm (maximum: 0.5 mm) and average rotation errors of 0.02 deg (maximum: 0.2 deg). Such variations might impact downstream analyses when linear registration is used as initialization step for other operations.},
  keywords   = {CPU micro-architecture, Neuroimaging, Random rounding, Reproducibility, Software packaging},
  file       = {Vila et al. - 2024 - The Impact of Hardware Variability on Applications.pdf:/home/alpron/Zotero/storage/HQPFHLFC/Vila et al. - 2024 - The Impact of Hardware Variability on Applications.pdf:application/pdf},
}
@misc{maumet_towards_nodate,
  title    = {Towards reusable derived data in neuroimaging},
  author   = {Maumet, Camille},
  language = {en},
  file     = {Maumet - Towards reusable derived data in neuroimaging.pdf:/home/alpron/Zotero/storage/SPIMSHT9/Maumet - Towards reusable derived data in neuroimaging.pdf:application/pdf},
}
@phdthesis{maumet_towards_nodate-1,
	title = {Towards reproducible neuroimaging: {Solutions} for sharing and re-using brain imaging data},
	language = {en},
	author = {Maumet, Camille},
	keywords = {Important},
	internal-note = {NOTE(review): @phdthesis is missing the required school and year fields -- TODO confirm the defending institution and defense year and add them},
	file = {Maumet - Towards reproducible neuroimaging Solutions for s.pdf:/home/alpron/Zotero/storage/S7YM9AUP/Maumet - Towards reproducible neuroimaging Solutions for s.pdf:application/pdf},
}
@article{martone_past_2024,
  title      = {The past, present and future of neuroscience data sharing: a perspective on the state of practices and infrastructure for {FAIR}},
  shorttitle = {The past, present and future of neuroscience data sharing},
  author     = {Martone, Maryann E.},
  journal    = {Frontiers in Neuroinformatics},
  volume     = {17},
  pages      = {1276407},
  month      = jan,
  year       = {2024},
  issn       = {1662-5196},
  doi        = {10.3389/fninf.2023.1276407},
  url        = {https://www.frontiersin.org/articles/10.3389/fninf.2023.1276407/full},
  urldate    = {2024-04-22},
  abstract   = {Neuroscience has made significant strides over the past decade in moving from a largely closed science characterized by anemic data sharing, to a largely open science where the amount of publicly available neuroscience data has increased dramatically. While this increase is driven in significant part by large prospective data sharing studies, we are starting to see increased sharing in the long tail of neuroscience data, driven no doubt by journal requirements and funder mandates. Concomitant with this shift to open is the increasing support of the FAIR data principles by neuroscience practices and infrastructure. FAIR is particularly critical for neuroscience with its multiplicity of data types, scales and model systems and the infrastructure that serves them. As envisioned from the early days of neuroinformatics, neuroscience is currently served by a globally distributed ecosystem of neuroscience-centric data repositories, largely specialized around data types. To make neuroscience data findable, accessible, interoperable, and reusable requires the coordination across different stakeholders, including the researchers who produce the data, data repositories who make it available, the aggregators and indexers who field search engines across the data, and community organizations who help to coordinate efforts and develop the community standards critical to FAIR. The International Neuroinformatics Coordinating Facility has led efforts to move neuroscience toward FAIR, fielding several resources to help researchers and repositories achieve FAIR. In this perspective, I provide an overview of the components and practices required to achieve FAIR in neuroscience and provide thoughts on the past, present and future of FAIR infrastructure for neuroscience, from the laboratory to the search engine.},
  language   = {en},
  keywords   = {Important, Review, oscars},
  file       = {Martone - 2024 - The past, present and future of neuroscience data .pdf:/home/alpron/Zotero/storage/JSZZGSIN/Martone - 2024 - The past, present and future of neuroscience data .pdf:application/pdf},
}
@misc{sanz-robinson_open-source_nodate,
  title  = {Open-source tools and platforms to investigate analytical variability in neuroimaging},
  author = {Sanz-Robinson, Jacob and Wang, Michelle and McPherson, Brent and Glatard, Tristan and Poline, Jean-Baptiste},
  note   = {Publisher: OSF},
  file   = {Sanz-Robinson et al. - Open-source tools and platforms to investigate ana.pdf:/home/alpron/Zotero/storage/ZCERUVV7/Sanz-Robinson et al. - Open-source tools and platforms to investigate ana.pdf:application/pdf},
}
@article{luppi_systematic_2024,
  title    = {Systematic evaluation of {fMRI} data-processing pipelines for consistent functional connectomics},
  author   = {Luppi, Andrea I. and Gellersen, Helena M. and Liu, Zhen-Qi and Peattie, Alexander R. D. and Manktelow, Anne E. and Adapa, Ram and Owen, Adrian M. and Naci, Lorina and Menon, David K. and Dimitriadis, Stavros I. and Stamatakis, Emmanuel A.},
  journal  = {Nature Communications},
  volume   = {15},
  number   = {1},
  pages    = {4745},
  month    = jun,
  year     = {2024},
  issn     = {2041-1723},
  doi      = {10.1038/s41467-024-48781-5},
  url      = {https://www.nature.com/articles/s41467-024-48781-5},
  urldate  = {2024-06-07},
  abstract = {Functional interactions between brain regions can be viewed as a network, enabling neuroscientists to investigate brain function through network science. Here, we systematically evaluate 768 data-processing pipelines for network reconstruction from resting-state functional MRI, evaluating the effect of brain parcellation, connectivity definition, and global signal regression. Our criteria seek pipelines that minimise motion confounds and spurious test-retest discrepancies of network topology, while being sensitive to both inter-subject differences and experimental effects of interest. We reveal vast and systematic variability across pipelines’ suitability for functional connectomics. Inappropriate choice of data-processing pipeline can produce results that are not only misleading, but systematically so, with the majority of pipelines failing at least one criterion. However, a set of optimal pipelines consistently satisfy all criteria across different datasets, spanning minutes, weeks, and months. We provide a full breakdown of each pipeline’s performance across criteria and datasets, to inform future best practices in functional connectomics.},
  language = {en},
  file     = {Luppi et al. - 2024 - Systematic evaluation of fMRI data-processing pipe.pdf:/home/alpron/Zotero/storage/HB2VTQ7Y/Luppi et al. - 2024 - Systematic evaluation of fMRI data-processing pipe.pdf:application/pdf},
}
@misc{michael_demidenko_demidenmpyrelimri_2024,
  title      = {demidenm/{PyReliMRI}: v2.1.0},
  shorttitle = {demidenm/{PyReliMRI}},
  author     = {Demidenko, Michael and Poldrack, Russ and Markiewicz, Chris and DuPre, Elizabeth},
  copyright  = {Creative Commons Attribution 4.0 International},
  url        = {https://zenodo.org/doi/10.5281/zenodo.12512085},
  urldate    = {2024-06-24},
  publisher  = {Zenodo},
  month      = jun,
  year       = {2024},
  doi        = {10.5281/zenodo.12512085},
  abstract   = {Several changes and additions are made:
A more comprehensive docstring is used to enhance information on PyReliMRI readthedocs
The ICC function tests were expanded to confirm ICC, between subject variance and within subject variance estimates from sumqc\_icc() are compared to estimates from liner mixed effect model outputs from stat models. Specifically, see test comparing pyrelimri.icc vs statsmodels
The returned list of estimates for ICC computations have been revised. Previously, the MSBS and MSWS were returned. However, this was not always informative to the ICC being computed, nor to understanding how variances are impacted. For example, when the ICC(3,1) was returned, the MSBS and MSWS were not directly used in the computation. Furthermore, they were not true to the denominator and the numerator for each formula. Thus, for voxelwise, edgewise, rois, etc., where sumsq\_icc() is used, now a dictionary is returned with: 1) ICC estimate, 2) upper 95CI, 3) lower 95CI, 4) between subject variance, 5) within subject variance, 6) between measure variance (in case of ICC(2,1), otherwise None/empty values)
Included a TR-by-TR timeseries extraction for masks/coordinates and locked to onset times/behavioral events. This provides the extraction of the mean signal change across a timeseries for a given ROI. This is based on Nilearn's niftimaskers where standardizing via the call 'psc'
Included an edgewise\_icc computation on correlation matrices. This is strictly for comparing a list of lists of variables includes subjects' correlation matrices across runs/sessions or the paths to these correlation matrices. Note, if using pandas dataframe standards, it is assumed that the header = None and row index = None},
}
@misc{segal_embracing_2024,
	title = {Embracing variability in the search for biological mechanisms of psychiatric illness},
	copyright = {https://creativecommons.org/licenses/by/4.0/legalcode},
	url = {https://osf.io/5mz46},
	doi = {10.31219/osf.io/5mz46},
	abstract = {Despite decades of research, we lack objective diagnostic or prognostic biomarkers of mental health problems. A key reason for this limited progress is a reliance on the traditional case-control paradigm, which assumes that each disorder has a single cause that can be uncovered by comparing average phenotypic values of cases and control samples. Here, we discuss the problematic assumptions on which this paradigm is based and highlight recent efforts that seek to characterize, rather than minimize, the inherent clinical and biological variability that characterizes psychiatric populations. We argue that embracing such variability will be necessary to understand pathophysiological mechanisms and to develop more targeted and effective treatments.},
	urldate = {2024-07-05},
	author = {Segal, Ashlea and Tiego, Jeggan and Holmes, Avram J and {Andre} and Fornito, Alex},
	internal-note = {NOTE(review): the author entry "{Andre}" looks like a truncated name from the export (first name only) -- verify the full author name against the OSF preprint and fix},
	month = jun,
	year = {2024},
	keywords = {to read, generalizibility},
	file = {Segal et al. - 2024 - Embracing variability in the search for biological.pdf:/home/alpron/Zotero/storage/PC58H3RX/Segal et al. - 2024 - Embracing variability in the search for biological.pdf:application/pdf},
}
@article{giehl_sharing_2024,
  title      = {Sharing brain imaging data in the {Open} {Science} era: how and why?},
  shorttitle = {Sharing brain imaging data in the {Open} {Science} era},
  author     = {Giehl, Kathrin and Mutsaerts, Henk-Jan and Aarts, Kristien and Barkhof, Frederik and Caspers, Svenja and Chetelat, Gaël and Colin, Marie-Elisabeth and Düzel, Emrah and Frisoni, Giovanni B and Ikram, M Arfan and Jovicich, Jorge and Morbelli, Silvia and Oertel, Wolfgang and Paret, Christian and Perani, Daniela and Ritter, Petra and Segura, Bàrbara and Wisse, Laura E M and De Witte, Elke and Cappa, Stefano F and Van Eimeren, Thilo},
  journal    = {The Lancet Digital Health},
  volume     = {6},
  number     = {7},
  pages      = {e526--e535},
  month      = jul,
  year       = {2024},
  issn       = {2589-7500},
  doi        = {10.1016/S2589-7500(24)00069-4},
  url        = {https://linkinghub.elsevier.com/retrieve/pii/S2589750024000694},
  urldate    = {2024-07-15},
  language   = {en},
  keywords   = {Review},
  file       = {Giehl et al. - 2024 - Sharing brain imaging data in the Open Science era.pdf:/home/alpron/Zotero/storage/KLC63VH2/Giehl et al. - 2024 - Sharing brain imaging data in the Open Science era.pdf:application/pdf},
}
@article{rosenblatt_data_2024,
  title    = {Data leakage inflates prediction performance in connectome-based machine learning models},
  author   = {Rosenblatt, Matthew and Tejavibulya, Link and Jiang, Rongtao and Noble, Stephanie and Scheinost, Dustin},
  journal  = {Nature Communications},
  volume   = {15},
  number   = {1},
  pages    = {1829},
  month    = feb,
  year     = {2024},
  issn     = {2041-1723},
  doi      = {10.1038/s41467-024-46150-w},
  url      = {https://www.nature.com/articles/s41467-024-46150-w},
  urldate  = {2024-07-15},
  abstract = {Predictive modeling is a central technique in neuroimaging to identify brain-behavior relationships and test their generalizability to unseen data. However, data leakage undermines the validity of predictive models by breaching the separation between training and test data. Leakage is always an incorrect practice but still pervasive in machine learning. Understanding its effects on neuroimaging predictive models can inform how leakage affects existing literature. Here, we investigate the effects of five forms of leakage–involving feature selection, covariate correction, and dependence between subjects–on functional and structural connectome-based machine learning models across four datasets and three phenotypes. Leakage via feature selection and repeated subjects drastically inflates prediction performance, whereas other forms of leakage have minor effects. Furthermore, small datasets exacerbate the effects of leakage. Overall, our results illustrate the variable effects of leakage and underscore the importance of avoiding data leakage to improve the validity and reproducibility of predictive modeling.},
  language = {en},
  file     = {Rosenblatt et al. - 2024 - Data leakage inflates prediction performance in co.pdf:/home/alpron/Zotero/storage/P3NIEWBK/Rosenblatt et al. - 2024 - Data leakage inflates prediction performance in co.pdf:application/pdf},
}
@article{jadavji_editorial_2023,
  title      = {Editorial: {Reproducibility} in neuroscience},
  shorttitle = {Editorial},
  author     = {Jadavji, Nafisa M. and Haelterman, Nele A. and Sud, Reeteka and Antonietti, Alberto},
  journal    = {Frontiers in Integrative Neuroscience},
  volume     = {17},
  pages      = {1271818},
  month      = aug,
  year       = {2023},
  issn       = {1662-5145},
  doi        = {10.3389/fnint.2023.1271818},
  url        = {https://www.frontiersin.org/articles/10.3389/fnint.2023.1271818/full},
  urldate    = {2024-07-15},
  language   = {en},
  file       = {Jadavji et al. - 2023 - Editorial Reproducibility in neuroscience.pdf:/home/alpron/Zotero/storage/G7DDIIRI/Jadavji et al. - 2023 - Editorial Reproducibility in neuroscience.pdf:application/pdf},
}
@article{weigard_flexible_2024,
  title    = {Flexible adaptation of task-positive brain networks predicts efficiency of evidence accumulation},
  author   = {Weigard, Alexander and Angstadt, Mike and Taxali, Aman and Heathcote, Andrew and Heitzeg, Mary M. and Sripada, Chandra},
  journal  = {Communications Biology},
  volume   = {7},
  number   = {1},
  pages    = {801},
  month    = jul,
  year     = {2024},
  issn     = {2399-3642},
  doi      = {10.1038/s42003-024-06506-w},
  url      = {https://www.nature.com/articles/s42003-024-06506-w},
  urldate  = {2024-07-22},
  abstract = {Efficiency of evidence accumulation (EEA), an individual’s ability to selectively gather goal-relevant information to make adaptive choices, is thought to be a key neurocomputational mechanism associated with cognitive functioning and transdiagnostic risk for psychopathology. However, the neural basis of individual differences in EEA is poorly understood, especially regarding the role of largescale brain network dynamics. We leverage data from 5198 participants from the Human Connectome Project and Adolescent Brain Cognitive Development Study to demonstrate a strong association between EEA and flexible adaptation to cognitive demand in the “task-positive” frontoparietal and dorsal attention networks. Notably, individuals with higher EEA displayed divergent task-positive network activation across n-back task conditions: higher activation under high cognitive demand (2-back) and lower activation under low demand (0-back). These findings suggest that brain networks’ flexible adaptation to cognitive demands is a key neural underpinning of EEA.},
  language = {en},
  keywords = {to read},
}
@misc{demidenko_impact_2024,
  title      = {Impact of analytic decisions on test-retest reliability of individual and group estimates in functional magnetic resonance imaging: a multiverse analysis using the monetary incentive delay task},
  shorttitle = {Impact of analytic decisions on test-retest reliability of individual and group estimates in functional magnetic resonance imaging},
  author     = {Demidenko, Michael I. and Mumford, Jeanette A. and Poldrack, Russell A.},
  url        = {http://biorxiv.org/lookup/doi/10.1101/2024.03.19.585755},
  doi        = {10.1101/2024.03.19.585755},
  urldate    = {2024-07-22},
  month      = mar,
  year       = {2024},
  abstract   = {Empirical studies reporting low test-retest reliability of individual blood oxygen-level dependent (BOLD) signal estimates in functional magnetic resonance imaging (fMRI) data have resurrected interest among cognitive neuroscientists in methods that may improve reliability in fMRI. Over the last decade, several individual studies have reported that modeling decisions, such as smoothing, motion correction and contrast selection, may improve estimates of test-retest reliability of BOLD signal estimates. However, it remains an empirical question whether certain analytic decisions consistently improve individual and group level reliability estimates in an fMRI task across multiple large, independent samples. This study used three independent samples (Ns: 60, 81, 119) that collected the same task (Monetary Incentive Delay task) across two runs and two sessions to evaluate the effects of analytic decisions on the individual (intraclass correlation coefficient [ICC(3,1)]) and group (Jaccard/Spearman rho) reliability estimates of BOLD activity of task fMRI data. The analytic decisions in this study vary across four categories: smoothing kernel (five options), motion correction (four options), task parameterizing (three options) and task contrasts (four options), totaling 240 different pipeline permutations. Across all 240 pipelines, the median ICC estimates are consistently low, with a maximum median ICC estimate of .43 - .55 across the three samples. The analytic decisions with the greatest impact on the median ICC and group similarity estimates are the Implicit Baseline contrast, Cue Model parameterization and a larger smoothing kernel. Using an Implicit Baseline in a contrast condition meaningfully increased group similarity and ICC estimates as compared to using the Neutral cue. This effect was largest for the Cue Model parameterization; however, improvements in reliability came at the cost of interpretability. This study illustrates that estimates of reliability in the MID task are consistently low and variable at small samples, and a higher test-retest reliability may not always improve interpretability of the estimated BOLD signal.},
  language   = {en},
  keywords   = {to read, to\_add\_github},
  file       = {Demidenko et al. - 2024 - Impact of analytic decisions on test-retest reliab.pdf:/home/alpron/Zotero/storage/JDL4YPK4/Demidenko et al. - 2024 - Impact of analytic decisions on test-retest reliab.pdf:application/pdf},
}
@article{soskic_garden_nodate,
  title    = {Garden of forking paths in {ERP} research – {Effects} of varying pre-processing and analysis steps in an {N400} experiment},
  author   = {Šoškić, Anđela and Styles, Suzy J. and Kappenman, Emily S. and Ković, Vanja},
  journal  = {Psychophysiology},
  pages    = {e14628},
  doi      = {10.1111/psyp.14628},
  url      = {https://onlinelibrary.wiley.com/doi/abs/10.1111/psyp.14628},
  abstract = {Abstract This study tackles the Garden of Forking Paths, as a challenge for replicability and reproducibility of ERP studies. Here, we applied a multiverse analysis to a sample ERP N400 dataset, donated by an independent research team. We analyzed this dataset using 14 pipelines selected to showcase the full range of methodological variability found in the N400 literature using systematic review approach. The selected pipelines were compared in depth by looking into statistical test outcomes, descriptive statistics, effect size, data quality, and statistical power. In this way we provide a worked example of how analytic flexibility can impact results in research fields with high dimensionality such as ERP, when analyzed using standard null-hypothesis significance testing. Out of the methodological decisions that were varied, high-pass filter cut-off, artifact removal method, baseline duration, reference, measurement latency and locations, and amplitude measure (peak vs. mean) were all shown to affect at least some of the study outcome measures. Low-pass filtering was the only step which did not notably influence any of these measures. This study shows that even some of the seemingly minor procedural deviations can influence the conclusions of an ERP study. We demonstrate the power of multiverse analysis in both identifying the most reliable effects in a given study, and for providing insights into consequences of methodological decisions.},
  note     = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1111/psyp.14628},
  keywords = {to read},
}
@article{renton_neurodesk_2024,
  title      = {Neurodesk: an accessible, flexible and portable data analysis environment for reproducible neuroimaging},
  shorttitle = {Neurodesk},
  author     = {Renton, Angela I. and Dao, Thuy T. and Johnstone, Tom and Civier, Oren and Sullivan, Ryan P. and White, David J. and Lyons, Paris and Slade, Benjamin M. and Abbott, David F. and Amos, Toluwani J. and Bollmann, Saskia and Botting, Andy and Campbell, Megan E. J. and Chang, Jeryn and Close, Thomas G. and Dörig, Monika and Eckstein, Korbinian and Egan, Gary F. and Evas, Stefanie and Flandin, Guillaume and Garner, Kelly G. and Garrido, Marta I. and Ghosh, Satrajit S. and Grignard, Martin and Halchenko, Yaroslav O. and Hannan, Anthony J. and Heinsfeld, Anibal S. and Huber, Laurentius and Hughes, Matthew E. and Kaczmarzyk, Jakub R. and Kasper, Lars and Kuhlmann, Levin and Lou, Kexin and Mantilla-Ramos, Yorguin-Jose and Mattingley, Jason B. and Meier, Michael L. and Morris, Jo and Narayanan, Akshaiy and Pestilli, Franco and Puce, Aina and Ribeiro, Fernanda L. and Rogasch, Nigel C. and Rorden, Chris and Schira, Mark M. and Shaw, Thomas B. and Sowman, Paul F. and Spitz, Gershon and Stewart, Ashley W. and Ye, Xincheng and Zhu, Judy D. and Narayanan, Aswin and Bollmann, Steffen},
  journal    = {Nature Methods},
  volume     = {21},
  number     = {5},
  pages      = {804--808},
  month      = may,
  year       = {2024},
  issn       = {1548-7091, 1548-7105},
  doi        = {10.1038/s41592-023-02145-x},
  url        = {https://www.nature.com/articles/s41592-023-02145-x},
  urldate    = {2024-07-23},
  language   = {en},
}
@article{spisak_multivariate_2023,
  title     = {Multivariate {BWAS} can be replicable with moderate sample sizes},
  author    = {Spisak, Tamas and Bingel, Ulrike and Wager, Tor D.},
  journal   = {Nature},
  volume    = {615},
  number    = {7951},
  pages     = {E4--E7},
  month     = mar,
  year      = {2023},
  issn      = {1476-4687},
  doi       = {10.1038/s41586-023-05745-x},
  url       = {https://www.nature.com/articles/s41586-023-05745-x},
  urldate   = {2024-07-31},
  copyright = {2023 The Author(s)},
  language  = {en},
  note      = {Publisher: Nature Publishing Group},
  keywords  = {Cognitive neuroscience, Learning algorithms, Neuroscience},
  file      = {Full Text PDF:/home/alpron/Zotero/storage/PQNM323H/Spisak et al. - 2023 - Multivariate BWAS can be replicable with moderate .pdf:application/pdf},
}
@article{marek_reproducible_2022,
  title     = {Reproducible brain-wide association studies require thousands of individuals},
  author    = {Marek, Scott and Tervo-Clemmens, Brenden and Calabro, Finnegan J. and Montez, David F. and Kay, Benjamin P. and Hatoum, Alexander S. and Donohue, Meghan Rose and Foran, William and Miller, Ryland L. and Hendrickson, Timothy J. and Malone, Stephen M. and Kandala, Sridhar and Feczko, Eric and Miranda-Dominguez, Oscar and Graham, Alice M. and Earl, Eric A. and Perrone, Anders J. and Cordova, Michaela and Doyle, Olivia and Moore, Lucille A. and Conan, Gregory M. and Uriarte, Johnny and Snider, Kathy and Lynch, Benjamin J. and Wilgenbusch, James C. and Pengo, Thomas and Tam, Angela and Chen, Jianzhong and Newbold, Dillan J. and Zheng, Annie and Seider, Nicole A. and Van, Andrew N. and Metoki, Athanasia and Chauvin, Roselyne J. and Laumann, Timothy O. and Greene, Deanna J. and Petersen, Steven E. and Garavan, Hugh and Thompson, Wesley K. and Nichols, Thomas E. and Yeo, B. T. Thomas and Barch, Deanna M. and Luna, Beatriz and Fair, Damien A. and Dosenbach, Nico U. F.},
  journal   = {Nature},
  volume    = {603},
  number    = {7902},
  pages     = {654--660},
  month     = mar,
  year      = {2022},
  issn      = {1476-4687},
  doi       = {10.1038/s41586-022-04492-9},
  url       = {https://www.nature.com/articles/s41586-022-04492-9},
  urldate   = {2024-07-31},
  copyright = {2022 The Author(s), under exclusive licence to Springer Nature Limited},
  abstract  = {Magnetic resonance imaging (MRI) has transformed our understanding of the human brain through well-replicated mapping of abilities to specific structures (for example, lesion studies) and functions1–3 (for example, task functional MRI (fMRI)). Mental health research and care have yet to realize similar advances from MRI. A primary challenge has been replicating associations between inter-individual differences in brain structure or function and complex cognitive or mental health phenotypes (brain-wide association studies (BWAS)). Such BWAS have typically relied on sample sizes appropriate for classical brain mapping4 (the median neuroimaging study sample size is about 25), but potentially too small for capturing reproducible brain–behavioural phenotype associations5,6. Here we used three of the largest neuroimaging datasets currently available—with a total sample size of around 50,000 individuals—to quantify BWAS effect sizes and reproducibility as a function of sample size. BWAS associations were smaller than previously thought, resulting in statistically underpowered studies, inflated effect sizes and replication failures at typical sample sizes. As sample sizes grew into the thousands, replication rates began to improve and effect size inflation decreased. More robust BWAS effects were detected for functional MRI (versus structural), cognitive tests (versus mental health questionnaires) and multivariate methods (versus univariate). Smaller than expected brain–phenotype associations and variability across population subsamples can explain widespread BWAS replication failures. In contrast to non-BWAS approaches with larger effects (for example, lesions, interventions and within-person), BWAS reproducibility requires samples with thousands of individuals.},
  language  = {en},
  note      = {Publisher: Nature Publishing Group},
  keywords  = {Cognitive neuroscience, Psychology},
  file      = {Full Text PDF:/home/alpron/Zotero/storage/RVNDP5RZ/Marek et al. - 2022 - Reproducible brain-wide association studies requir.pdf:application/pdf},
}
@article{grady_influence_2020,
title = {Influence of sample size and analytic approach on stability and interpretation of brain‐behavior correlations in task‐related {fMRI} data},
volume = {42},
issn = {1065-9471},
url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7721240/},
doi = {10.1002/hbm.25217},
abstract = {Limited statistical power due to small sample sizes is a problem in fMRI research. Most of the work to date has examined the impact of sample size on task‐related activation, with less attention paid to the influence of sample size on brain‐behavior correlations, especially in actual experimental fMRI data. We addressed this issue using two large data sets (a working memory task, N = 171, and a relational processing task, N = 865) and both univariate and multivariate approaches to voxel‐wise correlations. We created subsamples of different sizes and calculated correlations between task‐related activity at each voxel and task performance. Across both data sets the magnitude of the brain‐behavior correlations decreased and similarity across spatial maps increased with larger sample sizes. The multivariate technique identified more extensive correlated areas and more similarity across spatial maps, suggesting that a multivariate approach would provide a consistent advantage over univariate approaches in the stability of brain‐behavior correlations. In addition, the multivariate analyses showed that a sample size of roughly 80 or more participants would be needed for stable estimates of correlation magnitude in these data sets. Importantly, a number of additional factors would likely influence the choice of sample size for assessing such correlations in any given experiment, including the cognitive task of interest and the amount of data collected per participant. Our results provide novel experimental evidence in two independent data sets that the sample size commonly used in fMRI studies of 20–30 participants is very unlikely to be sufficient for obtaining reproducible brain‐behavior correlations, regardless of analytic approach.},
number = {1},
urldate = {2024-07-31},
journal = {Human Brain Mapping},
author = {Grady, Cheryl L. and Rieck, Jenny R. and Nichol, Daniel and Rodrigue, Karen M. and Kennedy, Kristen M.},
month = sep,
year = {2020},
pmid = {32996635},
pmcid = {PMC7721240},
pages = {204--219},
file = {PubMed Central Full Text PDF:/home/alpron/Zotero/storage/W742CY9G/Grady et al. - 2020 - Influence of sample size and analytic approach on .pdf:application/pdf},
}
@misc{deyoung_beyond_2024,
title = {Beyond {Increasing} {Sample} {Sizes}: {Optimizing} {Effect} {Sizes} in {Neuroimaging} {Research} on {Individual} {Differences}},
copyright = {https://creativecommons.org/licenses/by/4.0/legalcode},
shorttitle = {Beyond {Increasing} {Sample} {Sizes}},
url = {https://osf.io/bjn62},
doi = {10.31219/osf.io/bjn62},
abstract = {Linking neurobiology to relatively stable individual differences in cognition, emotion, motivation, and behavior can require large sample sizes to yield replicable results. Given the nature of between-person research, sample sizes at least in the hundreds are likely to be necessary in most neuroimaging studies of individual differences, regardless of whether they are investigating the whole brain or more focal hypotheses. However, the appropriate sample size depends on the expected effect size. Therefore, we propose four strategies to increase effect sizes in neuroimaging research, which may help to enable the detection of replicable between-person effects in samples in the hundreds rather than the thousands: (1) theoretical matching between neuroimaging tasks and behavioral constructs of interest; (2) increasing the reliability of both neural and psychological measurement; (3) individualization of measures for each participant; and (4) using multivariate approaches with cross-validation instead of univariate approaches. We discuss challenges associated with these methods and highlight strategies for improvements that will help the field to move toward a more robust and accessible neuroscience of individual differences.},
language = {en},
urldate = {2024-07-30},
author = {DeYoung, Colin G. and Hilger, Kirsten and Hanson, Jamie L. and Abend, Rany and Allen, Timothy and Beaty, Roger and Blain, Scott D. and Chavez, Robert and Engel, Stephen A. and Ma, Feilong and Fornito, Alex and Genç, Erhan and Goghari, Vina and Grazioplene, Rachael G. and Homan, Philipp and Joyner, Keenan and Kaczkurkin, Antonia N. and Latzman, Robert D. and Martin, Elizabeth A. and Nikolaidis, Aki and Pickering, Alan and Safron, Adam and Sassenberg, Tyler and Servaas, Michelle and Smillie, Luke D. and Spreng, R. Nathan and Viding, Essi and Wacker, Jan},
month = jul,
year = {2024},
file = {DeYoung et al. - 2024 - Beyond Increasing Sample Sizes Optimizing Effect .pdf:/home/alpron/Zotero/storage/ZS9NMZRN/DeYoung et al. - 2024 - Beyond Increasing Sample Sizes Optimizing Effect .pdf:application/pdf},
}
@article{tendler_why_2023,
title = {Why every lab needs a handbook},
volume = {12},
issn = {2050-084X},
url = {https://elifesciences.org/articles/88853},
doi = {10.7554/eLife.88853},
abstract = {A lab handbook is a flexible document that outlines the ethos of a research lab or group. A good handbook will outline the different roles within the lab, explain what is expected of all lab members, provide an overview of the culture the lab aims to create, and describe how the lab supports its members so that they can develop as researchers. Here we describe how we wrote a lab handbook for a large research group, and provide resources to help other labs write their own handbooks.},
language = {en},
urldate = {2024-08-01},
journal = {eLife},
author = {Tendler, Benjamin C. and Welland, Maddie and Miller, Karla L. and {The WIN Handbook Team}},
month = jul,
year = {2023},
keywords = {to read, best practices},
pages = {e88853},
file = {Tendler et al. - 2023 - Why every lab needs a handbook.pdf:/home/alpron/Zotero/storage/75L3MYKQ/Tendler et al. - 2023 - Why every lab needs a handbook.pdf:application/pdf},
}
@misc{feldman_value_2024,
title = {The value of replications goes beyond replicability and is tied to the value of the research it replicates: {Commentary} on {Isager} et al. (2024)},
copyright = {Creative Commons Attribution 4.0 International},
shorttitle = {The value of replications goes beyond replicability and is tied to the value of the research it replicates},
url = {https://osf.io/btnuj/},
doi = {10.17605/OSF.IO/BTNUJ},
urldate = {2024-08-01},
author = {Feldman, Gilad},
collaborator = {{Center For Open Science}},
year = {2024},
note = {Publisher: OSF},
keywords = {to read},
file = {Feldman - 2024 - The value of replications goes beyond replicabilit.pdf:/home/alpron/Zotero/storage/MAJXU58Z/Feldman - 2024 - The value of replications goes beyond replicabilit.pdf:application/pdf},
}
@article{isager_exploring_2024,
title = {Exploring a formal approach to selecting studies for replication: {A} feasibility study in social neuroscience},
volume = {171},
issn = {00109452},
shorttitle = {Exploring a formal approach to selecting studies for replication},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0010945223002691},
doi = {10.1016/j.cortex.2023.10.012},
language = {en},
urldate = {2024-08-01},
journal = {Cortex},
author = {Isager, Peder M. and Lakens, Daniël and van Leeuwen, Thed and van 't Veer, Anna E.},
month = feb,
year = {2024},
pages = {330--346},
file = {Isager et al. - 2024 - Exploring a formal approach to selecting studies f.pdf:/home/alpron/Zotero/storage/Z2VP3RRD/Isager et al. - 2024 - Exploring a formal approach to selecting studies f.pdf:application/pdf},
}
@article{mandl_addressing_2024,
title = {Addressing researcher degrees of freedom through {minP} adjustment},
volume = {24},
issn = {1471-2288},
url = {https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-024-02279-2},
doi = {10.1186/s12874-024-02279-2},
abstract = {When different researchers study the same research question using the same dataset they may obtain different and potentially even conflicting results. This is because there is often substantial flexibility in researchers’ analytical choices, an issue also referred to as “researcher degrees of freedom”. Combined with selective reporting of the smallest p-value or largest effect, researcher degrees of freedom may lead to an increased rate of false positive and overoptimistic results. In this paper, we address this issue by formalizing the multiplicity of analysis strategies as a multiple testing problem. As the test statistics of different analysis strategies are usually highly dependent, a naive approach such as the Bonferroni correction is inappropriate because it leads to an unacceptable loss of power. Instead, we propose using the “minP” adjustment method, which takes potential test dependencies into account and approximates the underlying null distribution of the minimal p-value through a permutation-based procedure. This procedure is known to achieve more power than simpler approaches while ensuring a weak control of the family-wise error rate. We illustrate our approach for addressing researcher degrees of freedom by applying it to a study on the impact of perioperative {$paO_2$} on post-operative complications after neurosurgery. A total of 48 analysis strategies are considered and adjusted using the minP procedure. This approach allows to selectively report the result of the analysis strategy yielding the most convincing evidence, while controlling the type 1 error—and thus the risk of publishing false positive results that may not be replicable.},
language = {en},
number = {1},
urldate = {2024-08-01},
journal = {BMC Medical Research Methodology},
author = {Mandl, Maximilian M. and Becker-Pennrich, Andrea S. and Hinske, Ludwig C. and Hoffmann, Sabine and Boulesteix, Anne-Laure},
month = jul,
year = {2024},
keywords = {to read, multiverse},
pages = {152},
file = {Mandl et al. - 2024 - Addressing researcher degrees of freedom through m.pdf:/home/alpron/Zotero/storage/DZMEFP7J/Mandl et al. - 2024 - Addressing researcher degrees of freedom through m.pdf:application/pdf},
}
@article{field_consequences_2024,
title = {Consequences of the {Scientific} {Reform} {Movement}},
volume = {4},
issn = {2667-1204},
doi = {10.36850/jote.i4.1},
language = {en},
number = {1},
journal = {Journal of Trial and Error},
editor = {Field, Sarahanne M. and van Dongen, Noah and Tiokhin, Leo and O'Mahony, Aoife and Kaplan, Rebecca and Visser, Alex and Robaard, Meike and Prinsen, Jip and Korna, Thomas F. K.},
month = may,
year = {2024},
note = {Special Issue},
}
@misc{sadil_maps_2024,
author = {Sadil, Patrick and Lindquist, Martin A.},
title = {From {Maps} to {Models}: {A} {Survey} on the {Reliability} of {Small} {Studies} of {Task}-{Based} {fMRI}},
shorttitle = {From {Maps} to {Models}},
month = aug,
year = {2024},
doi = {10.1101/2024.08.05.606611},
url = {http://biorxiv.org/lookup/doi/10.1101/2024.08.05.606611},
urldate = {2024-08-09},
copyright = {http://creativecommons.org/licenses/by-nc/4.0/},
language = {en},
abstract = {Task-based functional magnetic resonance imaging is a powerful tool for studying brain function, but neuroimaging research produces ongoing concerns regarding small-sample studies and how to interpret them. Although it is well understood that larger samples are preferable, many situations require researchers to make judgments from small studies, including reviewing the existing literature, analyzing pilot data, or assessing subsamples. Quantitative guidance on how to make these judgments remains scarce. To address this, we leverage the Human Connectome Project's Young Adult dataset to survey various analyses--from regional activation maps to predictive models. We find that, for some classic analyses such as detecting regional activation or cluster peak location, studies with as few as 40 subjects are adequate, although this depends crucially on effect sizes. For predictive modeling, similar sizes can be adequate for detecting whether features are predictable, but at least an order of magnitude more (at least hundreds) may be required for developing consistent predictions. These results offer valuable insights for designing and interpreting fMRI studies, emphasizing the importance of considering effect size, sample size, and analysis approach when assessing the reliability of findings. We hope that this survey serves as a reference for identifying which kinds of research questions can be reliably answered with small-scale studies.},
keywords = {to read},
}
@article{taylor_set_2024,
title = {A {Set} of {FMRI} {Quality} {Control} {Tools} in {AFNI}: {Systematic}, in-depth, and interactive {QC} with afni\_proc.py and more},
volume = {2},
issn = {2837-6056},
shorttitle = {A {Set} of {FMRI} {Quality} {Control} {Tools} in {AFNI}},
url = {https://direct.mit.edu/imag/article/doi/10.1162/imag_a_00246/123633/A-Set-of-FMRI-Quality-Control-Tools-in-AFNI},
doi = {10.1162/imag_a_00246},
abstract = {Quality control (QC) assessment is a vital part of FMRI processing and analysis, and a typically underdiscussed aspect of reproducibility. This includes checking datasets at their very earliest stages (acquisition and conversion) through their processing steps (e.g., alignment and motion correction) to regression modeling (correct stimuli, no collinearity, valid fits, enough degrees of freedom, etc.) for each subject. There are a wide variety of features to verify throughout any single-subject processing pipeline, both quantitatively and qualitatively. We present several FMRI preprocessing QC features available in the AFNI toolbox, many of which are automatically generated by the pipeline-creation tool, afni\_proc.py. These items include a modular HTML document that covers full single-subject processing from the raw data through statistical modeling, several review scripts in the results directory of processed data, and command line tools for identifying subjects with one or more quantitative properties across a group (such as triaging warnings, making exclusion criteria, or creating informational tables). The HTML itself contains several buttons that efficiently facilitate interactive investigations into the data, when deeper checks are needed beyond the systematic images. The pages are linkable, so that users can evaluate individual items across a group, for increased sensitivity to differences (e.g., in alignment or regression modeling images). Finally, the QC document contains rating buttons for each “QC block,” as well as comment fields for each, to facilitate both saving and sharing the evaluations. This increases the specificity of QC, as well as its shareability, as these files can be shared with others and potentially uploaded into repositories, promoting transparency and open science. We describe the features and applications of these QC tools for FMRI.},
language = {en},
urldate = {2024-08-09},
journal = {Imaging Neuroscience},
author = {Taylor, Paul A. and Glen, Daniel R. and Chen, Gang and Cox, Robert W. and Hanayik, Taylor and Rorden, Chris and Nielson, Dylan M. and Rajendra, Justin K. and Reynolds, Richard C.},
month = aug,
year = {2024},
keywords = {to read, qc},
pages = {1--39},
file = {Taylor et al. - 2024 - A Set of FMRI Quality Control Tools in AFNI Syste.pdf:/home/alpron/Zotero/storage/L84FIQ4Z/Taylor et al. - 2024 - A Set of FMRI Quality Control Tools in AFNI Syste.pdf:application/pdf},
}
@misc{cohen-adad_open_2024,
title = {Open {Source} in {Lab} {Management}},
url = {http://arxiv.org/abs/2405.07774},
doi = {10.48550/arXiv.2405.07774},
abstract = {This document explores the advantages of integrating open source software and practices in managing a scientific lab, emphasizing reproducibility and the avoidance of pitfalls. It details practical applications from website management using GitHub Pages to organizing datasets in compliance with BIDS standards, highlights the importance of continuous testing for data integrity, IT management through Ansible for efficient system configuration, open source software development. The broader goal is to promote transparent, reproducible science by adopting open source tools. This approach not only saves time but exposes students to best practices, enhancing the transparency and reproducibility of scientific research.},
urldate = {2024-08-09},
publisher = {arXiv},
author = {Cohen-Adad, Julien},
month = may,
year = {2024},
eprint = {2405.07774},
eprinttype = {arXiv},
note = {arXiv:2405.07774 [cs]},
keywords = {to read},
file = {Cohen-Adad - 2024 - Open Source in Lab Management.html:/home/alpron/Zotero/storage/NSUKMR8S/Cohen-Adad - 2024 - Open Source in Lab Management.html:text/html;Cohen-Adad - 2024 - Open Source in Lab Management.pdf:/home/alpron/Zotero/storage/U8PYI68X/Cohen-Adad - 2024 - Open Source in Lab Management.pdf:application/pdf},
}
@article{desrosiers-gregoire_standardized_2024,
title = {A standardized image processing and data quality platform for rodent {fMRI}},
volume = {15},
issn = {2041-1723},
url = {https://www.nature.com/articles/s41467-024-50826-8},
doi = {10.1038/s41467-024-50826-8},
abstract = {Functional magnetic resonance imaging in rodents holds great potential for advancing our understanding of brain networks. Unlike the human community, there remains no standardized resource in rodents for image processing, analysis and quality control, posing significant reproducibility limitations. Our software platform, Rodent Automated Bold Improvement of EPI Sequences, is a pipeline designed to address these limitations for preprocessing, quality control, and confound correction, along with best practices for reproducibility and transparency. We demonstrate the robustness of the preprocessing workflow by validating performance across multiple acquisition sites and both mouse and rat data. Building upon a thorough investigation into data quality metrics across acquisition sites, we introduce guidelines for the quality control of network analysis and offer recommendations for addressing issues. Taken together, this software platform will allow the emerging community to adopt reproducible practices and foster progress in translational neuroscience.},
language = {en},
number = {1},
urldate = {2024-08-12},
journal = {Nature Communications},
author = {Desrosiers-Grégoire, Gabriel and Devenyi, Gabriel A. and Grandjean, Joanes and Chakravarty, M. Mallar},
month = aug,
year = {2024},
keywords = {to read},
pages = {6708},
file = {Desrosiers-Grégoire et al. - 2024 - A standardized image processing and data quality p.pdf:/home/alpron/Zotero/storage/3J874RHT/Desrosiers-Grégoire et al. - 2024 - A standardized image processing and data quality p.pdf:application/pdf},
}
@article{karakuzu_qmri-bids_2022,
title = {{qMRI}-{BIDS}: {An} extension to the brain imaging data structure for quantitative magnetic resonance imaging data},
volume = {9},
issn = {2052-4463},
shorttitle = {{qMRI}-{BIDS}},
url = {https://www.nature.com/articles/s41597-022-01571-4},
doi = {10.1038/s41597-022-01571-4},
abstract = {The Brain Imaging Data Structure (BIDS) established community consensus on the organization of data and metadata for several neuroimaging modalities. Traditionally, BIDS had a strong focus on functional magnetic resonance imaging (MRI) datasets and lacked guidance on how to store multimodal structural MRI datasets. Here, we present and describe the BIDS Extension Proposal 001 (BEP001), which adds a range of quantitative MRI (qMRI) applications to the BIDS. In general, the aim of qMRI is to characterize brain microstructure by quantifying the physical MR parameters of the tissue via computational, biophysical models. By proposing this new standard, we envision standardization of qMRI through multicenter dissemination of interoperable datasets. This way, BIDS can act as a catalyst of convergence between qMRI methods development and application-driven neuroimaging studies that can help develop quantitative biomarkers for neural tissue characterization. In conclusion, this BIDS extension offers a common ground for developers to exchange novel imaging data and tools, reducing the entrance barrier for qMRI in the field of neuroimaging.},
language = {en},
number = {1},
urldate = {2024-08-12},
journal = {Scientific Data},
author = {Karakuzu, Agah and Appelhoff, Stefan and Auer, Tibor and Boudreau, Mathieu and Feingold, Franklin and Khan, Ali R. and Lazari, Alberto and Markiewicz, Chris and Mulder, Martijn and Phillips, Christophe and Salo, Taylor and Stikov, Nikola and Whitaker, Kirstie and De Hollander, Gilles},
month = aug,
year = {2022},
keywords = {BIDS, to read},
pages = {517},
file = {Karakuzu et al. - 2022 - qMRI-BIDS An extension to the brain imaging data .pdf:/home/alpron/Zotero/storage/J6M7GHA3/Karakuzu et al. - 2022 - qMRI-BIDS An extension to the brain imaging data .pdf:application/pdf},
}
@misc{wang_reproducible_2023,
title = {A reproducible benchmark of resting-state {fMRI} denoising strategies using {fMRIPrep} and {Nilearn}},
copyright = {http://creativecommons.org/licenses/by/4.0/},
url = {https://neurolibre.org/papers/10.55458/neurolibre.00012},
doi = {10.55458/neurolibre.00012},
abstract = {Reducing contributions from non-neuronal sources is a crucial step in functional magnetic resonance imaging (fMRI) connectivity analyses. Many viable strategies for denoising fMRI are used in the literature, and practitioners rely on denoising benchmarks for guidance in the selection of an appropriate choice for their study. However, fMRI denoising software is an ever-evolving field, and the benchmarks can quickly become obsolete as the techniques or implementations change. In this work, we present a fully reproducible denoising benchmark featuring a range of denoising strategies and evaluation metrics for connectivity analyses, built primarily on the fMRIPrep (Esteban et al., 2018) and Nilearn (Abraham et al., 2014) software packages. We apply this reproducible benchmark to investigate the robustness of the conclusions across two different datasets and two versions of fMRIPrep. The majority of benchmark results were consistent with prior literature. Scrubbing, a technique which excludes time points with excessive motion, combined with global signal regression, is generally effective at noise removal. Scrubbing however disrupts the continuous sampling of brain images and is incompatible with some statistical analyses, e.g. auto-regressive modeling. In this case, a simple strategy using motion parameters, average activity in select brain compartments, and global signal regression should be preferred. Importantly, we found that certain denoising strategies behave inconsistently across datasets and/or versions of fMRIPrep, or had a different behavior than in previously published benchmarks. These results demonstrate that a reproducible denoising benchmark can effectively assess the robustness of conclusions across multiple datasets and software versions. In addition to reproducing core computations, interested readers can also reproduce or modify the figures of the article using the Jupyter Book project (Granger \& Pérez, 2021) and the Neurolibre (Karakuzu et al., 2022) reproducible preprint server. With the denoising benchmark, we hope to provide useful guidelines for the community, and that our software infrastructure will facilitate continued development as the state-of-the-art advances.},
language = {en},
urldate = {2024-08-12},
author = {Wang, Hao-Ting and Meisler, Steven L and Sharmarke, Hanad and Clarke, Natasha and Paugam, François and Gensollen, Nicolas and Markiewicz, Christopher J and Thirion, Bertrand and Bellec, Pierre},
month = jun,
year = {2023},
file = {Wang et al. - 2023 - A reproducible benchmark of resting-state fMRIdeno.pdf:/home/alpron/Zotero/storage/AFHQLWEI/Wang et al. - 2023 - A reproducible benchmark of resting-state fMRIdeno.pdf:application/pdf},
}
@article{zabihi_nonlinear_2024,
author = {Zabihi, Mariam and Kia, Seyed Mostafa and Wolfers, Thomas and De Boer, Stijn and Fraza, Charlotte and Dinga, Richard and Arenas, Alberto Llera and Bzdok, Danilo and Beckmann, Christian F. and Marquand, Andre},
editor = {Tangherloni, Andrea},
title = {Nonlinear latent representations of high-dimensional task-{fMRI} data: {Unveiling} cognitive and behavioral insights in heterogeneous spatial maps},
shorttitle = {Nonlinear latent representations of high-dimensional task-{fMRI} data},
journal = {PLOS ONE},
volume = {19},
number = {8},
pages = {e0308329},
month = aug,
year = {2024},
issn = {1932-6203},
doi = {10.1371/journal.pone.0308329},
url = {https://dx.plos.org/10.1371/journal.pone.0308329},
urldate = {2024-08-12},
language = {en},
abstract = {Finding an interpretable and compact representation of complex neuroimaging data is extremely useful for understanding brain behavioral mapping and hence for explaining the biological underpinnings of mental disorders. However, hand-crafted representations, as well as linear transformations, may inadequately capture the considerable variability across individuals. Here, we implemented a data-driven approach using a three-dimensional autoencoder on two large-scale datasets. This approach provides a latent representation of high-dimensional task-fMRI data which can account for demographic characteristics whilst also being readily interpretable both in the latent space learned by the autoencoder and in the original voxel space. This was achieved by addressing a joint optimization problem that simultaneously reconstructs the data and predicts clinical or demographic variables. We then applied normative modeling to the latent variables to define summary statistics (‘latent indices’) and establish a multivariate mapping to non-imaging measures. Our model, trained with multi-task fMRI data from the Human Connectome Project (HCP) and UK biobank task-fMRI data, demonstrated high performance in age and sex predictions and successfully captured complex behavioral characteristics while preserving individual variability through a latent representation. Our model also performed competitively with respect to various baseline models including several variants of principal components analysis, independent components analysis and classical regions of interest, both in terms of reconstruction accuracy and strength of association with behavioral variables.},
file = {Zabihi et al. - 2024 - Nonlinear latent representations of high-dimension.pdf:/home/alpron/Zotero/storage/CZZUYM3Q/Zabihi et al. - 2024 - Nonlinear latent representations of high-dimension.pdf:application/pdf},
}
@article{white_data_2022,
author = {White, Tonya and Blok, Elisabet and Calhoun, Vince D.},
title = {Data sharing and privacy issues in neuroimaging research: {Opportunities}, obstacles, challenges, and monsters under the bed},
shorttitle = {Data sharing and privacy issues in neuroimaging research},
journal = {Human Brain Mapping},
volume = {43},
number = {1},
pages = {278--291},
month = jan,
year = {2022},
issn = {1065-9471, 1097-0193},
doi = {10.1002/hbm.25120},
url = {https://onlinelibrary.wiley.com/doi/10.1002/hbm.25120},
urldate = {2024-08-12},
language = {en},
abstract = {Collaborative networks and data sharing initiatives are broadening the opportunities for the advancement of science. These initiatives offer greater transparency in science, with the opportunity for external research groups to reproduce, replicate, and extend research findings. Further, larger datasets offer the opportunity to identify homogeneous patterns within subgroups of individuals, where these patterns may be obscured by the heterogeneity of the neurobiological measure in smaller samples. However, data sharing and data pooling initiatives are not without their challenges, especially with new laws that may at first glance appear quite restrictive for open science initiatives. Interestingly, what is key to some of these new laws (i.e, the European Union's general data protection regulation) is that they provide greater control of data to those who “give” their data for research purposes. Thus, the most important element in data sharing is allowing the participants to make informed decisions about how they want their data to be used, and, within the law of the specific country, to follow the participants' wishes. This framework encompasses obtaining thorough informed consent and allowing the participant to determine the extent that they want their data shared, many of the ethical and legal obstacles are reduced to just monsters under the bed. In this manuscript we discuss the many options and obstacles for data sharing, from fully open, to federated learning, to fully closed. Importantly, we highlight the intersection of data sharing, privacy, and data ownership and highlight specific examples that we believe are informative to the neuroimaging community.},
keywords = {to read, data sharing},
file = {White et al. - 2022 - Data sharing and privacy issues in neuroimaging re.pdf:/home/alpron/Zotero/storage/CAG53AC7/White et al. - 2022 - Data sharing and privacy issues in neuroimaging re.pdf:application/pdf},
}
@article{mennes_making_2013,
author = {Mennes, Maarten and Biswal, Bharat B. and Castellanos, F. Xavier and Milham, Michael P.},
title = {Making data sharing work: {The} {FCP}/{INDI} experience},
shorttitle = {Making data sharing work},
journal = {NeuroImage},
volume = {82},
pages = {683--691},
month = nov,
year = {2013},
issn = {10538119},
doi = {10.1016/j.neuroimage.2012.10.064},
url = {https://linkinghub.elsevier.com/retrieve/pii/S1053811912010671},
urldate = {2024-08-12},
language = {en},
abstract = {Over a decade ago, the fMRI Data Center (fMRIDC) pioneered open-access data sharing in the task-based functional neuroimaging community. Well ahead of its time, the fMRIDC effort encountered logistical, sociocultural and funding barriers that impeded the field-wise instantiation of open-access data sharing. In 2009, ambitions for open-access data sharing were revived in the resting state functional MRI community in the form of two grassroots initiatives: the 1000 Functional Connectomes Project (FCP) and its successor, the International Neuroimaging Datasharing Initiative (INDI). Beyond providing open access to thousands of clinical and non-clinical imaging datasets, the FCP and INDI have demonstrated the feasibility of large-scale data aggregation for hypothesis generation and testing. Yet, the success of the FCP and INDI should not be confused with widespread embracement of open-access data sharing. Reminiscent of the challenges faced by fMRIDC, key controversies persist and include participant privacy, the role of informatics, and the logistical and cultural challenges of establishing an open science ethos. We discuss the FCP and INDI in the context of these challenges, highlighting the promise of current initiatives and suggesting solutions for possible pitfalls.},
keywords = {to read, data sharing},
file = {Mennes et al. - 2013 - Making data sharing work The FCPINDI experience.pdf:/home/alpron/Zotero/storage/E2SH89FC/Mennes et al. - 2013 - Making data sharing work The FCPINDI experience.pdf:application/pdf},
}
@article{li_moving_2024,
title = {Moving beyond processing- and analysis-related variation in resting-state functional brain imaging},
author = {Li, Xinhui and Bianchini Esper, Nathalia and Ai, Lei and Giavasis, Steve and Jin, Hecheng and Feczko, Eric and Xu, Ting and Clucas, Jon and Franco, Alexandre and Sólon Heinsfeld, Anibal and Adebimpe, Azeez and Vogelstein, Joshua T. and Yan, Chao-Gan and Esteban, Oscar and Poldrack, Russell A. and Craddock, Cameron and Fair, Damien and Satterthwaite, Theodore and Kiar, Gregory and Milham, Michael P.},
journal = {Nature Human Behaviour},
month = aug,
year = {2024},
issn = {2397-3374},
doi = {10.1038/s41562-024-01942-4},
url = {https://www.nature.com/articles/s41562-024-01942-4},
urldate = {2024-08-12},
language = {en},
keywords = {Important, fMRI, to read},
file = {Li et al. - 2024 - Moving beyond processing- and analysis-related var.pdf:/home/alpron/Zotero/storage/6TW66TT6/Li et al. - 2024 - Moving beyond processing- and analysis-related var.pdf:application/pdf},
}
@article{mehta_xcp-d_2024,
title = {{XCP}-{D}: {A} {Robust} {Pipeline} for the {Post}-{Processing} of {fMRI} data},
issn = {2837-6056},
shorttitle = {{XCP}-{D}},
url = {https://direct.mit.edu/imag/article/doi/10.1162/imag_a_00257/123715/XCP-D-A-Robust-Pipeline-for-the-Post-Processing-of},
doi = {10.1162/imag_a_00257},
abstract = {Functional neuroimaging is an essential tool for neuroscience research. Pre-processing pipelines produce standardized, minimally pre-processed data to support a range of potential analyses. However, post-processing is not similarly standardized. While several options for post-processing exist, they may not support output from different pre-processing pipelines, may have limited documentation, and may not follow generally accepted data organization standards (e.g. BIDS). In response, we present XCP-D: a collaborative effort between PennLINC at the University of Pennsylvania and the DCAN lab at the University of Minnesota. XCP-D uses an open development model on GitHub and incorporates continuous integration testing; it is distributed as a Docker container or Apptainer image. XCP-D generates denoised BOLD images and functional derivatives from resting-state data in either NIfTI or CIFTI files following pre-processing with fMRIPrep, HCP, or ABCD-BIDS pipelines. Even prior to its official release, XCP-D has been downloaded {\textgreater}5,000 times from DockerHub. Together, XCP-D facilitates robust, scalable, and reproducible post-processing of fMRI data.},
language = {en},
urldate = {2024-08-12},
journal = {Imaging Neuroscience},
author = {Mehta, Kahini and Salo, Taylor and Madison, Thomas J. and Adebimpe, Azeez and Bassett, Danielle S. and Bertolero, Max and Cieslak, Matthew and Covitz, Sydney and Houghton, Audrey and Keller, Arielle S. and Lundquist, Jacob T. and Luo, Audrey and Miranda-Dominguez, Oscar and Nelson, Steve M. and Shafiei, Golia and Shanmugan, Sheila and Shinohara, Russell T. and Smyser, Christopher D. and Sydnor, Valerie J. and Weldon, Kimberly B. and Feczko, Eric and Fair, Damien A. and Satterthwaite, Theodore D.},
month = jul,
year = {2024},
keywords = {fMRI, to read},
}
@article{caeyenberghs_enigmas_2024,
title = {{ENIGMA}’s simple seven: {Recommendations} to enhance the reproducibility of resting-state {fMRI} in traumatic brain injury},
volume = {42},
issn = {2213-1582},
shorttitle = {{ENIGMA}’s simple seven},
url = {https://linkinghub.elsevier.com/retrieve/pii/S221315822400024X},
doi = {10.1016/j.nicl.2024.103585},
language = {en},
urldate = {2024-08-12},
journal = {NeuroImage: Clinical},
author = {Caeyenberghs, Karen and Imms, Phoebe and Irimia, Andrei and Monti, Martin M. and Esopenko, Carrie and De Souza, Nicola L. and Dominguez D, Juan F. and Newsome, Mary R. and Dobryakova, Ekaterina and Cwiek, Andrew and Mullin, Hollie A.C. and Kim, Nicholas J. and Mayer, Andrew R. and Adamson, Maheen M. and Bickart, Kevin and Breedlove, Katherine M. and Dennis, Emily L. and Disner, Seth G. and Haswell, Courtney and Hodges, Cooper B. and Hoskinson, Kristen R. and Johnson, Paula K. and Königs, Marsh and Li, Lucia M. and Liebel, Spencer W. and Livny, Abigail and Morey, Rajendra A. and Muir, Alexandra M. and Olsen, Alexander and Razi, Adeel and Su, Matthew and Tate, David F. and Velez, Carmen and Wilde, Elisabeth A. and Zielinski, Brandon A. and Thompson, Paul M. and Hillary, Frank G.},