From fa6a3de2af7ce524324794f9ddefe73cf973297d Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Wed, 12 Apr 2023 21:22:04 -0400 Subject: [PATCH 1/7] revise cli of babs-unzip; start to call babs_bootstrap for unzip project --- .vscode/launch.json | 52 ++++++++++++++---------- babs/babs.py | 52 ++++++++++++++++++------ babs/cli.py | 70 ++++++++++++++++++++++----------- notebooks/testing_babs_unzip.py | 9 +++++ 4 files changed, 129 insertions(+), 54 deletions(-) create mode 100644 notebooks/testing_babs_unzip.py diff --git a/.vscode/launch.json b/.vscode/launch.json index 70279476..06ba9797 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -18,27 +18,27 @@ // "--project-root", // "/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/data/test_babs_multi-ses_toybidsapp", // ], - "args": [ - "--where_project", - "/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/data", - "--project_name", - "test_babs_multi-ses_toybidsapp", - "--input", - "BIDS", - "/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/data/w2nu3", - // "https://osf.io/w2nu3/", - "--container_ds", - "/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/data/toybidsapp-container-docker", - "--container_name", - "toybidsapp-0-0-6", - "--container_config_yaml_file", - "/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/babs/notebooks/example_container_toybidsapp.yaml", - "--type_session", - "multi-ses", - "--type_system", - "sge", - "--keep-if-failed" - ] + // "args": [ + // "--where_project", + // "/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/data", + // "--project_name", + // "test_babs_multi-ses_toybidsapp", + // "--input", + // "BIDS", + // "/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/data/w2nu3", + // // "https://osf.io/w2nu3/", + // "--container_ds", + // "/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/data/toybidsapp-container-docker", + // "--container_name", + // "toybidsapp-0-0-6", + // "--container_config_yaml_file", + // "/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/babs/notebooks/example_container_toybidsapp.yaml", + // "--type_session", + // "multi-ses", + // "--type_system", + // "sge", + // "--keep-if-failed" + // ] // "args": [ // "--where_project", "/cbica/projects/BABS/data", // "--project_name", "test_babs_multi-ses_toybidsapp", @@ -61,6 +61,16 @@ // "--path-workspace", "/cbica/projects/BABS/data/test_babs_multi-ses_toybidsapp/analysis/logs", // "--path-check-setup", "/cbica/projects/BABS/data/test_babs_multi-ses_toybidsapp/analysis/code/check_setup" // ] + "args": [ + "--babs-project-root", + "/cbica/projects/BABS/data/test_babs_multi-ses_fmriprepfake", + "--where-unzip-project", + "/cbica/projects/BABS/data", + "--unzip-project-name", + "multi-ses_fmriprepfake_unzip", + "--container-config-yaml-file", + "/cbica/projects/BABS/babs/notebooks/example_container_fmriprepfake.yaml", + ] } ] } \ No newline at end of file diff --git a/babs/babs.py b/babs/babs.py index b4e108fc..abda84af 100644 --- a/babs/babs.py +++ b/babs/babs.py @@ -248,7 +248,7 @@ def wtf_key_info(self, flag_output_ria_only=False): def babs_bootstrap(self, input_ds, container_ds, container_name, container_config_yaml_file, - system): + system, if_unzip=False): """ Bootstrap a babs project: initialize datalad-tracked RIAs, generate scripts to be used, etc @@ -266,6 +266,8 @@ def babs_bootstrap(self, input_ds, of how to run the BIDS App container system: class `System` information about the cluster management system + if_unzip: bool + if bootstrap scripts for an unzip project (i.e., not regular BABS project) """ # ============================================================== @@ -1865,29 +1867,57 @@ def babs_merge(self, chunk_size, trial_run): + " not to push merging actions to output RIA.") print("\n`babs-merge` did not fully finish yet!") - def babs_unzip(container_config_yaml_file): + def babs_unzip(self, where_unzip_project, unzip_project_name, + container_config_yaml_file): """ - This function unzips results and extract desired files. - This is done in 3 steps: - 1. Generate scripts used by `babs-unzip` - 2. Run scripts to unzip data - 3. Merge all branches of unzipping + This function initialize an unzip project for a BABS project. Parameters: -------------- - config: dict - loaded container config yaml file + where_unzip_project: str + Absolute path to the directory where the unzip project will locate. + unzip_project_name: str + The name of the unzip project. + container_config_yaml_file: str or None + path to container's configuration YAML file. + It contains info of what files to unzip etc + `None` if not provided by the user. """ # ==================================================== # Generate scripts used by `babs-unzip` # ==================================================== - # Prepare input_ds_unzip: + # Prepare `input_ds_unzip` (class `Input_ds`) for unzipping: + # path to the output ria to be cloned: + # e.g., "ria+file:///path/to/my_BABS_project/output_ria#~data"" + input_ds_path = self.output_ria_url + "#~data" + input_ds_cli = [["results", input_ds_path]] + # initialize `Input_ds` class: + input_ds_unzip = Input_ds(input_ds_cli) + + # Initialize unzip project: + unzip_project_root = op.join(where_unzip_project, unzip_project_name) + # initialize: + unzip_babs_project = BABS(unzip_project_root, + self.type_session, # same as BABS project + self.type_system) # same as BABS project + # Other necessary inputs for `babs_bootstrap()`: + system = System(self.type_system) + # Call `babs_bootstrap()`: - # !!!! using babs_proj_unzip, instead current `self`!!! + # !!!! using `unzip_project_root`, instead of current `self`!!! + unzip_babs_project.babs_bootstrap( + input_ds=input_ds_unzip, + container_ds=None, # no container ds for bootstrap unzip project + container_name=None, + container_config_yaml_file=container_config_yaml_file, + system=system, + if_unzip=True) print("TODO") + # TODO: `container_config_yaml_file` could be `None`! + # Check before loading in `babs_bootstrap()`!!! # ==================================================== # Run scripts to unzip data diff --git a/babs/cli.py b/babs/cli.py index 35c8e74e..dc949600 100644 --- a/babs/cli.py +++ b/babs/cli.py @@ -33,11 +33,11 @@ def babs_init_cli(): formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument( "--where_project", "--where-project", - help="Absolute path to the directory where the babs project will locate", + help="Absolute path to the directory where the BABS project will locate.", required=True) parser.add_argument( "--project_name", "--project-name", - help="The name of the babs project; " + help="The name of the BABS project; " "this folder will be automatically created in the directory" " specified in ``--where_project``.", required=True) @@ -708,15 +708,25 @@ def babs_unzip_cli(): description="``babs-unzip`` unzips results zip files and extracts desired files", formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument( - "--project_root", "--project-root", - help="Absolute path to the root of BABS project." + "--babs_project_root", "--babs-project-root", + help="Absolute path to the root of BABS project whose results will be unzipped." " For example, '/path/to/my_BABS_project/'.", required=True) + parser.add_argument( + "--where_unzip_project", "--where-unzip-project", + help="Absolute path to the directory where the unzip project will locate.", + required=True) + parser.add_argument( + "--unzip_project_name", "--unzip-project-name", + help="The name of the unzip project; " + "this folder will be automatically created in the directory" + " specified in ``--where_unzip_project``.", + required=True) parser.add_argument( '--container_config_yaml_file', '--container-config-yaml-file', help="Path to a YAML file of the BIDS App container that contains information of" " what files to unzip etc.") - + return parser @@ -725,36 +735,52 @@ def babs_unzip_main(): This is the core function of babs-unzip, which unzip results zip files and extracts desired files. - project_root: str - Absolute path to the root of BABS project. + babs_project_root: str + Absolute path to the root of BABS project to be unzipped. For example, '/path/to/my_BABS_project/'. - container_config_yaml_file: str + where_unzip_project: str + Absolute path to the directory where the unzip project will locate. + unzip_project_name: str + The name of the unzip project. + container_config_yaml_file: str or None path to container's configuration YAML file. - These two sections will be used: - 1. 'unzip_desired_filenames' - must be included - 2. 'rename_conflict_files' - optional + It contains info of what files to unzip etc + `None` if not provided by the user. """ # Get arguments: args = babs_unzip_cli().parse_args() - project_root = args.project_root + babs_project_root = args.babs_project_root + where_unzip_project = args.where_unzip_project + unzip_project_name = args.unzip_project_name container_config_yaml_file = args.container_config_yaml_file - # container config: - config = read_yaml(container_config_yaml_file) - # ^^ not to use filelock here - otherwise will create `*.lock` file in user's folder - + # ================================================================= # Sanity checks: - if "unzip_desired_filenames" not in config: - raise Exception("Section 'unzip_desired_filenames' is not included" - " in `--container_config_yaml_file`. This section is required." - " Path to this YAML file: '" + container_config_yaml_file + "'.") + # ================================================================= + unzip_project_root = op.join(where_unzip_project, unzip_project_name) + + # check if it exists: if so, raise error + if op.exists(unzip_project_root): + raise Exception("The folder `--unzip_project_name` '" + unzip_project_name + + "' already exists in the directory" + + " `--where_unzip_project` '" + where_unzip_project + "'!" + + " `babs-init` won't proceed to overwrite this folder.") + + # check if `where_project` exists: + if not op.exists(where_unzip_project): + raise Exception("Path provided in `--where_unzip_project` does not exist!") + + # check if `where_project` is writable: + if not os.access(where_unzip_project, os.W_OK): + raise Exception("Path provided in `--where_unzip_project` is not writable!") # Get class `BABS` based on saved `analysis/code/babs_proj_config.yaml`: - babs_proj, _ = get_existing_babs_proj(project_root) + babs_proj, _ = get_existing_babs_proj(babs_project_root) # Call method `babs_unzip()`: - babs_proj.babs_unzip(config) + babs_proj.babs_unzip(where_unzip_project, unzip_project_name, + container_config_yaml_file) def get_existing_babs_proj(project_root): diff --git a/notebooks/testing_babs_unzip.py b/notebooks/testing_babs_unzip.py new file mode 100644 index 00000000..b89f0c8c --- /dev/null +++ b/notebooks/testing_babs_unzip.py @@ -0,0 +1,9 @@ +import os +import os.path as op +import sys + +sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "babs")) +sys.path.append("..") +from babs.cli import babs_unzip_main # noqa + +babs_unzip_main() From 2dad4759c664a6b868736c305cde7fd16962b7a6 Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Wed, 12 Apr 2023 17:53:48 -0400 Subject: [PATCH 2/7] remove datalad get -r containers from participant_job.sh --- babs/babs.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/babs/babs.py b/babs/babs.py index abda84af..7099dc55 100644 --- a/babs/babs.py +++ b/babs/babs.py @@ -2676,13 +2676,6 @@ def generate_bash_participant_job(self, bash_path, input_ds, type_session, (cd inputs/data/ && rm -f `ls sub-*.zip | grep -v ${subid}`) """ - # `datalad get` the container ?? - # NOTE: only found in `bootstrap-fmriprep-ingressed-fs.sh`... - # not sure if this is really needed - bash_file.write("\n# Get the container dataset:\n") - bash_file.write("datalad get -r containers" + "\n") - # NOTE: ^^ not sure if `-r` is needed.... - # determine the zip filename: cmd_determine_zipfilename = generate_cmd_determine_zipfilename(input_ds, type_session) bash_file.write(cmd_determine_zipfilename) From 39d96d444630055201adb477fcc6fdd809f24d64 Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Thu, 13 Apr 2023 15:30:39 -0400 Subject: [PATCH 3/7] add babs-unzip CLI docs --- babs/cli.py | 3 ++- docs/source/babs-unzip.rst | 13 +++++++++++++ docs/source/cli.rst | 10 +++++++++- 3 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 docs/source/babs-unzip.rst diff --git a/babs/cli.py b/babs/cli.py index dc949600..6e6c0686 100644 --- a/babs/cli.py +++ b/babs/cli.py @@ -705,7 +705,8 @@ def babs_unzip_cli(): """ CLI for babs-unzip """ parser = argparse.ArgumentParser( - description="``babs-unzip`` unzips results zip files and extracts desired files", + description="``babs-unzip`` initializes an unzip project" + " to unzip results zip files and extract desired files.", formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument( "--babs_project_root", "--babs-project-root", diff --git a/docs/source/babs-unzip.rst b/docs/source/babs-unzip.rst new file mode 100644 index 00000000..1ab8c64e --- /dev/null +++ b/docs/source/babs-unzip.rst @@ -0,0 +1,13 @@ +################################################## +``babs-unzip``: Initialize an unzip project +################################################## + +.. contents:: Table of Contents + +********************** +Command-Line Arguments +********************** + +.. argparse:: + :ref: babs.cli.babs_unzip_cli + :prog: babs-unzip diff --git a/docs/source/cli.rst b/docs/source/cli.rst index b81e25c7..d5bfcbcd 100644 --- a/docs/source/cli.rst +++ b/docs/source/cli.rst @@ -27,10 +27,18 @@ Submit jobs and check job status babs-submit babs-status -After jobs are finished +Merge results =========================== .. toctree:: :maxdepth: 1 babs-merge + +Unzip results +=========================== + +.. toctree:: + :maxdepth: 1 + + babs-unzip From 4e3c414ca00afa9311dbf524fe1d1775ce98b049 Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Thu, 13 Apr 2023 17:58:09 -0400 Subject: [PATCH 4/7] change babs_bootstrap for unzipping --- babs/babs.py | 100 +++++++++++++++++++++++++++++++++----------------- babs/utils.py | 69 ++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 33 deletions(-) diff --git a/babs/babs.py b/babs/babs.py index 7099dc55..f030933a 100644 --- a/babs/babs.py +++ b/babs/babs.py @@ -53,7 +53,8 @@ check_job_account, print_versions_from_yaml, get_git_show_ref_shasum, - ceildiv) + ceildiv, + generate_bash_get_files) # import pandas as pd @@ -250,17 +251,24 @@ def babs_bootstrap(self, input_ds, container_ds, container_name, container_config_yaml_file, system, if_unzip=False): """ - Bootstrap a babs project: initialize datalad-tracked RIAs, generate scripts to be used, etc + Bootstrap a BABS project: + initialize datalad-tracked RIAs, generate scripts to be used, etc. + This can also be used to bootstrap an unzip project to unzip files. + if that's the case, `if_unzip = True`, and: + 1. input dataset will be output RIA (with merged results) of a BABS project; + 2. there is no container datalad dataset Parameters: ------------- input_ds: class `Input_ds` Input dataset(s). - container_name: str + container_name: str or None name of the container, best to include version number. e.g., 'fmriprep-0-0-0' - container_ds: str + `None` only when `if_unzip=True` + container_ds: str or None path to the container datalad dataset which the user provides + `None` only when `if_unzip=True` container_config_yaml_file: str Path to a YAML file that contains the configurations of how to run the BIDS App container @@ -331,8 +339,14 @@ def babs_bootstrap(self, input_ds, babs_proj_config_file.write(" is_zipped: 'TO_BE_FILLED'\n") # container ds: babs_proj_config_file.write("container:\n") - babs_proj_config_file.write(" name: '" + container_name + "'\n") - babs_proj_config_file.write(" path_in: '" + container_ds + "'\n") + if not if_unzip: + babs_proj_config_file.write(" name: '" + container_name + "'\n") + babs_proj_config_file.write(" path_in: '" + container_ds + "'\n") + else: # `if_unzip=True`: there is no container ds as input: + # save as `null` to YAML file, which will be read as `None` + babs_proj_config_file.write(" name: null\n") + babs_proj_config_file.write(" path_in: null\n") + # tested: after re-loading and re-saving, what's in YAML will still be `null` babs_proj_config_file.close() self.datalad_save(path="code/babs_proj_config.yaml", @@ -394,7 +408,10 @@ def babs_bootstrap(self, input_ds, print("\nChecking whether each input dataset is a zipped or unzipped dataset...") input_ds.check_if_zipped() # sanity checks: - input_ds.check_validity_zipped_input_dataset(self.type_session) + if not if_unzip: + input_ds.check_validity_zipped_input_dataset(self.type_session) + # if if_unzip: not to perform the sanity check, as the input ds's + # name is fixed: "results", and won't match with zip filenames or foldernames inside) # Check validity of unzipped ds: # if multi-ses, has `ses-*` in each `sub-*`; if single-ses, has a `sub-*` @@ -422,23 +439,26 @@ def babs_bootstrap(self, input_ds, # path = op.join(self.project_root, "containers")) # path to clone into # directly add container as sub-dataset of `analysis`: - print("\nAdding the container as a sub-dataset of `analysis` dataset...") - dlapi.install(dataset=self.analysis_path, - source=container_ds, # container datalad dataset - path=op.join(self.analysis_path, "containers")) - # into `analysis/containers` folder + if not if_unzip: # only when bootstrap for a regular BABS project: + print("\nAdding the container as a sub-dataset of `analysis` dataset...") + dlapi.install(dataset=self.analysis_path, + source=container_ds, # container datalad dataset + path=op.join(self.analysis_path, "containers")) + # into `analysis/containers` folder - # original bash command, if directly going into as sub-dataset: - # datalad install -d . --source ../../toybidsapp-container-docker/ containers + # original bash command, if directly going into as sub-dataset: + # datalad install -d . --source ../../toybidsapp-container-docker/ containers - # from our the way: - # cd ${PROJECTROOT}/analysis - # datalad install -d . --source ${PROJECTROOT}/pennlinc-containers + # from our the way: + # cd ${PROJECTROOT}/analysis + # datalad install -d . --source ${PROJECTROOT}/pennlinc-containers - container = Container(container_ds, container_name, container_config_yaml_file) + container = Container(container_ds, container_name, container_config_yaml_file) - # sanity check of container ds: - container.sanity_check(self.analysis_path) + # sanity check of container ds: + container.sanity_check(self.analysis_path) + else: # for `babs-unzip`: + container = None # ============================================================== # Bootstrap scripts: @@ -447,15 +467,21 @@ def babs_bootstrap(self, input_ds, # Generate `_zip.sh`: ---------------------------------- # which is a bash script of singularity run + zip # in folder: `analysis/code` - print("\nGenerating a bash script for running container and zipping the outputs...") - print("This bash script will be named as `" + container_name + "_zip.sh`") - bash_path = op.join(self.analysis_path, "code", container_name + "_zip.sh") - container.generate_bash_run_bidsapp(bash_path, input_ds, self.type_session) - self.datalad_save(path="code/" + container_name + "_zip.sh", - message="Generate script of running container") - - # make another folder within `code` for test jobs: - os.makedirs(op.join(self.analysis_path, "code/check_setup"), exist_ok=True) + os.makedirs(op.join(self.analysis_path, "code"), exist_ok=True) + if not if_unzip: + print("\nGenerating a bash script for running container and zipping the outputs...") + print("This bash script will be named as `" + container_name + "_zip.sh`") + bash_path = op.join(self.analysis_path, "code", container_name + "_zip.sh") + container.generate_bash_run_bidsapp(bash_path, input_ds, self.type_session) + self.datalad_save(path="code/" + container_name + "_zip.sh", + message="Generate script of running container") + else: # if this is for `babs-unzip`: + # generate `get_files.sh`: + # as class `Container` is not generated, will call a plain function: + bash_path = op.join(self.analysis_path, "code", "get_files.sh") + generate_bash_get_files(bash_path, container_config_yaml_file) + # TODO: add datalad save: + print("TODO") # Generate `participant_job.sh`: -------------------------------------- print("\nGenerating a bash script for running jobs at participant (or session) level...") @@ -463,15 +489,19 @@ def babs_bootstrap(self, input_ds, bash_path = op.join(self.analysis_path, "code", "participant_job.sh") container.generate_bash_participant_job(bash_path, input_ds, self.type_session, system) + self.datalad_save(path="code/participant_job.sh", + message="Participant compute job implementation") - # also, generate a bash script of a test job used by `babs-check-setup`: + # Generate bash scripts for a test job used by `babs-check-setup`: ------------- + # make another folder within `code` for test jobs: + os.makedirs(op.join(self.analysis_path, "code/check_setup"), exist_ok=True) path_check_setup = op.join(self.analysis_path, "code/check_setup") + # generate `call_test_job.sh` and `test_job.py`: container.generate_bash_test_job(path_check_setup, system) - self.datalad_save(path=["code/participant_job.sh", - "code/check_setup/call_test_job.sh", + self.datalad_save(path=["code/check_setup/call_test_job.sh", "code/check_setup/test_job.py"], - message="Participant compute job implementation") + message="Generate scripts for test jobs in babs-check-setup") # NOTE: `dlapi.save()` does not work... # e.g., datalad save -m "Participant compute job implementation" @@ -1895,6 +1925,10 @@ def babs_unzip(self, where_unzip_project, unzip_project_name, input_ds_cli = [["results", input_ds_path]] # initialize `Input_ds` class: input_ds_unzip = Input_ds(input_ds_cli) + # get `initial_inclu_df` - expect to be `None`: + input_ds_unzip.get_initial_inclu_df( + list_sub_file=None, + type_session=self.type_session) # Initialize unzip project: unzip_project_root = op.join(where_unzip_project, unzip_project_name) diff --git a/babs/utils.py b/babs/utils.py index 7cc2bb15..8a2a9123 100644 --- a/babs/utils.py +++ b/babs/utils.py @@ -2083,3 +2083,72 @@ def ceildiv(a, b): ...is-there-a-ceiling-equivalent-of-operator-in-python """ return -(a // -b) + + +def generate_bash_get_files(bash_path, container_config_yaml_file, type_session): + """ + This is to generate a bash script `get_files` used by unzipping. + + Parameters: + ------------ + bash_file: str + The path to the bash file to be generated. + It should be in the `analysis/code` folder. + and probably named as `get_files.sh` + container_config_yaml_file: str + path to container's configuration yaml file. + This should probably include section: + - 'unzip_desired_filenames' + - 'rename_conflict_files' + type_session: str + multi-ses or single-ses. + + Notes: + ------- + This function should be similar to `Container.generate_bash_run_bidsapp()` + """ + # ===================================================================== + # Sanity checks and preparations: + # ===================================================================== + type_session = validate_type_session(type_session) + + # create the folder if not existing yet: + os.makedirs(op.dirname(bash_path), exist_ok=True) + + # Check if the bash file already exist: + if op.exists(bash_path): + os.remove(bash_path) # remove it + + # Check yaml file sections: TODO + + # ===================================================================== + # Generate `get_files.sh`: + # ===================================================================== + # Write into the bash file: + bash_file = open(bash_path, "a") # open in append mode + + # Generate the header and input arguments: ------------------------- + + + # Generate unzipping part: --------------------------------------- + # for each foldernames requested: + # 1. identify the zip filename + # 2. for each desired file pattern, unzip + # 3. rename files `mv` if requested + # 4. remove unzipped folder + + + # Done generating `get_files.sh`: -------------------------------- + bash_file.write("\n") + bash_file.close() + + # ===================================================================== + # Finish up: + # ===================================================================== + # Execute necessary commands: ------------------------------------- + # change the permission of this bash file: + proc_chmod_bashfile = subprocess.run( + ["chmod", "+x", bash_path], # e.g., chmod +x code/get_files.sh + stdout=subprocess.PIPE + ) + proc_chmod_bashfile.check_returncode() From bcd91968690b7fab3839775c9d49a83aa757e875 Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Thu, 13 Apr 2023 22:24:10 -0400 Subject: [PATCH 5/7] continue coding for generating get_files.sh --- babs/babs.py | 13 +++++++++---- babs/utils.py | 41 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/babs/babs.py b/babs/babs.py index f030933a..95d5e6ac 100644 --- a/babs/babs.py +++ b/babs/babs.py @@ -454,11 +454,18 @@ def babs_bootstrap(self, input_ds, # datalad install -d . --source ${PROJECTROOT}/pennlinc-containers container = Container(container_ds, container_name, container_config_yaml_file) - # sanity check of container ds: container.sanity_check(self.analysis_path) + # load config as dict - this is not necessary here, + # but just to be consistent with `if_unzip`: + container_config_yaml = container.config else: # for `babs-unzip`: container = None + # the config dict loaded from YAML file: + if container_config_yaml_file is None: + container_config_yaml = None + else: + container_config_yaml = read_yaml(container_config_yaml_file) # ============================================================== # Bootstrap scripts: @@ -479,7 +486,7 @@ def babs_bootstrap(self, input_ds, # generate `get_files.sh`: # as class `Container` is not generated, will call a plain function: bash_path = op.join(self.analysis_path, "code", "get_files.sh") - generate_bash_get_files(bash_path, container_config_yaml_file) + generate_bash_get_files(bash_path, container_config_yaml) # TODO: add datalad save: print("TODO") @@ -1950,8 +1957,6 @@ def babs_unzip(self, where_unzip_project, unzip_project_name, if_unzip=True) print("TODO") - # TODO: `container_config_yaml_file` could be `None`! - # Check before loading in `babs_bootstrap()`!!! # ==================================================== # Run scripts to unzip data diff --git a/babs/utils.py b/babs/utils.py index 8a2a9123..674f7866 100644 --- a/babs/utils.py +++ b/babs/utils.py @@ -2085,21 +2085,42 @@ def ceildiv(a, b): return -(a // -b) -def generate_bash_get_files(bash_path, container_config_yaml_file, type_session): +def unzip_folder_patterns(config): + """ + This is to determine the folder patterns for unzipping. + If 'unzip_desired_filenames' is provided in the config YAML file, then refer to that; + if not, meaning getting all files, then get 'zip_foldernames' from 'babs_proj_config.yaml' + + Parameters: + ------------- + config: dict or None + config read from container's config yaml file. + If the yaml file is not provided, it's `None`. + + Notes: + ---------- + TODO: probably called by `babs-init`, and pass to functions for generating + `participant_job.sh` and `get_files.sh` for unzipping? + TODO: add 'zip_foldernames' section into 'babs_proj_config.yaml' when babs-init + """ + print("TODO") + +def generate_bash_get_files(bash_path, config, type_session): """ This is to generate a bash script `get_files` used by unzipping. - + Parameters: ------------ bash_file: str The path to the bash file to be generated. It should be in the `analysis/code` folder. and probably named as `get_files.sh` - container_config_yaml_file: str - path to container's configuration yaml file. + config: dict or None + config loaded from container's configuration yaml file. This should probably include section: - 'unzip_desired_filenames' - 'rename_conflict_files' + If YAML file was not provided, it's `None`. type_session: str multi-ses or single-ses. @@ -2128,7 +2149,19 @@ def generate_bash_get_files(bash_path, container_config_yaml_file, type_session) bash_file = open(bash_path, "a") # open in append mode # Generate the header and input arguments: ------------------------- + bash_file.write("#!/bin/bash\n") + bash_file.write("set -e -u -x\n") + + count_inputs_bash = 0 + bash_file.write('\nsubid="$1"\n') + count_inputs_bash += 1 + + if type_session == "multi-ses": + # also have the input of `sesid`: + bash_file.write('sesid="$2"\n') + count_inputs_bash += 1 + # add zip file name: # Generate unzipping part: --------------------------------------- # for each foldernames requested: From c4d00dcb063d64e22d6dcb29c349fb555fe82eb6 Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Fri, 14 Apr 2023 13:43:58 -0400 Subject: [PATCH 6/7] add a file for testing git push --- .vscode/random_file.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 .vscode/random_file.txt diff --git a/.vscode/random_file.txt b/.vscode/random_file.txt new file mode 100644 index 00000000..e02ac740 --- /dev/null +++ b/.vscode/random_file.txt @@ -0,0 +1 @@ +This is just a random file for testing git push. \ No newline at end of file From 0207c9e7cf027dab3761ae3456c0f728028ea233 Mon Sep 17 00:00:00 2001 From: Chenying Zhao Date: Fri, 14 Apr 2023 18:06:59 -0400 Subject: [PATCH 7/7] remove random file used to test git push --- .vscode/random_file.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .vscode/random_file.txt diff --git a/.vscode/random_file.txt b/.vscode/random_file.txt deleted file mode 100644 index e02ac740..00000000 --- a/.vscode/random_file.txt +++ /dev/null @@ -1 +0,0 @@ -This is just a random file for testing git push. \ No newline at end of file