Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] implement babs-unzip #75

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 31 additions & 21 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,27 @@
// "--project-root",
// "/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/data/test_babs_multi-ses_toybidsapp",
// ],
"args": [
"--where_project",
"/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/data",
"--project_name",
"test_babs_multi-ses_toybidsapp",
"--input",
"BIDS",
"/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/data/w2nu3",
// "https://osf.io/w2nu3/",
"--container_ds",
"/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/data/toybidsapp-container-docker",
"--container_name",
"toybidsapp-0-0-6",
"--container_config_yaml_file",
"/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/babs/notebooks/example_container_toybidsapp.yaml",
"--type_session",
"multi-ses",
"--type_system",
"sge",
"--keep-if-failed"
]
// "args": [
// "--where_project",
// "/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/data",
// "--project_name",
// "test_babs_multi-ses_toybidsapp",
// "--input",
// "BIDS",
// "/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/data/w2nu3",
// // "https://osf.io/w2nu3/",
// "--container_ds",
// "/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/data/toybidsapp-container-docker",
// "--container_name",
// "toybidsapp-0-0-6",
// "--container_config_yaml_file",
// "/Users/chenyzh/Desktop/Research/Satterthwaite_Lab/datalad_wrapper/babs/notebooks/example_container_toybidsapp.yaml",
// "--type_session",
// "multi-ses",
// "--type_system",
// "sge",
// "--keep-if-failed"
// ]
// "args": [
// "--where_project", "/cbica/projects/BABS/data",
// "--project_name", "test_babs_multi-ses_toybidsapp",
Expand All @@ -61,6 +61,16 @@
// "--path-workspace", "/cbica/projects/BABS/data/test_babs_multi-ses_toybidsapp/analysis/logs",
// "--path-check-setup", "/cbica/projects/BABS/data/test_babs_multi-ses_toybidsapp/analysis/code/check_setup"
// ]
"args": [
"--babs-project-root",
"/cbica/projects/BABS/data/test_babs_multi-ses_fmriprepfake",
"--where-unzip-project",
"/cbica/projects/BABS/data",
"--unzip-project-name",
"multi-ses_fmriprepfake_unzip",
"--container-config-yaml-file",
"/cbica/projects/BABS/babs/notebooks/example_container_fmriprepfake.yaml",
]
}
]
}
172 changes: 117 additions & 55 deletions babs/babs.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@
check_job_account,
print_versions_from_yaml,
get_git_show_ref_shasum,
ceildiv)
ceildiv,
generate_bash_get_files)

# import pandas as pd

Expand Down Expand Up @@ -248,24 +249,33 @@ def wtf_key_info(self, flag_output_ria_only=False):

def babs_bootstrap(self, input_ds,
container_ds, container_name, container_config_yaml_file,
system):
system, if_unzip=False):
"""
Bootstrap a babs project: initialize datalad-tracked RIAs, generate scripts to be used, etc
Bootstrap a BABS project:
initialize datalad-tracked RIAs, generate scripts to be used, etc.
This can also be used to bootstrap an unzip project to unzip files.
If that's the case, set `if_unzip=True`; then:
1. input dataset will be output RIA (with merged results) of a BABS project;
2. there is no container datalad dataset

Parameters:
-------------
input_ds: class `Input_ds`
Input dataset(s).
container_name: str
container_name: str or None
name of the container, best to include version number.
e.g., 'fmriprep-0-0-0'
container_ds: str
`None` only when `if_unzip=True`
container_ds: str or None
path to the container datalad dataset which the user provides
`None` only when `if_unzip=True`
container_config_yaml_file: str
Path to a YAML file that contains the configurations
of how to run the BIDS App container
system: class `System`
information about the cluster management system
if_unzip: bool
whether to bootstrap scripts for an unzip project (i.e., not a regular BABS project)
"""

# ==============================================================
Expand Down Expand Up @@ -329,8 +339,14 @@ def babs_bootstrap(self, input_ds,
babs_proj_config_file.write(" is_zipped: 'TO_BE_FILLED'\n")
# container ds:
babs_proj_config_file.write("container:\n")
babs_proj_config_file.write(" name: '" + container_name + "'\n")
babs_proj_config_file.write(" path_in: '" + container_ds + "'\n")
if not if_unzip:
babs_proj_config_file.write(" name: '" + container_name + "'\n")
babs_proj_config_file.write(" path_in: '" + container_ds + "'\n")
else: # `if_unzip=True`: there is no container ds as input:
# save as `null` to YAML file, which will be read as `None`
babs_proj_config_file.write(" name: null\n")
babs_proj_config_file.write(" path_in: null\n")
# tested: after re-loading and re-saving, what's in YAML will still be `null`

babs_proj_config_file.close()
self.datalad_save(path="code/babs_proj_config.yaml",
Expand Down Expand Up @@ -392,7 +408,10 @@ def babs_bootstrap(self, input_ds,
print("\nChecking whether each input dataset is a zipped or unzipped dataset...")
input_ds.check_if_zipped()
# sanity checks:
input_ds.check_validity_zipped_input_dataset(self.type_session)
if not if_unzip:
input_ds.check_validity_zipped_input_dataset(self.type_session)
# if `if_unzip`: do not perform the sanity check, as the input ds's
# name is fixed ("results") and won't match the zip filenames or foldernames inside.

# Check validity of unzipped ds:
# if multi-ses, has `ses-*` in each `sub-*`; if single-ses, has a `sub-*`
Expand Down Expand Up @@ -420,23 +439,33 @@ def babs_bootstrap(self, input_ds,
# path = op.join(self.project_root, "containers")) # path to clone into

# directly add container as sub-dataset of `analysis`:
print("\nAdding the container as a sub-dataset of `analysis` dataset...")
dlapi.install(dataset=self.analysis_path,
source=container_ds, # container datalad dataset
path=op.join(self.analysis_path, "containers"))
# into `analysis/containers` folder

# original bash command, if directly going into as sub-dataset:
# datalad install -d . --source ../../toybidsapp-container-docker/ containers

# from our the way:
# cd ${PROJECTROOT}/analysis
# datalad install -d . --source ${PROJECTROOT}/pennlinc-containers

container = Container(container_ds, container_name, container_config_yaml_file)

# sanity check of container ds:
container.sanity_check(self.analysis_path)
if not if_unzip: # only when bootstrap for a regular BABS project:
print("\nAdding the container as a sub-dataset of `analysis` dataset...")
dlapi.install(dataset=self.analysis_path,
source=container_ds, # container datalad dataset
path=op.join(self.analysis_path, "containers"))
# into `analysis/containers` folder

# original bash command, if directly going into as sub-dataset:
# datalad install -d . --source ../../toybidsapp-container-docker/ containers

# from our the way:
# cd ${PROJECTROOT}/analysis
# datalad install -d . --source ${PROJECTROOT}/pennlinc-containers

container = Container(container_ds, container_name, container_config_yaml_file)
# sanity check of container ds:
container.sanity_check(self.analysis_path)
# load config as dict - this is not necessary here,
# but just to be consistent with `if_unzip`:
container_config_yaml = container.config
else: # for `babs-unzip`:
container = None
# the config dict loaded from YAML file:
if container_config_yaml_file is None:
container_config_yaml = None
else:
container_config_yaml = read_yaml(container_config_yaml_file)

# ==============================================================
# Bootstrap scripts:
Expand All @@ -445,31 +474,41 @@ def babs_bootstrap(self, input_ds,
# Generate `<containerName>_zip.sh`: ----------------------------------
# which is a bash script of singularity run + zip
# in folder: `analysis/code`
print("\nGenerating a bash script for running container and zipping the outputs...")
print("This bash script will be named as `" + container_name + "_zip.sh`")
bash_path = op.join(self.analysis_path, "code", container_name + "_zip.sh")
container.generate_bash_run_bidsapp(bash_path, input_ds, self.type_session)
self.datalad_save(path="code/" + container_name + "_zip.sh",
message="Generate script of running container")

# make another folder within `code` for test jobs:
os.makedirs(op.join(self.analysis_path, "code/check_setup"), exist_ok=True)
os.makedirs(op.join(self.analysis_path, "code"), exist_ok=True)
if not if_unzip:
print("\nGenerating a bash script for running container and zipping the outputs...")
print("This bash script will be named as `" + container_name + "_zip.sh`")
bash_path = op.join(self.analysis_path, "code", container_name + "_zip.sh")
container.generate_bash_run_bidsapp(bash_path, input_ds, self.type_session)
self.datalad_save(path="code/" + container_name + "_zip.sh",
message="Generate script of running container")
else: # if this is for `babs-unzip`:
# generate `get_files.sh`:
# as class `Container` is not generated, will call a plain function:
bash_path = op.join(self.analysis_path, "code", "get_files.sh")
generate_bash_get_files(bash_path, container_config_yaml)
# TODO: add datalad save:
print("TODO")

# Generate `participant_job.sh`: --------------------------------------
print("\nGenerating a bash script for running jobs at participant (or session) level...")
print("This bash script will be named as `participant_job.sh`")
bash_path = op.join(self.analysis_path, "code", "participant_job.sh")
container.generate_bash_participant_job(bash_path, input_ds, self.type_session,
system)
self.datalad_save(path="code/participant_job.sh",
message="Participant compute job implementation")

# also, generate a bash script of a test job used by `babs-check-setup`:
# Generate bash scripts for a test job used by `babs-check-setup`: -------------
# make another folder within `code` for test jobs:
os.makedirs(op.join(self.analysis_path, "code/check_setup"), exist_ok=True)
path_check_setup = op.join(self.analysis_path, "code/check_setup")
# generate `call_test_job.sh` and `test_job.py`:
container.generate_bash_test_job(path_check_setup, system)

self.datalad_save(path=["code/participant_job.sh",
"code/check_setup/call_test_job.sh",
self.datalad_save(path=["code/check_setup/call_test_job.sh",
"code/check_setup/test_job.py"],
message="Participant compute job implementation")
message="Generate scripts for test jobs in babs-check-setup")
# NOTE: `dlapi.save()` does not work...
# e.g., datalad save -m "Participant compute job implementation"

Expand Down Expand Up @@ -1865,27 +1904,57 @@ def babs_merge(self, chunk_size, trial_run):
+ " not to push merging actions to output RIA.")
print("\n`babs-merge` did not fully finish yet!")

def babs_unzip(container_config_yaml_file):
def babs_unzip(self, where_unzip_project, unzip_project_name,
container_config_yaml_file):
"""
This function unzips results and extract desired files.
This is done in 3 steps:
1. Generate scripts used by `babs-unzip`
2. Run scripts to unzip data
3. Merge all branches of unzipping
This function initializes an unzip project for a BABS project.

Parameters:
--------------
config: dict
loaded container config yaml file
where_unzip_project: str
Absolute path to the directory where the unzip project will be located.
unzip_project_name: str
The name of the unzip project.
container_config_yaml_file: str or None
path to container's configuration YAML file.
It contains info of what files to unzip etc
`None` if not provided by the user.
"""

# ====================================================
# Generate scripts used by `babs-unzip`
# ====================================================

# Prepare input_ds_unzip:
# Prepare `input_ds_unzip` (class `Input_ds`) for unzipping:
# path to the output ria to be cloned:
# e.g., "ria+file:///path/to/my_BABS_project/output_ria#~data"
input_ds_path = self.output_ria_url + "#~data"
input_ds_cli = [["results", input_ds_path]]
# initialize `Input_ds` class:
input_ds_unzip = Input_ds(input_ds_cli)
# get `initial_inclu_df` - expect to be `None`:
input_ds_unzip.get_initial_inclu_df(
list_sub_file=None,
type_session=self.type_session)

# Initialize unzip project:
unzip_project_root = op.join(where_unzip_project, unzip_project_name)
# initialize:
unzip_babs_project = BABS(unzip_project_root,
self.type_session, # same as BABS project
self.type_system) # same as BABS project
# Other necessary inputs for `babs_bootstrap()`:
system = System(self.type_system)

# Call `babs_bootstrap()`:
# !!!! using babs_proj_unzip, instead current `self`!!!
# !!!! using `unzip_project_root`, instead of current `self`!!!
unzip_babs_project.babs_bootstrap(
input_ds=input_ds_unzip,
container_ds=None, # no container ds for bootstrap unzip project
container_name=None,
container_config_yaml_file=container_config_yaml_file,
system=system,
if_unzip=True)

print("TODO")

Expand Down Expand Up @@ -2646,13 +2715,6 @@ def generate_bash_participant_job(self, bash_path, input_ds, type_session,
(cd inputs/data/<name> && rm -f `ls sub-*.zip | grep -v ${subid}`)
"""

# `datalad get` the container ??
# NOTE: only found in `bootstrap-fmriprep-ingressed-fs.sh`...
# not sure if this is really needed
bash_file.write("\n# Get the container dataset:\n")
bash_file.write("datalad get -r containers" + "\n")
# NOTE: ^^ not sure if `-r` is needed....

# determine the zip filename:
cmd_determine_zipfilename = generate_cmd_determine_zipfilename(input_ds, type_session)
bash_file.write(cmd_determine_zipfilename)
Expand Down
Loading