From ac5a1b09d2ec25ac9376b78f2a9bc9be69924e40 Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Tue, 10 Oct 2023 14:05:54 -0400
Subject: [PATCH] Concatenate across directions as well as runs (#965)

---
 xcp_d/tests/test_utils_bids.py   | 49 ++++++++++++++++++++++++++++++++
 xcp_d/utils/bids.py              | 27 ++++++++++++++----
 xcp_d/workflows/base.py          |  2 +-
 xcp_d/workflows/concatenation.py |  4 +--
 4 files changed, 73 insertions(+), 9 deletions(-)

diff --git a/xcp_d/tests/test_utils_bids.py b/xcp_d/tests/test_utils_bids.py
index ff99b55ca..7eddb24fc 100644
--- a/xcp_d/tests/test_utils_bids.py
+++ b/xcp_d/tests/test_utils_bids.py
@@ -245,3 +245,52 @@ def test_get_entity(datasets):
     )
     with pytest.raises(ValueError, match="Unknown space"):
         xbids.get_entity(fname, "space")
+
+
+def test_group_across_runs():
+    """Test group_across_runs."""
+    in_files = [
+        "/path/sub-01_task-axcpt_run-03_bold.nii.gz",
+        "/path/sub-01_task-rest_run-03_bold.nii.gz",
+        "/path/sub-01_task-rest_run-01_bold.nii.gz",
+        "/path/sub-01_task-axcpt_run-02_bold.nii.gz",
+        "/path/sub-01_task-rest_run-02_bold.nii.gz",
+        "/path/sub-01_task-axcpt_run-01_bold.nii.gz",
+    ]
+    grouped_files = xbids.group_across_runs(in_files)
+    assert isinstance(grouped_files, list)
+    assert len(grouped_files[0]) == 3
+    assert grouped_files[0] == [
+        "/path/sub-01_task-axcpt_run-01_bold.nii.gz",
+        "/path/sub-01_task-axcpt_run-02_bold.nii.gz",
+        "/path/sub-01_task-axcpt_run-03_bold.nii.gz",
+    ]
+    assert len(grouped_files[1]) == 3
+    assert grouped_files[1] == [
+        "/path/sub-01_task-rest_run-01_bold.nii.gz",
+        "/path/sub-01_task-rest_run-02_bold.nii.gz",
+        "/path/sub-01_task-rest_run-03_bold.nii.gz",
+    ]
+
+    in_files = [
+        "/path/sub-01_task-rest_dir-LR_run-2_bold.nii.gz",
+        "/path/sub-01_task-rest_dir-RL_run-1_bold.nii.gz",
+        "/path/sub-01_task-axcpt_dir-LR_bold.nii.gz",
+        "/path/sub-01_task-rest_dir-RL_run-2_bold.nii.gz",
+        "/path/sub-01_task-rest_dir-LR_run-1_bold.nii.gz",
+        "/path/sub-01_task-axcpt_dir-RL_bold.nii.gz",
+    ]
+    grouped_files = xbids.group_across_runs(in_files)
+    assert isinstance(grouped_files, list)
+    assert len(grouped_files[0]) == 2
+    assert grouped_files[0] == [
+        "/path/sub-01_task-axcpt_dir-LR_bold.nii.gz",
+        "/path/sub-01_task-axcpt_dir-RL_bold.nii.gz",
+    ]
+    assert len(grouped_files[1]) == 4
+    assert grouped_files[1] == [
+        "/path/sub-01_task-rest_dir-LR_run-1_bold.nii.gz",
+        "/path/sub-01_task-rest_dir-RL_run-1_bold.nii.gz",
+        "/path/sub-01_task-rest_dir-LR_run-2_bold.nii.gz",
+        "/path/sub-01_task-rest_dir-RL_run-2_bold.nii.gz",
+    ]
diff --git a/xcp_d/utils/bids.py b/xcp_d/utils/bids.py
index 1321ab536..efa7c9aaf 100644
--- a/xcp_d/utils/bids.py
+++ b/xcp_d/utils/bids.py
@@ -896,7 +896,11 @@ def get_entity(filename, entity):
 
 
 def group_across_runs(in_files):
-    """Group preprocessed BOLD files by unique sets of entities, ignoring run.
+    """Group preprocessed BOLD files by unique sets of entities, ignoring run and direction.
+
+    We only ignore direction for the sake of HCP.
+    This may lead to small problems for non-HCP datasets that differentiate scans based on
+    both run and direction.
 
     Parameters
     ----------
@@ -913,20 +917,31 @@ def group_across_runs(in_files):
 
     # First, extract run information and sort the input files by the runs,
     # so that any cases where files are not already in ascending run order get fixed.
-    run_numbers = []
+    run_numbers, directions = [], []
     for in_file in in_files:
         run = get_entity(in_file, "run")
         if run is None:
             run = 0
 
+        direction = get_entity(in_file, "dir")
+        if direction is None:
+            direction = "none"
+
         run_numbers.append(int(run))
+        directions.append(direction)
+
+    # Combine the three lists into a list of tuples
+    combined_data = list(zip(run_numbers, directions, in_files))
+
+    # Sort the tuples by run, then direction, then filename
+    sorted_data = sorted(combined_data, key=lambda x: (x[0], x[1], x[2]))
 
-    # Sort the files by the run numbers.
-    zipped_pairs = zip(run_numbers, in_files)
-    sorted_in_files = [x for _, x in sorted(zipped_pairs)]
+    # Extract the now-sorted file list
+    sorted_in_files = [item[2] for item in sorted_data]
 
-    # Extract the unique sets of entities (i.e., the filename, minus the run entity).
+    # Extract the unique sets of entities (i.e., the filename, minus the run and dir entities).
     unique_filenames = [re.sub("_run-[0-9]+_", "_", os.path.basename(f)) for f in sorted_in_files]
+    unique_filenames = [re.sub("_dir-[0-9a-zA-Z]+_", "_", f) for f in unique_filenames]
 
     # Assign each in_file to a group of files with the same entities, except run.
     out_files, grouped_unique_filenames = [], []
diff --git a/xcp_d/workflows/base.py b/xcp_d/workflows/base.py
index 7620c3c94..f2f8a6c7d 100644
--- a/xcp_d/workflows/base.py
+++ b/xcp_d/workflows/base.py
@@ -635,7 +635,7 @@ def init_subject_wf(
     )
 
     n_runs = len(preproc_files)
-    preproc_files = group_across_runs(preproc_files)
+    preproc_files = group_across_runs(preproc_files)  # group files across runs and directions
     run_counter = 0
     for ent_set, task_files in enumerate(preproc_files):
         # Assuming TR is constant across runs for a given combination of entities.
diff --git a/xcp_d/workflows/concatenation.py b/xcp_d/workflows/concatenation.py
index 29b4755c2..1e4a95b2a 100644
--- a/xcp_d/workflows/concatenation.py
+++ b/xcp_d/workflows/concatenation.py
@@ -28,7 +28,7 @@ def init_concatenate_data_wf(
     dcan_qc,
     name="concatenate_data_wf",
 ):
-    """Concatenate postprocessed data.
+    """Concatenate postprocessed data across runs and directions.
 
     Workflow Graph
         .. workflow::
@@ -99,7 +99,7 @@
     workflow = Workflow(name=name)
 
     workflow.__desc__ = """
-Postprocessing derivatives from multi-run tasks were then concatenated across runs.
+Postprocessing derivatives from multi-run tasks were then concatenated across runs and directions.
 """
 
     inputnode = pe.Node(
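
A note on the grouping logic above: group_across_runs sorts the preprocessed files by (run, direction, filename) and then buckets them by the filename with the run and dir entities stripped, so HCP-style dir-LR/dir-RL acquisitions of the same task land in a single concatenation group. The sketch below is a minimal, self-contained rendering of that idea for experimentation outside of xcp_d; the _entity helper and the group_across_runs_sketch name are hypothetical stand-ins invented for this illustration, not part of the xcp_d API.

import os
import re


def _entity(fname, key):
    """Hypothetical stand-in for xcp_d's get_entity helper.

    Pulls one BIDS entity value (e.g., "01" from "_run-01_") out of a
    filename, or returns None if the entity is absent.
    """
    match = re.search(f"_{key}-([0-9a-zA-Z]+)_", os.path.basename(fname))
    return match.group(1) if match else None


def group_across_runs_sketch(in_files):
    """Group BOLD-like filenames across runs and directions (illustration only)."""

    def sort_key(fname):
        # Mirror the patch: a missing run sorts as 0 and a missing direction
        # sorts as "none", so every file gets a comparable (run, dir) key.
        run = _entity(fname, "run")
        direction = _entity(fname, "dir")
        return (int(run) if run is not None else 0, direction or "none", fname)

    def group_key(fname):
        # Strip the run and dir entities with the same regexes as the patch,
        # so dir-LR/dir-RL variants of one task collapse to one key.
        name = re.sub("_run-[0-9]+_", "_", os.path.basename(fname))
        return re.sub("_dir-[0-9a-zA-Z]+_", "_", name)

    # Plain dicts preserve insertion order (Python 3.7+), so groups come out
    # in order of first appearance, as in the patched function.
    groups = {}
    for fname in sorted(in_files, key=sort_key):
        groups.setdefault(group_key(fname), []).append(fname)

    return list(groups.values())


if __name__ == "__main__":
    files = [
        "/path/sub-01_task-rest_dir-RL_run-1_bold.nii.gz",
        "/path/sub-01_task-axcpt_dir-LR_bold.nii.gz",
        "/path/sub-01_task-rest_dir-LR_run-1_bold.nii.gz",
    ]
    # Prints the axcpt file in its own group, then the two rest files with
    # dir-LR ordered before dir-RL.
    print(group_across_runs_sketch(files))

As the new docstring warns, stripping dir means that a non-HCP dataset which genuinely distinguishes acquisitions by both run and direction will have its directions merged into one concatenation group.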