From 13ab11c7dd9a9f8d884ac3112a4484911edf3b3a Mon Sep 17 00:00:00 2001 From: Oscar Esteban Date: Wed, 18 May 2022 12:39:16 +0200 Subject: [PATCH 1/7] fix: pattern matching now down on only the relative part of paths Resolves: #858. --- bids/layout/index.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/bids/layout/index.py b/bids/layout/index.py index 105161b62..6bc0a8269 100644 --- a/bids/layout/index.py +++ b/bids/layout/index.py @@ -26,11 +26,15 @@ def _extract_entities(bidsfile, entities): return match_vals -def _check_path_matches_patterns(path, patterns): +def _check_path_matches_patterns(path, patterns, root=None): """Check if the path matches at least one of the provided patterns. """ if not patterns: return False + path = path.absolute() + if root is not None: + path = Path("/") / path.relative_to(root) + for patt in patterns: if isinstance(patt, Path): if path == patt: @@ -115,17 +119,17 @@ def session(self): return self._layout.connection_manager.session def _validate_dir(self, d, default=None): - if _check_path_matches_patterns(d, self._include_patterns): + if _check_path_matches_patterns(d, self._include_patterns, root=self._layout._root): return True - if _check_path_matches_patterns(d, self._exclude_patterns): + if _check_path_matches_patterns(d, self._exclude_patterns, root=self._layout._root): return False return default def _validate_file(self, f, default=None): # Inclusion takes priority over exclusion - if _check_path_matches_patterns(f, self._include_patterns): + if _check_path_matches_patterns(f, self._include_patterns, root=self._layout._root): return True - if _check_path_matches_patterns(f, self._exclude_patterns): + if _check_path_matches_patterns(f, self._exclude_patterns, root=self._layout._root): return False # If inclusion/exclusion is inherited from a parent directory, that From 004b83b560fd84b28bc7eac3ac291518aa140257 Mon Sep 17 00:00:00 2001 From: Oscar Esteban Date: Wed, 18 May 2022 16:17:30 +0200 Subject: [PATCH 2/7] enh: add tests --- bids/layout/tests/test_layout.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/bids/layout/tests/test_layout.py b/bids/layout/tests/test_layout.py index 7d1dd526b..3d6139f22 100644 --- a/bids/layout/tests/test_layout.py +++ b/bids/layout/tests/test_layout.py @@ -13,7 +13,7 @@ from bids.layout import BIDSLayout, Query from bids.layout.models import Config -from bids.layout.index import BIDSLayoutIndexer +from bids.layout.index import BIDSLayoutIndexer, _check_path_matches_patterns from bids.layout.utils import PaddedInt from bids.tests import get_test_data_path from bids.utils import natural_sort @@ -835,3 +835,33 @@ def test_padded_run_roundtrip(layout_ds005): assert ents["run"] == 1 newpath = layout_ds005.build_path(ents, absolute_paths=False) assert newpath == "sub-01/func/sub-01_task-mixedgamblestask_run-01_bold.nii.gz" + +@pytest.mark.parametrize( + "fname", [ + "sub-01/anat/sub-01_T1w.nii.gz", + ".datalad", + "code", + "sub-01/.datalad", + ], +) +def test_indexer_patterns(fname): + root = Path("/home/user/.cache/data/") + path = root / fname + + assert _check_path_matches_patterns( + path, + ["code"], + root=None, + ) is (fname == "code") + + assert _check_path_matches_patterns( + path, + [re.compile(r"/\.")], + root=None, + ) is True + + assert _check_path_matches_patterns( + path, + [re.compile(r"/\.")], + root=None, + ) is (".datalad" in fname) From 78e879e52179b20116c4d49dc4117768a63c22e7 Mon Sep 17 00:00:00 2001 From: Oscar Esteban Date: Wed, 18 May 2022 16:18:53 +0200 Subject: [PATCH 3/7] fix: error in test --- bids/layout/tests/test_layout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bids/layout/tests/test_layout.py b/bids/layout/tests/test_layout.py index 3d6139f22..748ed29a3 100644 --- a/bids/layout/tests/test_layout.py +++ b/bids/layout/tests/test_layout.py @@ -863,5 +863,5 @@ def test_indexer_patterns(fname): assert _check_path_matches_patterns( path, [re.compile(r"/\.")], - root=None, + root=root, ) is (".datalad" in fname) From 70c99f31ac3144af551e9416ccf8481311dd4540 Mon Sep 17 00:00:00 2001 From: Oscar Esteban Date: Wed, 18 May 2022 17:29:53 +0200 Subject: [PATCH 4/7] fix: pattern should not be list --- bids/layout/tests/test_layout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bids/layout/tests/test_layout.py b/bids/layout/tests/test_layout.py index 748ed29a3..f1a71ac87 100644 --- a/bids/layout/tests/test_layout.py +++ b/bids/layout/tests/test_layout.py @@ -850,7 +850,7 @@ def test_indexer_patterns(fname): assert _check_path_matches_patterns( path, - ["code"], + "code", root=None, ) is (fname == "code") From 9cab015208b976ad98e7870d3944541dfab9244a Mon Sep 17 00:00:00 2001 From: Oscar Esteban Date: Wed, 18 May 2022 17:47:45 +0200 Subject: [PATCH 5/7] fix: incidentally, address a bug happening with string patterns --- bids/layout/index.py | 8 +++++--- bids/layout/tests/test_layout.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/bids/layout/index.py b/bids/layout/index.py index 6bc0a8269..547ce6a6a 100644 --- a/bids/layout/index.py +++ b/bids/layout/index.py @@ -35,11 +35,13 @@ def _check_path_matches_patterns(path, patterns, root=None): if root is not None: path = Path("/") / path.relative_to(root) + # Path now can be downcast to str + path = str(path) for patt in patterns: - if isinstance(patt, Path): - if path == patt: + if isinstance(patt, (str, Path)): + if str(patt) in path: return True - elif patt.search(str(path)): + elif patt.search(path): return True return False diff --git a/bids/layout/tests/test_layout.py b/bids/layout/tests/test_layout.py index f1a71ac87..748ed29a3 100644 --- a/bids/layout/tests/test_layout.py +++ b/bids/layout/tests/test_layout.py @@ -850,7 +850,7 @@ def test_indexer_patterns(fname): assert _check_path_matches_patterns( path, - "code", + ["code"], root=None, ) is (fname == "code") From abecaf776e474ea9122ce405596a4b8466d2b11f Mon Sep 17 00:00:00 2001 From: Oscar Esteban Date: Wed, 18 May 2022 18:01:04 +0200 Subject: [PATCH 6/7] fix: relativeness also affects Path patterns --- bids/layout/index.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bids/layout/index.py b/bids/layout/index.py index 547ce6a6a..e67613d00 100644 --- a/bids/layout/index.py +++ b/bids/layout/index.py @@ -38,7 +38,12 @@ def _check_path_matches_patterns(path, patterns, root=None): # Path now can be downcast to str path = str(path) for patt in patterns: - if isinstance(patt, (str, Path)): + if isinstance(patt, Path): + patt = str( + patt if root is None + else patt.relative_to(root) + ) + if isinstance(patt, str): if str(patt) in path: return True elif patt.search(path): From c602bde4137020df741f7c48bc27ea63378487a4 Mon Sep 17 00:00:00 2001 From: Oscar Esteban Date: Thu, 19 May 2022 11:09:48 +0200 Subject: [PATCH 7/7] fix: deep revision of validation --- bids/layout/index.py | 73 +++++++++++++++++++++++--------- bids/layout/tests/test_layout.py | 59 +++++++++++++++++++------- bids/layout/validation.py | 54 +++++++++++++++-------- 3 files changed, 132 insertions(+), 54 deletions(-) diff --git a/bids/layout/index.py b/bids/layout/index.py index e67613d00..a5d48af0f 100644 --- a/bids/layout/index.py +++ b/bids/layout/index.py @@ -38,19 +38,50 @@ def _check_path_matches_patterns(path, patterns, root=None): # Path now can be downcast to str path = str(path) for patt in patterns: + if hasattr(patt, "search"): + if patt.search(path): + return True + else: + continue + if isinstance(patt, Path): patt = str( patt if root is None - else patt.relative_to(root) + else Path("/") / patt.relative_to(root) ) - if isinstance(patt, str): - if str(patt) in path: - return True - elif patt.search(path): - return True + if str(patt) in path: + return str(patt) + return False +def _validate_path(path, incl_patt=None, excl_patt=None, root=None, default=None): + incl_matched = _check_path_matches_patterns(path, incl_patt, root=root) + excl_matched = _check_path_matches_patterns(path, excl_patt, root=root) + + if incl_matched is False and excl_matched is False: + return default + + # Include: if a inclusion regex pattern matched or a nonregex inclusion pattern matched + if incl_matched is True or (incl_matched and not excl_matched): + return True + + if excl_matched is True: + return False + + if not incl_matched and excl_matched: + return False + + # Both matched: nested pattern + if incl_matched not in excl_matched: + return True + + if excl_matched not in incl_matched: + return False + + return default + + class BIDSLayoutIndexer: """ Indexer class for BIDSLayout. @@ -125,19 +156,16 @@ def __call__(self, layout): def session(self): return self._layout.connection_manager.session - def _validate_dir(self, d, default=None): - if _check_path_matches_patterns(d, self._include_patterns, root=self._layout._root): - return True - if _check_path_matches_patterns(d, self._exclude_patterns, root=self._layout._root): - return False - return default - def _validate_file(self, f, default=None): - # Inclusion takes priority over exclusion - if _check_path_matches_patterns(f, self._include_patterns, root=self._layout._root): - return True - if _check_path_matches_patterns(f, self._exclude_patterns, root=self._layout._root): - return False + matched_patt = _validate_path( + f, + incl_patt=self._include_patterns, + excl_patt=self._exclude_patterns, + default=default, + root=self._layout._root + ) + if matched_patt is not None: + return matched_patt # If inclusion/exclusion is inherited from a parent directory, that # takes precedence over the remaining file-level rules @@ -184,7 +212,13 @@ def _index_dir(self, path, config, default_action=None): _, dirnames, filenames = next(os.walk(path)) # Set the default inclusion/exclusion directive - default = self._validate_dir(path, default=default_action) + default = _validate_path( + path, + incl_patt=self._include_patterns, + excl_patt=self._exclude_patterns, + default=default_action, + root=self._layout._root, + ) # If layout configuration file exists, delete it if self.config_filename in filenames: @@ -203,7 +237,6 @@ def _index_dir(self, path, config, default_action=None): d = path / d self._index_dir(d, list(config), default_action=default) - def _index_file(self, f, dirpath, entities, default_action=None): """Create DB record for file and its tags. """ abs_fn = dirpath / f diff --git a/bids/layout/tests/test_layout.py b/bids/layout/tests/test_layout.py index 748ed29a3..955082822 100644 --- a/bids/layout/tests/test_layout.py +++ b/bids/layout/tests/test_layout.py @@ -43,7 +43,11 @@ def test_layout_init(layout_7t_trt): ]) def test_index_metadata(index_metadata, query, result, mock_config): data_dir = join(get_test_data_path(), '7t_trt') - layout = BIDSLayout(data_dir, index_metadata=index_metadata, **query) + layout = BIDSLayout( + data_dir, + indexer=BIDSLayoutIndexer(index_metadata=index_metadata), + **query + ) sample_file = layout.get(task='rest', extension='.nii.gz', acquisition='fullbrain')[0] metadata = sample_file.get_metadata() @@ -425,22 +429,39 @@ def test_nested_include_exclude(): target1 = join(data_dir, 'models', 'ds-005_type-test_model.json') target2 = join(data_dir, 'models', 'extras', 'ds-005_type-test_model.json') - # Nest a directory exclusion within an inclusion - layout = BIDSLayout(data_dir, validate=True, force_index=['models'], - ignore=[os.path.join('models', 'extras')]) - assert layout.get_file(target1) - assert not layout.get_file(target2) - # Nest a directory inclusion within an exclusion - layout = BIDSLayout(data_dir, validate=True, ignore=['models'], - force_index=[os.path.join('models', 'extras')]) + layout = BIDSLayout( + data_dir, + indexer=BIDSLayoutIndexer( + validate=True, + force_index=[os.path.join('models', 'extras')], + ignore=['models'], + ), + ) assert not layout.get_file(target1) assert layout.get_file(target2) + # Nest a directory exclusion within an inclusion + layout = BIDSLayout( + data_dir, + indexer=BIDSLayoutIndexer( + validate=True, + force_index=['models'], + ignore=[os.path.join('models', 'extras')], + ), + ) + assert layout.get_file(target1) + assert not layout.get_file(target2) + # Force file inclusion despite directory-level exclusion - models = ['models', target2] - layout = BIDSLayout(data_dir, validate=True, force_index=models, - ignore=[os.path.join('models', 'extras')]) + layout = BIDSLayout( + data_dir, + indexer=BIDSLayoutIndexer( + validate=True, + force_index=['models', target2], + ignore=[os.path.join('models', 'extras')], + ), + ) assert layout.get_file(target1) assert layout.get_file(target2) @@ -453,11 +474,17 @@ def test_nested_include_exclude_with_regex(): target1 = join(data_dir, 'models', 'ds-005_type-test_model.json') target2 = join(data_dir, 'models', 'extras', 'ds-005_type-test_model.json') - layout = BIDSLayout(data_dir, ignore=[patt2], force_index=[patt1]) + layout = BIDSLayout( + data_dir, + indexer=BIDSLayoutIndexer(ignore=[patt2], force_index=[patt1]) + ) assert layout.get_file(target1) assert not layout.get_file(target2) - layout = BIDSLayout(data_dir, ignore=[patt1], force_index=[patt2]) + layout = BIDSLayout( + data_dir, + indexer=BIDSLayoutIndexer(ignore=[patt1], force_index=[patt2]) + ) assert not layout.get_file(target1) assert layout.get_file(target2) @@ -848,11 +875,11 @@ def test_indexer_patterns(fname): root = Path("/home/user/.cache/data/") path = root / fname - assert _check_path_matches_patterns( + assert bool(_check_path_matches_patterns( path, ["code"], root=None, - ) is (fname == "code") + )) is (fname == "code") assert _check_path_matches_patterns( path, diff --git a/bids/layout/validation.py b/bids/layout/validation.py index 67cfe6003..9f8b67b72 100644 --- a/bids/layout/validation.py +++ b/bids/layout/validation.py @@ -30,8 +30,13 @@ k: val[k] for val in MANDATORY_DERIVATIVES_FIELDS.values() for k in val} -DEFAULT_LOCATIONS_TO_IGNORE = ("code", "stimuli", "sourcedata", "models", - re.compile(r'^\.')) +DEFAULT_LOCATIONS_TO_IGNORE = { + "code", + "stimuli", + "sourcedata", + "models", + re.compile(r'^\.'), +} def absolute_path_deprecation_warning(): warnings.warn("The absolute_paths argument will be removed from PyBIDS " @@ -172,27 +177,40 @@ def check_for_description(bids_dir): return paths +def _sort_patterns(patterns, root): + """Return sorted patterns, from more specific to more general.""" + regexes = [patt for patt in patterns if hasattr(patt, "search")] + + paths = [ + str((root / patt).absolute()) + for patt in listify(patterns) + if not hasattr(patt, "search") + ] + # Sort patterns from general to specific + paths.sort(key=len) + + # Combine and return (note path patterns are reversed, specific first) + return [Path(p) for p in reversed(paths)] + regexes + + def validate_indexing_args(ignore, force_index, root): if ignore is None: - ignore = DEFAULT_LOCATIONS_TO_IGNORE + ignore = list( + DEFAULT_LOCATIONS_TO_IGNORE - set(force_index or []) + ) # root has already been validated to be a directory - ignore = [(root / patt).absolute() - if isinstance(patt, str) else patt - for patt in listify(ignore or [])] - force_index = [(root / patt).absolute() - if isinstance(patt, str) else patt - for patt in listify(force_index or [])] + ignore = _sort_patterns(ignore, root) + force_index = _sort_patterns(force_index or [], root) # Derivatives get special handling; they shouldn't be indexed normally - if force_index is not None: - for entry in force_index: - condi = (isinstance(entry, str) and - str(entry.resolve()).startswith('derivatives')) - if condi: - msg = ("Do not pass 'derivatives' in the force_index " - "list. To index derivatives, either set " - "derivatives=True, or use add_derivatives().") - raise ValueError(msg) + for entry in force_index: + condi = (isinstance(entry, str) and + str(entry.resolve()).startswith('derivatives')) + if condi: + msg = ("Do not pass 'derivatives' in the force_index " + "list. To index derivatives, either set " + "derivatives=True, or use add_derivatives().") + raise ValueError(msg) return ignore, force_index