diff --git a/bids/layout/index.py b/bids/layout/index.py index 105161b62..a5d48af0f 100644 --- a/bids/layout/index.py +++ b/bids/layout/index.py @@ -26,20 +26,62 @@ def _extract_entities(bidsfile, entities): return match_vals -def _check_path_matches_patterns(path, patterns): +def _check_path_matches_patterns(path, patterns, root=None): """Check if the path matches at least one of the provided patterns. """ if not patterns: return False + path = path.absolute() + if root is not None: + path = Path("/") / path.relative_to(root) + + # Path now can be downcast to str + path = str(path) for patt in patterns: - if isinstance(patt, Path): - if path == patt: + if hasattr(patt, "search"): + if patt.search(path): return True - elif patt.search(str(path)): - return True + else: + continue + + if isinstance(patt, Path): + patt = str( + patt if root is None + else Path("/") / patt.relative_to(root) + ) + if str(patt) in path: + return str(patt) + return False +def _validate_path(path, incl_patt=None, excl_patt=None, root=None, default=None): + incl_matched = _check_path_matches_patterns(path, incl_patt, root=root) + excl_matched = _check_path_matches_patterns(path, excl_patt, root=root) + + if incl_matched is False and excl_matched is False: + return default + + # Include: if a inclusion regex pattern matched or a nonregex inclusion pattern matched + if incl_matched is True or (incl_matched and not excl_matched): + return True + + if excl_matched is True: + return False + + if not incl_matched and excl_matched: + return False + + # Both matched: nested pattern + if incl_matched not in excl_matched: + return True + + if excl_matched not in incl_matched: + return False + + return default + + class BIDSLayoutIndexer: """ Indexer class for BIDSLayout. @@ -114,19 +156,16 @@ def __call__(self, layout): def session(self): return self._layout.connection_manager.session - def _validate_dir(self, d, default=None): - if _check_path_matches_patterns(d, self._include_patterns): - return True - if _check_path_matches_patterns(d, self._exclude_patterns): - return False - return default - def _validate_file(self, f, default=None): - # Inclusion takes priority over exclusion - if _check_path_matches_patterns(f, self._include_patterns): - return True - if _check_path_matches_patterns(f, self._exclude_patterns): - return False + matched_patt = _validate_path( + f, + incl_patt=self._include_patterns, + excl_patt=self._exclude_patterns, + default=default, + root=self._layout._root + ) + if matched_patt is not None: + return matched_patt # If inclusion/exclusion is inherited from a parent directory, that # takes precedence over the remaining file-level rules @@ -173,7 +212,13 @@ def _index_dir(self, path, config, default_action=None): _, dirnames, filenames = next(os.walk(path)) # Set the default inclusion/exclusion directive - default = self._validate_dir(path, default=default_action) + default = _validate_path( + path, + incl_patt=self._include_patterns, + excl_patt=self._exclude_patterns, + default=default_action, + root=self._layout._root, + ) # If layout configuration file exists, delete it if self.config_filename in filenames: @@ -192,7 +237,6 @@ def _index_dir(self, path, config, default_action=None): d = path / d self._index_dir(d, list(config), default_action=default) - def _index_file(self, f, dirpath, entities, default_action=None): """Create DB record for file and its tags. """ abs_fn = dirpath / f diff --git a/bids/layout/tests/test_layout.py b/bids/layout/tests/test_layout.py index 7d1dd526b..955082822 100644 --- a/bids/layout/tests/test_layout.py +++ b/bids/layout/tests/test_layout.py @@ -13,7 +13,7 @@ from bids.layout import BIDSLayout, Query from bids.layout.models import Config -from bids.layout.index import BIDSLayoutIndexer +from bids.layout.index import BIDSLayoutIndexer, _check_path_matches_patterns from bids.layout.utils import PaddedInt from bids.tests import get_test_data_path from bids.utils import natural_sort @@ -43,7 +43,11 @@ def test_layout_init(layout_7t_trt): ]) def test_index_metadata(index_metadata, query, result, mock_config): data_dir = join(get_test_data_path(), '7t_trt') - layout = BIDSLayout(data_dir, index_metadata=index_metadata, **query) + layout = BIDSLayout( + data_dir, + indexer=BIDSLayoutIndexer(index_metadata=index_metadata), + **query + ) sample_file = layout.get(task='rest', extension='.nii.gz', acquisition='fullbrain')[0] metadata = sample_file.get_metadata() @@ -425,22 +429,39 @@ def test_nested_include_exclude(): target1 = join(data_dir, 'models', 'ds-005_type-test_model.json') target2 = join(data_dir, 'models', 'extras', 'ds-005_type-test_model.json') - # Nest a directory exclusion within an inclusion - layout = BIDSLayout(data_dir, validate=True, force_index=['models'], - ignore=[os.path.join('models', 'extras')]) - assert layout.get_file(target1) - assert not layout.get_file(target2) - # Nest a directory inclusion within an exclusion - layout = BIDSLayout(data_dir, validate=True, ignore=['models'], - force_index=[os.path.join('models', 'extras')]) + layout = BIDSLayout( + data_dir, + indexer=BIDSLayoutIndexer( + validate=True, + force_index=[os.path.join('models', 'extras')], + ignore=['models'], + ), + ) assert not layout.get_file(target1) assert layout.get_file(target2) + # Nest a directory exclusion within an inclusion + layout = BIDSLayout( + data_dir, + indexer=BIDSLayoutIndexer( + validate=True, + force_index=['models'], + ignore=[os.path.join('models', 'extras')], + ), + ) + assert layout.get_file(target1) + assert not layout.get_file(target2) + # Force file inclusion despite directory-level exclusion - models = ['models', target2] - layout = BIDSLayout(data_dir, validate=True, force_index=models, - ignore=[os.path.join('models', 'extras')]) + layout = BIDSLayout( + data_dir, + indexer=BIDSLayoutIndexer( + validate=True, + force_index=['models', target2], + ignore=[os.path.join('models', 'extras')], + ), + ) assert layout.get_file(target1) assert layout.get_file(target2) @@ -453,11 +474,17 @@ def test_nested_include_exclude_with_regex(): target1 = join(data_dir, 'models', 'ds-005_type-test_model.json') target2 = join(data_dir, 'models', 'extras', 'ds-005_type-test_model.json') - layout = BIDSLayout(data_dir, ignore=[patt2], force_index=[patt1]) + layout = BIDSLayout( + data_dir, + indexer=BIDSLayoutIndexer(ignore=[patt2], force_index=[patt1]) + ) assert layout.get_file(target1) assert not layout.get_file(target2) - layout = BIDSLayout(data_dir, ignore=[patt1], force_index=[patt2]) + layout = BIDSLayout( + data_dir, + indexer=BIDSLayoutIndexer(ignore=[patt1], force_index=[patt2]) + ) assert not layout.get_file(target1) assert layout.get_file(target2) @@ -835,3 +862,33 @@ def test_padded_run_roundtrip(layout_ds005): assert ents["run"] == 1 newpath = layout_ds005.build_path(ents, absolute_paths=False) assert newpath == "sub-01/func/sub-01_task-mixedgamblestask_run-01_bold.nii.gz" + +@pytest.mark.parametrize( + "fname", [ + "sub-01/anat/sub-01_T1w.nii.gz", + ".datalad", + "code", + "sub-01/.datalad", + ], +) +def test_indexer_patterns(fname): + root = Path("/home/user/.cache/data/") + path = root / fname + + assert bool(_check_path_matches_patterns( + path, + ["code"], + root=None, + )) is (fname == "code") + + assert _check_path_matches_patterns( + path, + [re.compile(r"/\.")], + root=None, + ) is True + + assert _check_path_matches_patterns( + path, + [re.compile(r"/\.")], + root=root, + ) is (".datalad" in fname) diff --git a/bids/layout/validation.py b/bids/layout/validation.py index 67cfe6003..9f8b67b72 100644 --- a/bids/layout/validation.py +++ b/bids/layout/validation.py @@ -30,8 +30,13 @@ k: val[k] for val in MANDATORY_DERIVATIVES_FIELDS.values() for k in val} -DEFAULT_LOCATIONS_TO_IGNORE = ("code", "stimuli", "sourcedata", "models", - re.compile(r'^\.')) +DEFAULT_LOCATIONS_TO_IGNORE = { + "code", + "stimuli", + "sourcedata", + "models", + re.compile(r'^\.'), +} def absolute_path_deprecation_warning(): warnings.warn("The absolute_paths argument will be removed from PyBIDS " @@ -172,27 +177,40 @@ def check_for_description(bids_dir): return paths +def _sort_patterns(patterns, root): + """Return sorted patterns, from more specific to more general.""" + regexes = [patt for patt in patterns if hasattr(patt, "search")] + + paths = [ + str((root / patt).absolute()) + for patt in listify(patterns) + if not hasattr(patt, "search") + ] + # Sort patterns from general to specific + paths.sort(key=len) + + # Combine and return (note path patterns are reversed, specific first) + return [Path(p) for p in reversed(paths)] + regexes + + def validate_indexing_args(ignore, force_index, root): if ignore is None: - ignore = DEFAULT_LOCATIONS_TO_IGNORE + ignore = list( + DEFAULT_LOCATIONS_TO_IGNORE - set(force_index or []) + ) # root has already been validated to be a directory - ignore = [(root / patt).absolute() - if isinstance(patt, str) else patt - for patt in listify(ignore or [])] - force_index = [(root / patt).absolute() - if isinstance(patt, str) else patt - for patt in listify(force_index or [])] + ignore = _sort_patterns(ignore, root) + force_index = _sort_patterns(force_index or [], root) # Derivatives get special handling; they shouldn't be indexed normally - if force_index is not None: - for entry in force_index: - condi = (isinstance(entry, str) and - str(entry.resolve()).startswith('derivatives')) - if condi: - msg = ("Do not pass 'derivatives' in the force_index " - "list. To index derivatives, either set " - "derivatives=True, or use add_derivatives().") - raise ValueError(msg) + for entry in force_index: + condi = (isinstance(entry, str) and + str(entry.resolve()).startswith('derivatives')) + if condi: + msg = ("Do not pass 'derivatives' in the force_index " + "list. To index derivatives, either set " + "derivatives=True, or use add_derivatives().") + raise ValueError(msg) return ignore, force_index