Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX: Match only within relative path when indexer is validating #859

Merged
merged 8 commits into from
Jul 22, 2022
82 changes: 63 additions & 19 deletions bids/layout/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,62 @@ def _extract_entities(bidsfile, entities):
return match_vals


def _check_path_matches_patterns(path, patterns):
def _check_path_matches_patterns(path, patterns, root=None):
"""Check if the path matches at least one of the provided patterns. """
if not patterns:
return False

path = path.absolute()
if root is not None:
path = Path("/") / path.relative_to(root)

# Path now can be downcast to str
path = str(path)
for patt in patterns:
if isinstance(patt, Path):
if path == patt:
if hasattr(patt, "search"):
if patt.search(path):
return True
elif patt.search(str(path)):
return True
else:
continue

if isinstance(patt, Path):
patt = str(
patt if root is None
else Path("/") / patt.relative_to(root)
)
if str(patt) in path:
return str(patt)

return False


def _validate_path(path, incl_patt=None, excl_patt=None, root=None, default=None):
incl_matched = _check_path_matches_patterns(path, incl_patt, root=root)
excl_matched = _check_path_matches_patterns(path, excl_patt, root=root)

if incl_matched is False and excl_matched is False:
return default

# Include: if a inclusion regex pattern matched or a nonregex inclusion pattern matched
if incl_matched is True or (incl_matched and not excl_matched):
return True

if excl_matched is True:
return False

if not incl_matched and excl_matched:
return False

# Both matched: nested pattern
if incl_matched not in excl_matched:
return True

if excl_matched not in incl_matched:
return False

return default


class BIDSLayoutIndexer:
""" Indexer class for BIDSLayout.

Expand Down Expand Up @@ -114,19 +156,16 @@ def __call__(self, layout):
def session(self):
return self._layout.connection_manager.session

def _validate_dir(self, d, default=None):
if _check_path_matches_patterns(d, self._include_patterns):
return True
if _check_path_matches_patterns(d, self._exclude_patterns):
return False
return default

def _validate_file(self, f, default=None):
# Inclusion takes priority over exclusion
if _check_path_matches_patterns(f, self._include_patterns):
return True
if _check_path_matches_patterns(f, self._exclude_patterns):
return False
matched_patt = _validate_path(
f,
incl_patt=self._include_patterns,
excl_patt=self._exclude_patterns,
default=default,
root=self._layout._root
)
if matched_patt is not None:
return matched_patt

# If inclusion/exclusion is inherited from a parent directory, that
# takes precedence over the remaining file-level rules
Expand Down Expand Up @@ -173,7 +212,13 @@ def _index_dir(self, path, config, default_action=None):
_, dirnames, filenames = next(os.walk(path))

# Set the default inclusion/exclusion directive
default = self._validate_dir(path, default=default_action)
default = _validate_path(
path,
incl_patt=self._include_patterns,
excl_patt=self._exclude_patterns,
default=default_action,
root=self._layout._root,
)

# If layout configuration file exists, delete it
if self.config_filename in filenames:
Expand All @@ -192,7 +237,6 @@ def _index_dir(self, path, config, default_action=None):
d = path / d
self._index_dir(d, list(config), default_action=default)


def _index_file(self, f, dirpath, entities, default_action=None):
"""Create DB record for file and its tags. """
abs_fn = dirpath / f
Expand Down
87 changes: 72 additions & 15 deletions bids/layout/tests/test_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from bids.layout import BIDSLayout, Query
from bids.layout.models import Config
from bids.layout.index import BIDSLayoutIndexer
from bids.layout.index import BIDSLayoutIndexer, _check_path_matches_patterns
from bids.layout.utils import PaddedInt
from bids.tests import get_test_data_path
from bids.utils import natural_sort
Expand Down Expand Up @@ -43,7 +43,11 @@ def test_layout_init(layout_7t_trt):
])
def test_index_metadata(index_metadata, query, result, mock_config):
data_dir = join(get_test_data_path(), '7t_trt')
layout = BIDSLayout(data_dir, index_metadata=index_metadata, **query)
layout = BIDSLayout(
data_dir,
indexer=BIDSLayoutIndexer(index_metadata=index_metadata),
**query
)
sample_file = layout.get(task='rest', extension='.nii.gz',
acquisition='fullbrain')[0]
metadata = sample_file.get_metadata()
Expand Down Expand Up @@ -425,22 +429,39 @@ def test_nested_include_exclude():
target1 = join(data_dir, 'models', 'ds-005_type-test_model.json')
target2 = join(data_dir, 'models', 'extras', 'ds-005_type-test_model.json')

# Nest a directory exclusion within an inclusion
layout = BIDSLayout(data_dir, validate=True, force_index=['models'],
ignore=[os.path.join('models', 'extras')])
assert layout.get_file(target1)
assert not layout.get_file(target2)

# Nest a directory inclusion within an exclusion
layout = BIDSLayout(data_dir, validate=True, ignore=['models'],
force_index=[os.path.join('models', 'extras')])
layout = BIDSLayout(
data_dir,
indexer=BIDSLayoutIndexer(
validate=True,
force_index=[os.path.join('models', 'extras')],
ignore=['models'],
),
)
assert not layout.get_file(target1)
assert layout.get_file(target2)

# Nest a directory exclusion within an inclusion
layout = BIDSLayout(
data_dir,
indexer=BIDSLayoutIndexer(
validate=True,
force_index=['models'],
ignore=[os.path.join('models', 'extras')],
),
)
assert layout.get_file(target1)
assert not layout.get_file(target2)

# Force file inclusion despite directory-level exclusion
models = ['models', target2]
layout = BIDSLayout(data_dir, validate=True, force_index=models,
ignore=[os.path.join('models', 'extras')])
layout = BIDSLayout(
data_dir,
indexer=BIDSLayoutIndexer(
validate=True,
force_index=['models', target2],
ignore=[os.path.join('models', 'extras')],
),
)
assert layout.get_file(target1)
assert layout.get_file(target2)

Expand All @@ -453,11 +474,17 @@ def test_nested_include_exclude_with_regex():
target1 = join(data_dir, 'models', 'ds-005_type-test_model.json')
target2 = join(data_dir, 'models', 'extras', 'ds-005_type-test_model.json')

layout = BIDSLayout(data_dir, ignore=[patt2], force_index=[patt1])
layout = BIDSLayout(
data_dir,
indexer=BIDSLayoutIndexer(ignore=[patt2], force_index=[patt1])
)
assert layout.get_file(target1)
assert not layout.get_file(target2)

layout = BIDSLayout(data_dir, ignore=[patt1], force_index=[patt2])
layout = BIDSLayout(
data_dir,
indexer=BIDSLayoutIndexer(ignore=[patt1], force_index=[patt2])
)
assert not layout.get_file(target1)
assert layout.get_file(target2)

Expand Down Expand Up @@ -835,3 +862,33 @@ def test_padded_run_roundtrip(layout_ds005):
assert ents["run"] == 1
newpath = layout_ds005.build_path(ents, absolute_paths=False)
assert newpath == "sub-01/func/sub-01_task-mixedgamblestask_run-01_bold.nii.gz"

@pytest.mark.parametrize(
"fname", [
"sub-01/anat/sub-01_T1w.nii.gz",
".datalad",
"code",
"sub-01/.datalad",
],
)
def test_indexer_patterns(fname):
root = Path("/home/user/.cache/data/")
path = root / fname

assert bool(_check_path_matches_patterns(
path,
["code"],
root=None,
)) is (fname == "code")

assert _check_path_matches_patterns(
path,
[re.compile(r"/\.")],
root=None,
) is True

assert _check_path_matches_patterns(
path,
[re.compile(r"/\.")],
root=root,
) is (".datalad" in fname)
54 changes: 36 additions & 18 deletions bids/layout/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,13 @@
k: val[k] for val in MANDATORY_DERIVATIVES_FIELDS.values() for k in val}


DEFAULT_LOCATIONS_TO_IGNORE = ("code", "stimuli", "sourcedata", "models",
re.compile(r'^\.'))
DEFAULT_LOCATIONS_TO_IGNORE = {
"code",
"stimuli",
"sourcedata",
"models",
re.compile(r'^\.'),
}

def absolute_path_deprecation_warning():
warnings.warn("The absolute_paths argument will be removed from PyBIDS "
Expand Down Expand Up @@ -172,27 +177,40 @@ def check_for_description(bids_dir):
return paths


def _sort_patterns(patterns, root):
"""Return sorted patterns, from more specific to more general."""
regexes = [patt for patt in patterns if hasattr(patt, "search")]

paths = [
str((root / patt).absolute())
for patt in listify(patterns)
if not hasattr(patt, "search")
]
# Sort patterns from general to specific
paths.sort(key=len)

# Combine and return (note path patterns are reversed, specific first)
return [Path(p) for p in reversed(paths)] + regexes


def validate_indexing_args(ignore, force_index, root):
if ignore is None:
ignore = DEFAULT_LOCATIONS_TO_IGNORE
ignore = list(
DEFAULT_LOCATIONS_TO_IGNORE - set(force_index or [])
)

# root has already been validated to be a directory
ignore = [(root / patt).absolute()
if isinstance(patt, str) else patt
for patt in listify(ignore or [])]
force_index = [(root / patt).absolute()
if isinstance(patt, str) else patt
for patt in listify(force_index or [])]
ignore = _sort_patterns(ignore, root)
force_index = _sort_patterns(force_index or [], root)

# Derivatives get special handling; they shouldn't be indexed normally
if force_index is not None:
for entry in force_index:
condi = (isinstance(entry, str) and
str(entry.resolve()).startswith('derivatives'))
if condi:
msg = ("Do not pass 'derivatives' in the force_index "
"list. To index derivatives, either set "
"derivatives=True, or use add_derivatives().")
raise ValueError(msg)
for entry in force_index:
condi = (isinstance(entry, str) and
str(entry.resolve()).startswith('derivatives'))
if condi:
msg = ("Do not pass 'derivatives' in the force_index "
"list. To index derivatives, either set "
"derivatives=True, or use add_derivatives().")
raise ValueError(msg)

return ignore, force_index