Skip to content

Commit

Permalink
Merge pull request #859 from oesteban/fix/matching-paths
Browse files Browse the repository at this point in the history
FIX: Match only within relative path when indexer is validating
  • Loading branch information
effigies authored Jul 22, 2022
2 parents 9f01802 + ba43619 commit 107de32
Show file tree
Hide file tree
Showing 3 changed files with 171 additions and 52 deletions.
82 changes: 63 additions & 19 deletions bids/layout/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,62 @@ def _extract_entities(bidsfile, entities):
return match_vals


def _check_path_matches_patterns(path, patterns):
def _check_path_matches_patterns(path, patterns, root=None):
"""Check if the path matches at least one of the provided patterns. """
if not patterns:
return False

path = path.absolute()
if root is not None:
path = Path("/") / path.relative_to(root)

# Path now can be downcast to str
path = str(path)
for patt in patterns:
if isinstance(patt, Path):
if path == patt:
if hasattr(patt, "search"):
if patt.search(path):
return True
elif patt.search(str(path)):
return True
else:
continue

if isinstance(patt, Path):
patt = str(
patt if root is None
else Path("/") / patt.relative_to(root)
)
if str(patt) in path:
return str(patt)

return False


def _validate_path(path, incl_patt=None, excl_patt=None, root=None, default=None):
incl_matched = _check_path_matches_patterns(path, incl_patt, root=root)
excl_matched = _check_path_matches_patterns(path, excl_patt, root=root)

if incl_matched is False and excl_matched is False:
return default

# Include: if a inclusion regex pattern matched or a nonregex inclusion pattern matched
if incl_matched is True or (incl_matched and not excl_matched):
return True

if excl_matched is True:
return False

if not incl_matched and excl_matched:
return False

# Both matched: nested pattern
if incl_matched not in excl_matched:
return True

if excl_matched not in incl_matched:
return False

return default


class BIDSLayoutIndexer:
""" Indexer class for BIDSLayout.
Expand Down Expand Up @@ -114,19 +156,16 @@ def __call__(self, layout):
def session(self):
return self._layout.connection_manager.session

def _validate_dir(self, d, default=None):
if _check_path_matches_patterns(d, self._include_patterns):
return True
if _check_path_matches_patterns(d, self._exclude_patterns):
return False
return default

def _validate_file(self, f, default=None):
# Inclusion takes priority over exclusion
if _check_path_matches_patterns(f, self._include_patterns):
return True
if _check_path_matches_patterns(f, self._exclude_patterns):
return False
matched_patt = _validate_path(
f,
incl_patt=self._include_patterns,
excl_patt=self._exclude_patterns,
default=default,
root=self._layout._root
)
if matched_patt is not None:
return matched_patt

# If inclusion/exclusion is inherited from a parent directory, that
# takes precedence over the remaining file-level rules
Expand Down Expand Up @@ -173,7 +212,13 @@ def _index_dir(self, path, config, default_action=None):
_, dirnames, filenames = next(os.walk(path))

# Set the default inclusion/exclusion directive
default = self._validate_dir(path, default=default_action)
default = _validate_path(
path,
incl_patt=self._include_patterns,
excl_patt=self._exclude_patterns,
default=default_action,
root=self._layout._root,
)

# If layout configuration file exists, delete it
if self.config_filename in filenames:
Expand All @@ -192,7 +237,6 @@ def _index_dir(self, path, config, default_action=None):
d = path / d
self._index_dir(d, list(config), default_action=default)


def _index_file(self, f, dirpath, entities, default_action=None):
"""Create DB record for file and its tags. """
abs_fn = dirpath / f
Expand Down
87 changes: 72 additions & 15 deletions bids/layout/tests/test_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from bids.layout import BIDSLayout, Query
from bids.layout.models import Config
from bids.layout.index import BIDSLayoutIndexer
from bids.layout.index import BIDSLayoutIndexer, _check_path_matches_patterns
from bids.layout.utils import PaddedInt
from bids.tests import get_test_data_path
from bids.utils import natural_sort
Expand Down Expand Up @@ -43,7 +43,11 @@ def test_layout_init(layout_7t_trt):
])
def test_index_metadata(index_metadata, query, result, mock_config):
data_dir = join(get_test_data_path(), '7t_trt')
layout = BIDSLayout(data_dir, index_metadata=index_metadata, **query)
layout = BIDSLayout(
data_dir,
indexer=BIDSLayoutIndexer(index_metadata=index_metadata),
**query
)
sample_file = layout.get(task='rest', extension='.nii.gz',
acquisition='fullbrain')[0]
metadata = sample_file.get_metadata()
Expand Down Expand Up @@ -425,22 +429,39 @@ def test_nested_include_exclude():
target1 = join(data_dir, 'models', 'ds-005_type-test_model.json')
target2 = join(data_dir, 'models', 'extras', 'ds-005_type-test_model.json')

# Nest a directory exclusion within an inclusion
layout = BIDSLayout(data_dir, validate=True, force_index=['models'],
ignore=[os.path.join('models', 'extras')])
assert layout.get_file(target1)
assert not layout.get_file(target2)

# Nest a directory inclusion within an exclusion
layout = BIDSLayout(data_dir, validate=True, ignore=['models'],
force_index=[os.path.join('models', 'extras')])
layout = BIDSLayout(
data_dir,
indexer=BIDSLayoutIndexer(
validate=True,
force_index=[os.path.join('models', 'extras')],
ignore=['models'],
),
)
assert not layout.get_file(target1)
assert layout.get_file(target2)

# Nest a directory exclusion within an inclusion
layout = BIDSLayout(
data_dir,
indexer=BIDSLayoutIndexer(
validate=True,
force_index=['models'],
ignore=[os.path.join('models', 'extras')],
),
)
assert layout.get_file(target1)
assert not layout.get_file(target2)

# Force file inclusion despite directory-level exclusion
models = ['models', target2]
layout = BIDSLayout(data_dir, validate=True, force_index=models,
ignore=[os.path.join('models', 'extras')])
layout = BIDSLayout(
data_dir,
indexer=BIDSLayoutIndexer(
validate=True,
force_index=['models', target2],
ignore=[os.path.join('models', 'extras')],
),
)
assert layout.get_file(target1)
assert layout.get_file(target2)

Expand All @@ -453,11 +474,17 @@ def test_nested_include_exclude_with_regex():
target1 = join(data_dir, 'models', 'ds-005_type-test_model.json')
target2 = join(data_dir, 'models', 'extras', 'ds-005_type-test_model.json')

layout = BIDSLayout(data_dir, ignore=[patt2], force_index=[patt1])
layout = BIDSLayout(
data_dir,
indexer=BIDSLayoutIndexer(ignore=[patt2], force_index=[patt1])
)
assert layout.get_file(target1)
assert not layout.get_file(target2)

layout = BIDSLayout(data_dir, ignore=[patt1], force_index=[patt2])
layout = BIDSLayout(
data_dir,
indexer=BIDSLayoutIndexer(ignore=[patt1], force_index=[patt2])
)
assert not layout.get_file(target1)
assert layout.get_file(target2)

Expand Down Expand Up @@ -835,3 +862,33 @@ def test_padded_run_roundtrip(layout_ds005):
assert ents["run"] == 1
newpath = layout_ds005.build_path(ents, absolute_paths=False)
assert newpath == "sub-01/func/sub-01_task-mixedgamblestask_run-01_bold.nii.gz"

@pytest.mark.parametrize(
"fname", [
"sub-01/anat/sub-01_T1w.nii.gz",
".datalad",
"code",
"sub-01/.datalad",
],
)
def test_indexer_patterns(fname):
root = Path("/home/user/.cache/data/")
path = root / fname

assert bool(_check_path_matches_patterns(
path,
["code"],
root=None,
)) is (fname == "code")

assert _check_path_matches_patterns(
path,
[re.compile(r"/\.")],
root=None,
) is True

assert _check_path_matches_patterns(
path,
[re.compile(r"/\.")],
root=root,
) is (".datalad" in fname)
54 changes: 36 additions & 18 deletions bids/layout/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,13 @@
k: val[k] for val in MANDATORY_DERIVATIVES_FIELDS.values() for k in val}


DEFAULT_LOCATIONS_TO_IGNORE = ("code", "stimuli", "sourcedata", "models",
re.compile(r'^\.'))
DEFAULT_LOCATIONS_TO_IGNORE = {
"code",
"stimuli",
"sourcedata",
"models",
re.compile(r'^\.'),
}

def absolute_path_deprecation_warning():
warnings.warn("The absolute_paths argument will be removed from PyBIDS "
Expand Down Expand Up @@ -172,27 +177,40 @@ def check_for_description(bids_dir):
return paths


def _sort_patterns(patterns, root):
"""Return sorted patterns, from more specific to more general."""
regexes = [patt for patt in patterns if hasattr(patt, "search")]

paths = [
str((root / patt).absolute())
for patt in listify(patterns)
if not hasattr(patt, "search")
]
# Sort patterns from general to specific
paths.sort(key=len)

# Combine and return (note path patterns are reversed, specific first)
return [Path(p) for p in reversed(paths)] + regexes


def validate_indexing_args(ignore, force_index, root):
if ignore is None:
ignore = DEFAULT_LOCATIONS_TO_IGNORE
ignore = list(
DEFAULT_LOCATIONS_TO_IGNORE - set(force_index or [])
)

# root has already been validated to be a directory
ignore = [(root / patt).absolute()
if isinstance(patt, str) else patt
for patt in listify(ignore or [])]
force_index = [(root / patt).absolute()
if isinstance(patt, str) else patt
for patt in listify(force_index or [])]
ignore = _sort_patterns(ignore, root)
force_index = _sort_patterns(force_index or [], root)

# Derivatives get special handling; they shouldn't be indexed normally
if force_index is not None:
for entry in force_index:
condi = (isinstance(entry, str) and
str(entry.resolve()).startswith('derivatives'))
if condi:
msg = ("Do not pass 'derivatives' in the force_index "
"list. To index derivatives, either set "
"derivatives=True, or use add_derivatives().")
raise ValueError(msg)
for entry in force_index:
condi = (isinstance(entry, str) and
str(entry.resolve()).startswith('derivatives'))
if condi:
msg = ("Do not pass 'derivatives' in the force_index "
"list. To index derivatives, either set "
"derivatives=True, or use add_derivatives().")
raise ValueError(msg)

return ignore, force_index

0 comments on commit 107de32

Please sign in to comment.