Skip to content
This repository has been archived by the owner on Aug 8, 2024. It is now read-only.

FIXES: invalid filters, mark all xfails, etc.. #40

Merged
merged 16 commits into from
Mar 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 128 additions & 34 deletions bids/layout/layout.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import difflib
import enum
import os.path
import typing
from collections import defaultdict
from functools import partial
from pathlib import Path
Expand All @@ -10,6 +11,7 @@
from .models import BIDSFile
from .utils import BIDSMetadata
from .writing import build_path, write_to_file
from ..external import inflect
from ..exceptions import (
BIDSEntityError,
BIDSValidationError,
Expand Down Expand Up @@ -265,7 +267,7 @@ def __init__(
database_path: Optional[str]=None,
reset_database: Optional[bool]=None,
indexer: Optional[Callable]=None,
absolute_paths: Optional[bool]=None,
absolute_paths: Optional[bool]=True,
ignore: Optional[List[str]]=None,
force_index: Optional[List[str]]=None,
**kwargs,
Expand Down Expand Up @@ -296,6 +298,7 @@ def __init__(
self.validationReport = None

self._regex_search = regex_search
self._absolute_paths = absolute_paths

if validate:
self.validationReport = self.validate()
Expand All @@ -318,11 +321,6 @@ def __init__(
"indexer no longer has any effect and will be removed",
DeprecationWarning
)
if absolute_paths is not None:
warnings.warn(
"absolute_paths no longer has any effect and will be removed",
DeprecationWarning
)
if kwargs:
warnings.warn(f"Unknown keyword arguments: {kwargs}")
if config is not None:
Expand All @@ -340,12 +338,16 @@ def __getattr__(self, key):
except KeyError:
pass
if key.startswith('get_'):
orig_ent_name = key.replace('get_', '')
ent_name = self.schema.fuzzy_match_entity(orig_ent_name).name
if ent_name not in self.get_entities():
raise BIDSEntityError(
"'get_{}' can't be called because '{}' isn't a "
"recognized entity name.".format(orig_ent_name, orig_ent_name))
ent_name = key.replace('get_', '')
entities = self.get_entities(metadata=True)
if ent_name not in entities:
sing = inflect.engine().singular_noun(ent_name)
if sing in entities:
ent_name = sing
else:
raise BIDSEntityError(
"'get_{}' can't be called because '{}' isn't a "
"recognized entity name.".format(ent_name, ent_name))
return partial(self.get, return_type='id', target=ent_name)
# Spit out default message if we get this far
raise AttributeError("%s object has no attribute named %r" %
Expand Down Expand Up @@ -541,11 +543,88 @@ def count_matches(f):
else match for match in matches]
return matches if all_ else matches[0] if matches else None

def _sanitize_validate_query(self, target, filters, return_type, invalid_filters, regex_search):
"""Sanitize and validate query parameters

Parameters
----------
target : str
Name of the target entity to get results for.
filters : dict
Dictionary of filters to apply to the query.
return_type : str
The type of object to return. Must be one of 'filename',
'object', or 'dir'.
invalid_filters : str
What to do when an invalid filter is encountered. Must be one
of 'error', 'drop', or 'allow'.

Returns
-------
target : str
The sanitized target.
filters : dict
The sanitized filters.
"""
# error check on users accidentally passing in filters
if isinstance(filters.get('filters'), dict):
raise RuntimeError('You passed in filters as a dictionary named '
'filters; please pass the keys in as named '
'keywords to the `get()` call. For example: '
'`layout.get(**filters)`.')

schema_entities = [e.name for e in self.schema.EntityEnum]

# Provide some suggestions for target and filter names
def _suggest(target):
"""Suggest a valid value for an entity."""
potential = list(schema_entities)
suggestions = difflib.get_close_matches(target, potential)
if suggestions:
message = ". Did you mean one of: {}?".format(suggestions)
else:
message = ""
return message

if return_type in ("dir", "id"):
if target is None:
raise TargetError(f'If return_type is "id" or "dir", a valid target '
'entity must also be specified.')

if target not in schema_entities:
raise TargetError(f"Unknown target '{target}'{_suggest(target)}")

if invalid_filters != 'allow':
bad_filters = set(filters.keys()) - set(schema_entities)
if bad_filters:
if invalid_filters == 'drop':
for bad_filt in bad_filters:
filters.pop(bad_filt)
elif invalid_filters == 'error':
first_bad = list(bad_filters)[0]
message = _suggest(first_bad)
raise ValueError(
f"Unknown entity '{first_bad}'{message} If you're sure you want to impose " + \
"this constraint, set invalid_filters='allow'.")

# Process Query Enum
if filters:
for k, val in filters.items():
if val == Query.OPTIONAL:
del filters[k]
elif val == Query.REQUIRED:
regex_search = True # Force true if these are defined
filters[k] = '.+'
elif val == Query.NONE:
regex_search = True
filters[k] = '^$'

return target, filters, regex_search

def get(self, return_type: str = 'object', target: str = None, scope: str = None,
extension: Union[str, List[str]] = None, suffix: Union[str, List[str]] = None,
regex_search=None,
**entities) -> Union[List[str], List[object]]:
regex_search=None, invalid_filters: str = 'error',
**filters) -> Union[List[str], List[object]]:
"""Retrieve files and/or metadata from the current Layout.

Parameters
Expand Down Expand Up @@ -597,32 +676,25 @@ def get(self, return_type: str = 'object', target: str = None, scope: str = None
list of :obj:`bids.layout.BIDSFile` or str
A list of BIDSFiles (default) or strings (see return_type).
"""

if regex_search is None:
regex_search = self._regex_search

# Provide some suggestions if target is specified and invalid.
if return_type in ("dir", "id"):
if target is None:
raise TargetError(f'If return_type is "id" or "dir", a valid target '
'entity must also be specified.')
self_entities = self.get_entities()
if target not in self_entities:
potential = list(self_entities.keys())
suggestions = difflib.get_close_matches(target, potential)
if suggestions:
message = "Did you mean one of: {}?".format(suggestions)
else:
message = "Valid targets are: {}".format(potential)
raise TargetError(f"Unknown target '{target}'. {message}")

# Sanitize & validate query
target, filters, regex_search = self._sanitize_validate_query(target, filters, return_type,
invalid_filters, regex_search)

folder = self.dataset
result = query(folder, return_type, target, scope, extension, suffix, regex_search, **entities)

result = query(folder, return_type, target, scope, extension, suffix, regex_search,
absolute_paths=self._absolute_paths, **filters)

if return_type == 'file':
result = natural_sort(result)
if return_type == "object":
if result:
result = natural_sort(
[BIDSFile(res) for res in result],
[BIDSFile(res, absolute_path=self._absolute_paths) for res in result],
"path"
)
return result
Expand All @@ -631,7 +703,8 @@ def get(self, return_type: str = 'object', target: str = None, scope: str = None
def entities(self):
return self.get_entities()

def get_entities(self, scope: str = None, sort: bool = False, long_form: bool = True) -> dict:
def get_entities(self, scope: str = None, sort: bool = False,
long_form: bool = True, metadata: bool = True) -> dict:
"""Returns a unique set of entities found within the dataset as a dict.
Each key of the resulting dict contains a list of values (with at least one element).

Expand All @@ -657,7 +730,29 @@ def get_entities(self, scope: str = None, sort: bool = False, long_form: bool =
dict
a unique set of entities found within the dataset as a dict
"""
return query_entities(self.dataset, scope, sort, long_form=long_form)

entities = query_entities(self.dataset, scope, long_form=long_form)

if metadata is True:
results = {**entities, **self._get_unique_metadata()}

if sort:
results = {k: sorted(v) for k, v in sorted(results.items())}

return results

def _get_unique_metadata(self):
"""Return a list of all unique metadata key and values found in the dataset."""

all_metadata_objects = self.dataset.select(self.schema.MetadataArtifact).objects()

metadata = defaultdict(set)
for obj in all_metadata_objects:
for k, v in obj['contents'].items():
if isinstance(v, typing.Hashable):
metadata[k].add(v)

return metadata

def get_dataset_description(self, scope='self', all_=False) -> Union[List[Dict], Dict]:
"""Return contents of dataset_description.json.
Expand Down Expand Up @@ -867,7 +962,6 @@ def __repr__(self):
"Runs: {}".format(self.dataset.base_dir_, n_subjects, n_sessions, n_runs))
return s


class Query(enum.Enum):
"""Enums for use with BIDSLayout.get()."""
NONE = 1 # Entity must not be present
Expand Down
10 changes: 8 additions & 2 deletions bids/layout/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,13 @@ def from_filename(cls, filename):
break
return cls(path)

def __init__(self, file_ref: Union[str, os.PathLike, Artifact], schema = None):
def __init__(self, file_ref: Union[str, os.PathLike, Artifact], schema = None,
absolute_path=True):

self._path = None
self._artifact = None
self._schema = schema
self._absolute_path = absolute_path
if isinstance(file_ref, (str, os.PathLike)):
self._path = Path(file_ref)
elif isinstance(file_ref, Artifact):
Expand All @@ -51,7 +54,10 @@ def __init__(self, file_ref: Union[str, os.PathLike, Artifact], schema = None):
def path(self):
""" Convenience property for accessing path as a string."""
try:
return self._artifact.get_absolute_path()
if self._absolute_path:
return self._artifact.get_absolute_path()
else:
return self._artifact.get_relative_path()
except AttributeError:
return str(self._path)

Expand Down
4 changes: 2 additions & 2 deletions bids/layout/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ def layout_7t_trt_relpath():
data_dir = join(get_test_data_path(), '7t_trt')
return BIDSLayout(data_dir, absolute_paths=False)


# AD: Manually ignoring derivatives for now
@pytest.fixture(scope="module")
def layout_ds005():
data_dir = join(get_test_data_path(), 'ds005')
return BIDSLayout(data_dir)
return BIDSLayout(data_dir, ignore=['derivatives', 'models'])


@pytest.fixture(scope="module")
Expand Down
Loading