Skip to content

Commit

Permalink
6721 bundle syntax # as alias of :: (#6955)
Browse files Browse the repository at this point in the history
fixes #6721

### Description

compatible syntax by normalising any `#` in ids to `::`

```py
from monai.bundle import ConfigParser

config = {
    "my_dims": 2,
    "dims_1": "$@my_dims + 1",
    "my_net": {"_target_": "BasicUNet", "spatial_dims": "@dims_1", "in_channels": 1, "out_channels": 4},
}
# in the example $@my_dims + 1 is an expression, which adds 1 to the value of @my_dims
parser = ConfigParser(config)
print(parser.get_parsed_content("my_net::spatial_dims"))  # returns 3
print(parser.get_parsed_content("my_net#spatial_dims"))  # returns 3

```

new test cases:

https://github.com/Project-MONAI/MONAI/blob/66b50fb8384ae2dc3c76b39de673ce76908f94f2/tests/test_config_parser.py#L317-L321

### Types of changes
<!--- Put an `x` in all the boxes that apply, and remove the not
applicable items -->
- [x] Non-breaking change (fix or new feature that would not break
existing functionality).
- [ ] Breaking change (fix or new feature that would cause existing
functionality to change).
- [x] New tests added to cover the changes.
- [x] Integration tests passed locally by running `./runtests.sh -f -u
--net --coverage`.
- [x] Quick tests passed locally by running `./runtests.sh --quick
--unittests --disttests`.
- [x] In-line docstrings updated.
- [x] Documentation updated, tested `make html` command in the `docs/`
folder.

---------

Signed-off-by: Wenqi Li <[email protected]>
  • Loading branch information
wyli authored Sep 8, 2023
1 parent ef7debe commit 8040aa2
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 56 deletions.
15 changes: 8 additions & 7 deletions docs/source/config_syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,15 @@ A few characters and keywords are interpreted beyond the plain texts, here are e
### To reference Python objects in configurations

```json
"@preprocessing#transforms#keys"
"@preprocessing::transforms::keys"
```

_Description:_ `@` character indicates a reference to another configuration value defined at `preprocessing#transforms#keys`.
where `#` indicates a sub-structure of this configuration file.
_Description:_ `@` character indicates a reference to another configuration value defined at `preprocessing::transforms::keys`.
where `::` indicates a sub-structure of this configuration file. (`#` is a synonym for `::`, so `preprocessing#transforms#keys`
refers to the same object.)

```json
"@preprocessing#1"
"@preprocessing::1"
```

_Description:_ `1` is interpreted as an integer, which is used to index (zero-based indexing) the `preprocessing` sub-structure.
Expand Down Expand Up @@ -122,10 +123,10 @@ It's therefore possible to modify the Python objects within an expression, for e
### To textually replace configuration elements

```json
"%demo_config.json#demo_net#in_channels"
"%demo_config.json::demo_net::in_channels"
```

_Description:_ `%` character indicates a macro to replace the current configuration element with the texts at `demo_net#in_channels` in the
_Description:_ `%` character indicates a macro to replace the current configuration element with the texts at `demo_net::in_channels` in the
`demo_config.json` file. The replacement is done before instantiating or evaluating the components.

### Instantiate a Python object
Expand Down Expand Up @@ -203,6 +204,6 @@ Details on the CLI argument parsing is provided in the
simple structures with sparse uses of expressions or references are preferred.
- For `$import <module>` in the configuration, please make sure there are instructions for the users to install
the `<module>` if it is not a (optional) dependency of MONAI.
- As "#" and "$" might be interpreted differently by the `shell` or `CLI` tools, may need to add escape characters
- As `#`, `::`, and `$` might be interpreted differently by the `shell` or `CLI` tools, you may need to add escape characters
or quotes for them in the command line, like: `"\$torch.device('cuda:1')"`, `"'train_part#trainer'"`.
- For more details and examples, please see [the tutorials](https://github.com/Project-MONAI/tutorials/tree/main/bundle).
49 changes: 24 additions & 25 deletions monai/bundle/config_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,16 +141,16 @@ def __getitem__(self, id: str | int) -> Any:
Get the config by id.
Args:
id: id of the ``ConfigItem``, ``"#"`` in id are interpreted as special characters to
id: id of the ``ConfigItem``, ``"::"`` (or ``"#"``) in id are interpreted as special characters to
go one level further into the nested structures.
Use digits indexing from "0" for list or other strings for dict.
For example: ``"xform#5"``, ``"net#channels"``. ``""`` indicates the entire ``self.config``.
For example: ``"xform::5"``, ``"net::channels"``. ``""`` indicates the entire ``self.config``.
"""
if id == "":
return self.config
config = self.config
for k in str(id).split(ID_SEP_KEY):
for k in ReferenceResolver.split_id(id):
if not isinstance(config, (dict, list)):
raise ValueError(f"config must be dict or list for key `{k}`, but got {type(config)}: {config}.")
try:
Expand All @@ -167,23 +167,22 @@ def __setitem__(self, id: str | int, config: Any) -> None:
to ensure the updates are included in the parsed content.
Args:
id: id of the ``ConfigItem``, ``"#"`` in id are interpreted as special characters to
id: id of the ``ConfigItem``, ``"::"`` (or ``"#"``) in id are interpreted as special characters to
go one level further into the nested structures.
Use digits indexing from "0" for list or other strings for dict.
For example: ``"xform#5"``, ``"net#channels"``. ``""`` indicates the entire ``self.config``.
For example: ``"xform::5"``, ``"net::channels"``. ``""`` indicates the entire ``self.config``.
config: config to set at location ``id``.
"""
if id == "":
self.config = config
self.ref_resolver.reset()
return
keys = str(id).split(ID_SEP_KEY)
last_id, base_id = ReferenceResolver.split_id(id, last=True)
# get the last parent level config item and replace it
last_id = ID_SEP_KEY.join(keys[:-1])
conf_ = self[last_id]

indexing = keys[-1] if isinstance(conf_, dict) else int(keys[-1])
indexing = base_id if isinstance(conf_, dict) else int(base_id)
conf_[indexing] = config
self.ref_resolver.reset()
return
Expand Down Expand Up @@ -213,7 +212,7 @@ def set(self, config: Any, id: str = "", recursive: bool = True) -> None:
default to `True`. for the nested id, only support `dict` for the missing section.
"""
keys = str(id).split(ID_SEP_KEY)
keys = ReferenceResolver.split_id(id)
conf_ = self.get()
if recursive:
if conf_ is None:
Expand All @@ -222,12 +221,12 @@ def set(self, config: Any, id: str = "", recursive: bool = True) -> None:
if isinstance(conf_, dict) and k not in conf_:
conf_[k] = {}
conf_ = conf_[k if isinstance(conf_, dict) else int(k)]
self[id] = config
self[ReferenceResolver.normalize_id(id)] = config

def update(self, pairs: dict[str, Any]) -> None:
"""
Set the ``id`` and the corresponding config content in pairs, see also :py:meth:`__setitem__`.
For example, ``parser.update({"train#epoch": 100, "train#lr": 0.02})``
For example, ``parser.update({"train::epoch": 100, "train::lr": 0.02})``
Args:
pairs: dictionary of `id` and config pairs.
Expand Down Expand Up @@ -272,10 +271,10 @@ def get_parsed_content(self, id: str = "", **kwargs: Any) -> Any:
- Else, the result is the configuration content of `ConfigItem`.
Args:
id: id of the ``ConfigItem``, ``"#"`` in id are interpreted as special characters to
id: id of the ``ConfigItem``, ``"::"`` (or ``"#"``) in id are interpreted as special characters to
go one level further into the nested structures.
Use digits indexing from "0" for list or other strings for dict.
For example: ``"xform#5"``, ``"net#channels"``. ``""`` indicates the entire ``self.config``.
For example: ``"xform::5"``, ``"net::channels"``. ``""`` indicates the entire ``self.config``.
kwargs: additional keyword arguments to be passed to ``_resolve_one_item``.
Currently support ``lazy`` (whether to retain the current config cache, default to `True`),
``instantiate`` (whether to instantiate the `ConfigComponent`, default to `True`) and
Expand Down Expand Up @@ -330,16 +329,15 @@ def _do_resolve(self, config: Any, id: str = "") -> Any:
Args:
config: input config file to resolve.
id: id of the ``ConfigItem``, ``"#"`` in id are interpreted as special characters to
id: id of the ``ConfigItem``, ``"::"`` (or ``"#"``) in id are interpreted as special characters to
go one level further into the nested structures.
Use digits indexing from "0" for list or other strings for dict.
For example: ``"xform#5"``, ``"net#channels"``. ``""`` indicates the entire ``self.config``.
For example: ``"xform::5"``, ``"net::channels"``. ``""`` indicates the entire ``self.config``.
"""
if isinstance(config, (dict, list)):
for k, v in enumerate(config) if isinstance(config, list) else config.items():
sub_id = f"{id}{ID_SEP_KEY}{k}" if id != "" else k
config[k] = self._do_resolve(v, sub_id)
for k, sub_id, v in self.ref_resolver.iter_subconfigs(id=id, config=config):
config[k] = self._do_resolve(v, sub_id) # type: ignore
if isinstance(config, str):
config = self.resolve_relative_ids(id, config)
if config.startswith(MACRO_KEY):
Expand All @@ -354,7 +352,7 @@ def resolve_macro_and_relative_ids(self):
Recursively resolve `self.config` to replace the relative ids with absolute ids, for example,
`@##A` means `A` in the upper level. and replace the macro tokens with target content,
The macro tokens are marked as starting with "%", can be from another structured file, like:
``"%default_net"``, ``"%/data/config.json#net"``.
``"%default_net"``, ``"%/data/config.json::net"``.
"""
self.set(self._do_resolve(config=self.get()))
Expand All @@ -365,15 +363,14 @@ def _do_parse(self, config: Any, id: str = "") -> None:
Args:
config: config source to parse.
id: id of the ``ConfigItem``, ``"#"`` in id are interpreted as special characters to
id: id of the ``ConfigItem``, ``"::"`` (or ``"#"``) in id are interpreted as special characters to
go one level further into the nested structures.
Use digits indexing from "0" for list or other strings for dict.
For example: ``"xform#5"``, ``"net#channels"``. ``""`` indicates the entire ``self.config``.
For example: ``"xform::5"``, ``"net::channels"``. ``""`` indicates the entire ``self.config``.
"""
if isinstance(config, (dict, list)):
for k, v in enumerate(config) if isinstance(config, list) else config.items():
sub_id = f"{id}{ID_SEP_KEY}{k}" if id != "" else k
for _, sub_id, v in self.ref_resolver.iter_subconfigs(id=id, config=config):
self._do_parse(config=v, id=sub_id)

if ConfigComponent.is_instantiable(config):
Expand Down Expand Up @@ -410,7 +407,7 @@ def load_config_files(cls, files: PathLike | Sequence[PathLike] | dict, **kwargs
"""
Load config files into a single config dict.
A later config file in the list overrides, or adds to, the content of the earlier ones.
``"#"`` in the config keys are interpreted as special characters to go one level
``"::"`` (or ``"#"``) in the config keys are interpreted as special characters to go one level
further into the nested structures.
Args:
Expand Down Expand Up @@ -451,13 +448,14 @@ def export_config_file(cls, config: dict, filepath: PathLike, fmt: str = "json",
def split_path_id(cls, src: str) -> tuple[str, str]:
"""
Split `src` string into two parts: a config file path and component id.
The file path should end with `(json|yaml|yml)`. The component id should be separated by `#` if it exists.
The file path should end with `(json|yaml|yml)`. The component id should be separated by `::` if it exists.
If no path or no id, return "".
Args:
src: source string to split.
"""
src = ReferenceResolver.normalize_id(src)
result = re.compile(rf"({cls.suffix_match}(?=(?:{ID_SEP_KEY}.*)|$))", re.IGNORECASE).findall(src)
if not result:
return "", src # the src is a pure id
Expand Down Expand Up @@ -488,6 +486,7 @@ def resolve_relative_ids(cls, id: str, value: str) -> str:
"""
# get the prefixes like: "@####", "%###", "@#"
value = ReferenceResolver.normalize_id(value)
prefixes = sorted(set().union(cls.relative_id_prefix.findall(value)), reverse=True)
current_id = id.split(ID_SEP_KEY)

Expand Down
70 changes: 54 additions & 16 deletions monai/bundle/reference_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import re
import warnings
from collections.abc import Sequence
from typing import Any
from typing import Any, Iterator

from monai.bundle.config_item import ConfigComponent, ConfigExpression, ConfigItem
from monai.bundle.utils import ID_REF_KEY, ID_SEP_KEY
Expand All @@ -31,7 +31,7 @@ class ReferenceResolver:
The IDs must be unique within this set. A string in ``ConfigItem``
starting with ``@`` will be treated as a reference to other ``ConfigItem`` objects by ID.
Since ``ConfigItem`` may have a nested dictionary or list structure,
the reference string may also contain a ``#`` character to refer to a substructure by
the reference string may also contain the separator ``::`` to refer to a substructure by
key indexing for a dictionary or integer indexing for a list.
In this class, resolving references is essentially substitution of the reference strings with the
Expand All @@ -52,7 +52,7 @@ class ReferenceResolver:
_vars = "__local_refs"
sep = ID_SEP_KEY # separator for key indexing
ref = ID_REF_KEY # reference prefix
# match a reference string, e.g. "@id#key", "@id#key#0", "@_target_#key"
# match a reference string, e.g. "@id::key", "@id::key::0", "@_target_::key"
id_matcher = re.compile(rf"{ref}(?:\w*)(?:{sep}\w*)*")
# if `allow_missing_reference` and can't find a reference ID, will just raise a warning and don't update the config
allow_missing_reference = allow_missing_reference
Expand Down Expand Up @@ -99,6 +99,7 @@ def get_item(self, id: str, resolve: bool = False, **kwargs: Any) -> ConfigItem
kwargs: keyword arguments to pass to ``_resolve_one_item()``.
Currently support ``instantiate`` and ``eval_expr``. Both are defaulting to True.
"""
id = self.normalize_id(id)
if resolve and id not in self.resolved_content:
self._resolve_one_item(id=id, **kwargs)
return self.items.get(id)
Expand All @@ -121,6 +122,7 @@ def _resolve_one_item(
if the `id` is not in the config content, must be a `ConfigItem` object.
"""
id = self.normalize_id(id)
if id in self.resolved_content:
return self.resolved_content[id]
try:
Expand Down Expand Up @@ -190,18 +192,56 @@ def get_resolved_content(self, id: str, **kwargs: Any) -> ConfigExpression | str
"""
return self._resolve_one_item(id=id, **kwargs)

@classmethod
def normalize_id(cls, id: str | int) -> str:
    """
    Return ``id`` as a string in which every ``#`` is replaced by `cls.sep`.

    ``#`` is the legacy separator and is kept as an alias for backward compatibility,
    so ids written with either separator map to the same normalized string.

    Args:
        id: id to be normalized; it is converted with ``str()`` before the replacement.

    """
    legacy_sep = "#"  # the old separator, still accepted as an alias of `cls.sep`
    text = str(id)
    return text.replace(legacy_sep, cls.sep)

@classmethod
def split_id(cls, id: str | int, last: bool = False) -> list[str]:
    """
    Break the normalized id into its components using `cls.sep`.

    Args:
        id: id to be split; it is normalized with `cls.normalize_id` first.
        last: if False (default), split at every separator and return all parts.
            If True, split only at the rightmost separator and return a two-item list
            ``[parent_id, last_key]``; ``parent_id`` is ``""`` when there is no separator.

    """
    normalized = cls.normalize_id(id)
    if last:
        # rpartition yields ("", "", normalized) when the separator is absent,
        # so the parent part is the empty string in that case
        parent, _, tail = normalized.rpartition(cls.sep)
        return [parent, tail]
    return normalized.split(cls.sep)

@classmethod
def iter_subconfigs(cls, id: str, config: Any) -> Iterator[tuple[str, str, Any]]:
    """
    Yield ``(key, sub_id, value)`` for each direct child of ``config``.

    ``key`` is the dict key when ``config`` is a dict, otherwise the position given by
    ``enumerate``. ``sub_id`` is ``key`` appended to ``id`` with `cls.sep`, or just the
    string form of ``key`` when ``id`` is empty.

    Args:
        id: id string of the current input config.
        config: input config to be iterated.

    """
    children = config.items() if isinstance(config, dict) else enumerate(config)
    # an empty parent id contributes no prefix, so top-level sub ids are the bare keys
    prefix = f"{id}{cls.sep}" if id != "" else ""
    for key, value in children:
        yield key, f"{prefix}{key}", value

@classmethod
def match_refs_pattern(cls, value: str) -> dict[str, int]:
"""
Match regular expression for the input string to find the references.
The reference string starts with ``"@"``, like: ``"@XXX#YYY#ZZZ"``.
The reference string starts with ``"@"``, like: ``"@XXX::YYY::ZZZ"``.
Args:
value: input value to match regular expression.
"""
refs: dict[str, int] = {}
# regular expression pattern to match "@XXX" or "@XXX#YYY"
# regular expression pattern to match "@XXX" or "@XXX::YYY"
value = cls.normalize_id(value)
result = cls.id_matcher.findall(value)
value_is_expr = ConfigExpression.is_expression(value)
for item in result:
Expand All @@ -215,15 +255,16 @@ def match_refs_pattern(cls, value: str) -> dict[str, int]:
def update_refs_pattern(cls, value: str, refs: dict) -> str:
"""
Match regular expression for the input string to update content with the references.
The reference part starts with ``"@"``, like: ``"@XXX#YYY#ZZZ"``.
The reference part starts with ``"@"``, like: ``"@XXX::YYY::ZZZ"``.
References dictionary must contain the referring IDs as keys.
Args:
value: input value to match regular expression.
refs: all the referring components with ids as keys, default to `None`.
"""
# regular expression pattern to match "@XXX" or "@XXX#YYY"
# regular expression pattern to match "@XXX" or "@XXX::YYY"
value = cls.normalize_id(value)
result = cls.id_matcher.findall(value)
# reversely sort the matched references by length
# and handle the longer first in case a reference item is substring of another longer item
Expand All @@ -235,11 +276,10 @@ def update_refs_pattern(cls, value: str, refs: dict) -> str:
ref_id = item[len(cls.ref) :] # remove the ref prefix "@"
if ref_id not in refs:
msg = f"can not find expected ID '{ref_id}' in the references."
if cls.allow_missing_reference:
warnings.warn(msg)
continue
else:
if not cls.allow_missing_reference:
raise KeyError(msg)
warnings.warn(msg)
continue
if value_is_expr:
# replace with local code, `{"__local_refs": self.resolved_content}` will be added to
# the `globals` argument of python `eval` in the `evaluate`
Expand All @@ -265,12 +305,11 @@ def find_refs_in_config(cls, config: Any, id: str, refs: dict[str, int] | None =
"""
refs_: dict[str, int] = refs or {}
if isinstance(config, str):
for id, count in cls.match_refs_pattern(value=config).items():
for id, count in cls.match_refs_pattern(value=config).items(): # ref count is not currently used
refs_[id] = refs_.get(id, 0) + count
if not isinstance(config, (list, dict)):
return refs_
for k, v in config.items() if isinstance(config, dict) else enumerate(config):
sub_id = f"{id}{cls.sep}{k}" if id != "" else f"{k}"
for _, sub_id, v in cls.iter_subconfigs(id, config):
if ConfigComponent.is_instantiable(v) or ConfigExpression.is_expression(v) and sub_id not in refs_:
refs_[sub_id] = 1
refs_ = cls.find_refs_in_config(v, sub_id, refs_)
Expand All @@ -294,8 +333,7 @@ def update_config_with_refs(cls, config: Any, id: str, refs: dict | None = None)
if not isinstance(config, (list, dict)):
return config
ret = type(config)()
for idx, v in config.items() if isinstance(config, dict) else enumerate(config):
sub_id = f"{id}{cls.sep}{idx}" if id != "" else f"{idx}"
for idx, sub_id, v in cls.iter_subconfigs(id, config):
if ConfigComponent.is_instantiable(v) or ConfigExpression.is_expression(v):
updated = refs_[sub_id]
if ConfigComponent.is_instantiable(v) and updated is None:
Expand Down
2 changes: 1 addition & 1 deletion monai/bundle/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
__all__ = ["ID_REF_KEY", "ID_SEP_KEY", "EXPR_KEY", "MACRO_KEY", "DEFAULT_MLFLOW_SETTINGS", "DEFAULT_EXP_MGMT_SETTINGS"]

ID_REF_KEY = "@" # start of a reference to a ConfigItem
ID_SEP_KEY = "#" # separator for the ID of a ConfigItem
ID_SEP_KEY = "::" # separator for the ID of a ConfigItem
EXPR_KEY = "$" # start of a ConfigExpression
MACRO_KEY = "%" # start of a macro of a config

Expand Down
Loading

0 comments on commit 8040aa2

Please sign in to comment.