Make recursively_populate_jsonld return non-dict input unchanged.

json_ld.py: add an isinstance guard ahead of the "@value" check so that a
non-dict entry_node is returned as-is.
json_ld_test.py: add a parametrized test that calls
recursively_populate_jsonld on four small dict inputs with an empty
id_to_node mapping.

diff --git a/python/mlcroissant/mlcroissant/_src/core/json_ld.py b/python/mlcroissant/mlcroissant/_src/core/json_ld.py
index 5f1d118b..c9a78330 100644
--- a/python/mlcroissant/mlcroissant/_src/core/json_ld.py
+++ b/python/mlcroissant/mlcroissant/_src/core/json_ld.py
@@ -147,7 +147,10 @@ def box_singleton_list(element: Any) -> list[Any] | None:
 
 def recursively_populate_jsonld(entry_node: Json, id_to_node: dict[str, Json]) -> Any:
     """Changes in place `entry_node` with its children."""
-    if "@value" in entry_node:
+    if not isinstance(entry_node, dict):
+        # If entry_node is not dict, just return it without doing anything.
+        return entry_node
+    elif "@value" in entry_node:
         if entry_node.get("@type") == namespace.RDF.JSON:
             # Stringified JSON is loaded as a dict.
             return json.loads(entry_node["@value"])
diff --git a/python/mlcroissant/mlcroissant/_src/core/json_ld_test.py b/python/mlcroissant/mlcroissant/_src/core/json_ld_test.py
index 88d05997..fde3412e 100644
--- a/python/mlcroissant/mlcroissant/_src/core/json_ld_test.py
+++ b/python/mlcroissant/mlcroissant/_src/core/json_ld_test.py
@@ -3,8 +3,11 @@
 import json
 
 from etils import epath
+import pytest
 
+from mlcroissant._src.core.json_ld import recursively_populate_jsonld
 from mlcroissant._src.core.rdf import make_context
+from mlcroissant._src.core.types import Json
 from mlcroissant._src.datasets import Dataset
 from mlcroissant._src.tests.versions import parametrize_version
 
@@ -68,3 +71,16 @@ def test_make_context():
         "transform": "cr:transform",
         "foo": "bar",
     }
+
+
+@pytest.mark.parametrize(
+    ["data"],
+    [
+        [{}],
+        [{"value": 3}],
+        [{"singelton_array": ["a"]}],
+        [{"array": ["a", "b", "c"]}],
+    ],
+)
+def test_sanity_recursively_populate_jsonld(data: Json) -> None:
+    recursively_populate_jsonld(data, {})