From 22c8ac7c1afe848a01a6839b4a624a4da293ccb9 Mon Sep 17 00:00:00 2001 From: Etienne Pot Date: Fri, 21 Feb 2025 08:33:08 -0800 Subject: [PATCH] Support config for the HuggingFace loader PiperOrigin-RevId: 729540335 --- kauldron/data/py/data_sources.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kauldron/data/py/data_sources.py b/kauldron/data/py/data_sources.py index 13eb5bea..4a406bf3 100644 --- a/kauldron/data/py/data_sources.py +++ b/kauldron/data/py/data_sources.py @@ -70,17 +70,21 @@ def data_source(self) -> grain.RandomAccessDataSource: class HuggingFace(base.DataSourceBase): """HuggingFace loader.""" - name: str + path: str _: dataclasses.KW_ONLY + config: str | None = None split: str data_dir: epath.PathLike | None = None + cache_dir: epath.PathLike | None = None @functools.cached_property def data_source(self) -> grain.RandomAccessDataSource: return datasets.load_dataset( - self.name, + self.path, + name=self.config, split=self.split, data_dir=self.data_dir, + cache_dir=self.cache_dir, )