Skip to content

Commit

Permalink
Support config for the huggingface loader
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 729540335
  • Loading branch information
Conchylicultor authored and The kauldron Authors committed Feb 21, 2025
1 parent 746dbc8 commit 22c8ac7
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions kauldron/data/py/data_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,17 +70,21 @@ def data_source(self) -> grain.RandomAccessDataSource:
class HuggingFace(base.DataSourceBase):
  """HuggingFace loader.

  Forwards its fields to `datasets.load_dataset` and exposes the result as
  the data source.

  Attributes:
    path: Passed as the first positional argument of `load_dataset`.
    config: Optional value forwarded as the `name=` keyword of `load_dataset`.
    split: Forwarded as `split=`.
    data_dir: Forwarded as `data_dir=`.
    cache_dir: Forwarded as `cache_dir=`.
  """

  path: str
  # Everything below is keyword-only. This is what lets the required `split`
  # follow the defaulted `config` without a dataclass field-ordering error.
  _: dataclasses.KW_ONLY
  config: str | None = None
  split: str
  data_dir: epath.PathLike | None = None
  cache_dir: epath.PathLike | None = None

  @functools.cached_property
  def data_source(self) -> grain.RandomAccessDataSource:
    """Loads the dataset on first access; the result is cached afterwards."""
    # `path` is the only positional argument. The second positional slot of
    # `load_dataset` is `name`, so also passing `name=` by keyword alongside a
    # second positional would raise `TypeError` (multiple values for `name`).
    return datasets.load_dataset(
        self.path,
        name=self.config,
        split=self.split,
        data_dir=self.data_dir,
        cache_dir=self.cache_dir,
    )


Expand Down

0 comments on commit 22c8ac7

Please sign in to comment.