Skip to content

Commit

Permalink
Merge branch 'develop' into fix/correct-path-separator-windows
Browse files (browse the repository at this point in the history)
  • Loading branch information
areebahmeddd authored Nov 4, 2024
2 parents 67fd48e + d844fe4 commit 9428b06
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@

### Features

* allow fetching other datasets (obf, opff, opf) ([#223](https://github.com/openfoodfacts/openfoodfacts-python/pull/223))
* add download_image function ([#243](https://github.com/openfoodfacts/openfoodfacts-python/issues/243)) ([265f10b](https://github.com/openfoodfacts/openfoodfacts-python/commit/265f10bfa9047c48874255fbc66d9bab32fa61c5))

## [0.3.0](https://github.com/openfoodfacts/openfoodfacts-python/compare/v0.2.1...v0.3.0) (2024-04-18)
Expand Down
8 changes: 7 additions & 1 deletion openfoodfacts/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def get_dataset(
force_download: bool = False,
download_newer: bool = False,
cache_dir: Optional[Path] = None,
obsolete: bool = False,
) -> Path:
"""Download (and cache) Open Food Facts dataset.
Expand All @@ -60,10 +61,13 @@ def get_dataset(
version is available (based on file Etag)
:param cache_dir: the cache directory to use, defaults to
~/.cache/openfoodfacts/taxonomy
:param obsolete: if True, download the obsolete dataset, defaults to False
:return: the path of the dataset
"""
cache_dir = DEFAULT_CACHE_DIR if cache_dir is None else cache_dir
file_name = DATASET_FILE_NAMES[flavor][dataset_type]
if obsolete:
file_name = file_name.replace(".jsonl.gz", "_obsolete.jsonl.gz")
dataset_path = cache_dir / file_name
url = f"{URLBuilder.static(flavor, Environment.org)}/data/{file_name}"
cache_dir.mkdir(parents=True, exist_ok=True)
Expand All @@ -82,6 +86,7 @@ def __init__(
flavor: Flavor = Flavor.off,
dataset_type: DatasetType = DatasetType.jsonl,
dataset_path: Optional[Path] = None,
obsolete: bool = False,
**kwargs,
):
"""A product dataset.
Expand All @@ -101,6 +106,7 @@ def __init__(
to DatasetType.jsonl. This parameter is ignored if dataset_path is
provided.
:param dataset_path: the path of the dataset, defaults to None.
:param obsolete: if True, download the obsolete dataset, defaults to False.
:param kwargs: additional arguments passed to `get_dataset` when
downloading the dataset
"""
Expand All @@ -118,7 +124,7 @@ def __init__(
else:
raise ValueError(f"Unknown dataset type: {full_suffix}")
else:
self.dataset_path = get_dataset(flavor, dataset_type, **kwargs)
self.dataset_path = get_dataset(flavor, dataset_type, obsolete=obsolete, **kwargs)

def __iter__(self):
if self.dataset_type is DatasetType.jsonl:
Expand Down

0 comments on commit 9428b06

Please sign in to comment.