Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add/1034 #1352

Merged
merged 10 commits into from
Sep 22, 2024
40 changes: 9 additions & 31 deletions openml/datasets/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,8 @@ def _name_to_id(

def get_datasets(
dataset_ids: list[str | int],
download_data: bool = True, # noqa: FBT001, FBT002
download_qualities: bool = True, # noqa: FBT001, FBT002
download_data: bool = False, # noqa: FBT001, FBT002
download_qualities: bool = False, # noqa: FBT001, FBT002
) -> list[OpenMLDataset]:
"""Download datasets.

Expand Down Expand Up @@ -450,14 +450,14 @@ def get_datasets(


@openml.utils.thread_safe_if_oslo_installed
def get_dataset( # noqa: C901, PLR0912, PLR0915
def get_dataset( # noqa: C901, PLR0912
dataset_id: int | str,
download_data: bool | None = None, # Optional for deprecation warning; later again only bool
download_data: bool = False, # noqa: FBT002, FBT001
version: int | None = None,
error_if_multiple: bool = False, # noqa: FBT002, FBT001
cache_format: Literal["pickle", "feather"] = "pickle",
download_qualities: bool | None = None, # Same as above
download_features_meta_data: bool | None = None, # Same as above
download_qualities: bool = False, # noqa: FBT002, FBT001
download_features_meta_data: bool = False, # noqa: FBT002, FBT001
download_all_files: bool = False, # noqa: FBT002, FBT001
force_refresh_cache: bool = False, # noqa: FBT001, FBT002
) -> OpenMLDataset:
Expand Down Expand Up @@ -485,7 +485,7 @@ def get_dataset( # noqa: C901, PLR0912, PLR0915
----------
dataset_id : int or str
Dataset ID of the dataset to download
download_data : bool (default=True)
download_data : bool (default=False)
If True, also download the data file. Beware that some datasets are large and it might
make the operation noticeably slower. Metadata is also still retrieved.
If False, create the OpenMLDataset and only populate it with the metadata.
Expand All @@ -499,12 +499,12 @@ def get_dataset( # noqa: C901, PLR0912, PLR0915
Format for caching the dataset - may be feather or pickle
Note that the default 'pickle' option may load slower than feather when
the number of rows is very high.
download_qualities : bool (default=True)
download_qualities : bool (default=False)
Option to download 'qualities' meta-data in addition to the minimal dataset description.
If True, download and cache the qualities file.
If False, create the OpenMLDataset without qualities metadata. The data may later be added
to the OpenMLDataset through the `OpenMLDataset.load_metadata(qualities=True)` method.
download_features_meta_data : bool (default=True)
download_features_meta_data : bool (default=False)
Option to download 'features' meta-data in addition to the minimal dataset description.
If True, download and cache the features file.
If False, create the OpenMLDataset without features metadata. The data may later be added
Expand All @@ -523,28 +523,6 @@ def get_dataset( # noqa: C901, PLR0912, PLR0915
dataset : :class:`openml.OpenMLDataset`
The downloaded dataset.
"""
# TODO(0.15): Remove the deprecation warning and make the default False; adjust types above
# and documentation. Also remove None-to-True-cases below
if any(
download_flag is None
for download_flag in [download_data, download_qualities, download_features_meta_data]
):
warnings.warn(
"Starting from Version 0.15 `download_data`, `download_qualities`, and `download_featu"
"res_meta_data` will all be ``False`` instead of ``True`` by default to enable lazy "
"loading. To disable this message until version 0.15 explicitly set `download_data`, "
"`download_qualities`, and `download_features_meta_data` to a bool while calling "
"`get_dataset`.",
FutureWarning,
stacklevel=2,
)

download_data = True if download_data is None else download_data
download_qualities = True if download_qualities is None else download_qualities
download_features_meta_data = (
True if download_features_meta_data is None else download_features_meta_data
)

if download_all_files:
warnings.warn(
"``download_all_files`` is experimental and is likely to break with new releases.",
Expand Down
4 changes: 2 additions & 2 deletions openml/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class TestBase(unittest.TestCase):
logger = logging.getLogger("unit_tests_published_entities")
logger.setLevel(logging.DEBUG)

def setUp(self, n_levels: int = 1) -> None:
def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None:
LennartPurucker marked this conversation as resolved.
Show resolved Hide resolved
"""Setup variables and temporary directories.

In particular, this method:
Expand Down Expand Up @@ -92,7 +92,7 @@ def setUp(self, n_levels: int = 1) -> None:
self.static_cache_dir = static_cache_dir
self.cwd = Path.cwd()
workdir = Path(__file__).parent.absolute()
tmp_dir_name = self.id()
tmp_dir_name = self.id() + tmpdir_suffix
self.workdir = workdir / tmp_dir_name
shutil.rmtree(self.workdir, ignore_errors=True)

Expand Down
Loading
Loading