
Commit

[pre-commit.ci] pre-commit autoupdate (#1329)
* [pre-commit.ci] pre-commit autoupdate

updates:
- [github.com/astral-sh/ruff-pre-commit: v0.1.14 → v0.6.9](astral-sh/ruff-pre-commit@v0.1.14...v0.6.9)
- [github.com/pre-commit/mirrors-mypy: v1.8.0 → v1.11.2](pre-commit/mirrors-mypy@v1.8.0...v1.11.2)
- [github.com/python-jsonschema/check-jsonschema: 0.27.3 → 0.29.3](python-jsonschema/check-jsonschema@0.27.3...0.29.3)
- [github.com/pre-commit/pre-commit-hooks: v4.5.0 → v5.0.0](pre-commit/pre-commit-hooks@v4.5.0...v5.0.0)

* fix(pre-commit): Minor fixes

* maint: Update to 3.8 min

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: eddiebergman <[email protected]>
pre-commit-ci[bot] and eddiebergman authored Oct 14, 2024
1 parent dea8724 commit 3155b5f
Showing 21 changed files with 240 additions and 272 deletions.
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
@@ -7,28 +7,28 @@ files: |
)/.*\.py$
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.14
rev: v0.6.9
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix, --no-cache]
- id: ruff-format
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.8.0
rev: v1.11.2
hooks:
- id: mypy
additional_dependencies:
- types-requests
- types-python-dateutil
- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.27.3
rev: 0.29.4
hooks:
- id: check-github-workflows
files: '^github/workflows/.*\.ya?ml$'
types: ["yaml"]
- id: check-dependabot
files: '^\.github/dependabot\.ya?ml$'
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v5.0.0
hooks:
- id: check-added-large-files
files: ".*"
19 changes: 8 additions & 11 deletions openml/_api_calls.py
@@ -351,7 +351,7 @@ def __is_checksum_equal(downloaded_file_binary: bytes, md5_checksum: str | None
return md5_checksum == md5_checksum_download


def _send_request( # noqa: C901
def _send_request( # noqa: C901, PLR0912
request_method: str,
url: str,
data: DATA_TYPE,
@@ -387,18 +387,15 @@ def _send_request( # noqa: C901
# -- Check if encoding is not UTF-8 perhaps
if __is_checksum_equal(response.content, md5_checksum):
raise OpenMLHashException(
"Checksum of downloaded file is unequal to the expected checksum {}"
"because the text encoding is not UTF-8 when downloading {}. "
"There might be a sever-sided issue with the file, "
"see: https://github.com/openml/openml-python/issues/1180.".format(
md5_checksum,
url,
),
f"Checksum of downloaded file is unequal to the expected checksum"
f"{md5_checksum} because the text encoding is not UTF-8 when "
f"downloading {url}. There might be a sever-sided issue with the file, "
"see: https://github.com/openml/openml-python/issues/1180.",
)

raise OpenMLHashException(
"Checksum of downloaded file is unequal to the expected checksum {} "
"when downloading {}.".format(md5_checksum, url),
f"Checksum of downloaded file is unequal to the expected checksum "
f"{md5_checksum} when downloading {url}.",
)

return response
@@ -464,7 +461,7 @@ def __parse_server_exception(
server_exception = xmltodict.parse(response.text)
except xml.parsers.expat.ExpatError as e:
raise e
except Exception as e: # noqa: BLE001
except Exception as e:
# OpenML has a sophisticated error system
# where information about failures is provided. try to parse this
raise OpenMLServerError(
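
For context on the reworked checksum messages: `__is_checksum_equal` compares the expected MD5 digest against the digest of the downloaded bytes, and `_send_request` raises `OpenMLHashException` on a mismatch. A minimal standalone sketch of that comparison (the function below is illustrative, not the module's private helper, and the None handling is an assumption):

from __future__ import annotations

import hashlib


def is_md5_checksum_equal(downloaded_file_binary: bytes, md5_checksum: str | None) -> bool:
    """Illustrative stand-in for the private helper: compare expected and computed MD5 digests."""
    if md5_checksum is None:
        # Assumption for this sketch: no expected checksum means nothing to verify.
        return True
    md5_checksum_download = hashlib.md5(downloaded_file_binary).hexdigest()
    return md5_checksum == md5_checksum_download


content = b"example payload"
print(is_md5_checksum_equal(content, hashlib.md5(content).hexdigest()))  # True
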
3 changes: 2 additions & 1 deletion openml/cli.py
@@ -1,4 +1,5 @@
""""Command Line Interface for `openml` to configure its settings."""
"""Command Line Interface for `openml` to configure its settings."""

from __future__ import annotations

import argparse
8 changes: 4 additions & 4 deletions openml/config.py
@@ -278,17 +278,17 @@ def _setup(config: _Config | None = None) -> None:
_root_cache_directory.mkdir(exist_ok=True, parents=True)
except PermissionError:
openml_logger.warning(
"No permission to create openml cache directory at %s! This can result in "
"OpenML-Python not working properly." % _root_cache_directory,
f"No permission to create openml cache directory at {_root_cache_directory}!"
" This can result in OpenML-Python not working properly.",
)

if cache_exists:
_create_log_handlers()
else:
_create_log_handlers(create_file_handler=False)
openml_logger.warning(
"No permission to create OpenML directory at %s! This can result in OpenML-Python "
"not working properly." % config_dir,
f"No permission to create OpenML directory at {config_dir}! This can result in "
" OpenML-Python not working properly.",
)


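
The two warnings rewritten above fire when the cache or config directory cannot be created. A minimal sketch of that pattern, with a placeholder logger and function name rather than the module's actual `_setup` machinery:

from __future__ import annotations

import logging
from pathlib import Path

logger = logging.getLogger("openml-example")  # placeholder, not the real openml_logger


def ensure_directory(path: Path) -> bool:
    """Create a cache/config directory if possible; warn instead of crashing on PermissionError."""
    try:
        path.mkdir(exist_ok=True, parents=True)
        return True
    except PermissionError:
        logger.warning(
            f"No permission to create openml cache directory at {path}! "
            "This can result in OpenML-Python not working properly.",
        )
        return False
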
8 changes: 4 additions & 4 deletions openml/datasets/dataset.py
@@ -156,14 +156,14 @@ def find_invalid_characters(string: str, pattern: str) -> str:
)

if dataset_id is None:
pattern = "^[\x00-\x7F]*$"
pattern = "^[\x00-\x7f]*$"
if description and not re.match(pattern, description):
# not basiclatin (XSD complains)
invalid_characters = find_invalid_characters(description, pattern)
raise ValueError(
f"Invalid symbols {invalid_characters} in description: {description}",
)
pattern = "^[\x00-\x7F]*$"
pattern = "^[\x00-\x7f]*$"
if citation and not re.match(pattern, citation):
# not basiclatin (XSD complains)
invalid_characters = find_invalid_characters(citation, pattern)
@@ -574,7 +574,7 @@ def _parse_data_from_file(self, data_file: Path) -> tuple[list[str], list[bool],
def _parse_data_from_pq(self, data_file: Path) -> tuple[list[str], list[bool], pd.DataFrame]:
try:
data = pd.read_parquet(data_file)
except Exception as e: # noqa: BLE001
except Exception as e:
raise Exception(f"File: {data_file}") from e
categorical = [data[c].dtype.name == "category" for c in data.columns]
attribute_names = list(data.columns)
@@ -816,7 +816,7 @@ def get_data( # noqa: C901, PLR0912, PLR0915
to_exclude.extend(self.ignore_attribute)

if len(to_exclude) > 0:
logger.info("Going to remove the following attributes: %s" % to_exclude)
logger.info(f"Going to remove the following attributes: {to_exclude}")
keep = np.array([column not in to_exclude for column in attribute_names])
data = data.loc[:, keep] if isinstance(data, pd.DataFrame) else data[:, keep]

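
The `^[\x00-\x7f]*$` pattern above restricts descriptions and citations to the Basic Latin range so the server-side XSD validation accepts them. A small illustrative sketch of that check (the helper below stands in for, but is not, the module's `find_invalid_characters`):

import re

BASIC_LATIN = "^[\x00-\x7f]*$"  # same pattern as in the diff above


def invalid_characters(text: str) -> str:
    """Collect the characters that fall outside the Basic Latin range (illustrative helper)."""
    return "".join(sorted({c for c in text if not re.match(BASIC_LATIN, c)}))


description = "Café dataset"
if not re.match(BASIC_LATIN, description):
    # The library raises ValueError at this point; printing keeps the sketch side-effect free.
    print(f"Invalid symbols {invalid_characters(description)} in description: {description}")
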
31 changes: 11 additions & 20 deletions openml/datasets/functions.py
@@ -6,6 +6,7 @@
import warnings
from collections import OrderedDict
from pathlib import Path
from pyexpat import ExpatError
from typing import TYPE_CHECKING, Any, overload
from typing_extensions import Literal

@@ -15,7 +16,6 @@
import pandas as pd
import urllib3
import xmltodict
from pyexpat import ExpatError
from scipy.sparse import coo_matrix

import openml._api_calls
@@ -85,8 +85,7 @@ def list_datasets(
*,
output_format: Literal["dataframe"],
**kwargs: Any,
) -> pd.DataFrame:
...
) -> pd.DataFrame: ...


@overload
@@ -98,8 +97,7 @@ def list_datasets(
tag: str | None,
output_format: Literal["dataframe"],
**kwargs: Any,
) -> pd.DataFrame:
...
) -> pd.DataFrame: ...


@overload
@@ -111,8 +109,7 @@ def list_datasets(
tag: str | None = ...,
output_format: Literal["dict"] = "dict",
**kwargs: Any,
) -> pd.DataFrame:
...
) -> pd.DataFrame: ...


def list_datasets(
@@ -207,17 +204,15 @@ def _list_datasets(
data_id: list | None = ...,
output_format: Literal["dict"] = "dict",
**kwargs: Any,
) -> dict:
...
) -> dict: ...


@overload
def _list_datasets(
data_id: list | None = ...,
output_format: Literal["dataframe"] = "dataframe",
**kwargs: Any,
) -> pd.DataFrame:
...
) -> pd.DataFrame: ...


def _list_datasets(
@@ -256,18 +251,16 @@ def _list_datasets(
for operator, value in kwargs.items():
api_call += f"/{operator}/{value}"
if data_id is not None:
api_call += "/data_id/%s" % ",".join([str(int(i)) for i in data_id])
api_call += "/data_id/{}".format(",".join([str(int(i)) for i in data_id]))
return __list_datasets(api_call=api_call, output_format=output_format)


@overload
def __list_datasets(api_call: str, output_format: Literal["dict"] = "dict") -> dict:
...
def __list_datasets(api_call: str, output_format: Literal["dict"] = "dict") -> dict: ...


@overload
def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame:
...
def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: ...


def __list_datasets(
@@ -785,10 +778,8 @@ def create_dataset( # noqa: C901, PLR0912, PLR0915
if not is_row_id_an_attribute:
raise ValueError(
"'row_id_attribute' should be one of the data attribute. "
" Got '{}' while candidates are {}.".format(
row_id_attribute,
[attr[0] for attr in attributes_],
),
f" Got '{row_id_attribute}' while candidates are"
f" {[attr[0] for attr in attributes_]}.",
)

if isinstance(data, pd.DataFrame):
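
Most of the churn in this file is ruff-format collapsing `...` overload bodies onto a single line. A minimal sketch of the underlying `typing.overload` pattern with a `Literal` `output_format` switch; the function name and signatures below are simplified stand-ins, not the real `list_datasets` API:

from __future__ import annotations

from typing import Any, overload

import pandas as pd
from typing_extensions import Literal


@overload
def list_items(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: ...


@overload
def list_items(output_format: Literal["dict"] = "dict", **kwargs: Any) -> dict: ...


def list_items(output_format: str = "dict", **kwargs: Any) -> pd.DataFrame | dict:
    """Single implementation; the overloads above only narrow the return type for type checkers."""
    records = [{"did": 1, "name": "example"}]
    return pd.DataFrame(records) if output_format == "dataframe" else {1: records[0]}


print(list_items(output_format="dataframe"))
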
22 changes: 10 additions & 12 deletions openml/evaluations/functions.py
@@ -32,8 +32,7 @@ def list_evaluations(
per_fold: bool | None = ...,
sort_order: str | None = ...,
output_format: Literal["dict", "object"] = "dict",
) -> dict:
...
) -> dict: ...


@overload
@@ -51,8 +50,7 @@ def list_evaluations(
per_fold: bool | None = ...,
sort_order: str | None = ...,
output_format: Literal["dataframe"] = ...,
) -> pd.DataFrame:
...
) -> pd.DataFrame: ...


def list_evaluations(
@@ -204,24 +202,24 @@ def _list_evaluations(
-------
dict of objects, or dataframe
"""
api_call = "evaluation/list/function/%s" % function
api_call = f"evaluation/list/function/{function}"
if kwargs is not None:
for operator, value in kwargs.items():
api_call += f"/{operator}/{value}"
if tasks is not None:
api_call += "/task/%s" % ",".join([str(int(i)) for i in tasks])
api_call += "/task/{}".format(",".join([str(int(i)) for i in tasks]))
if setups is not None:
api_call += "/setup/%s" % ",".join([str(int(i)) for i in setups])
api_call += "/setup/{}".format(",".join([str(int(i)) for i in setups]))
if flows is not None:
api_call += "/flow/%s" % ",".join([str(int(i)) for i in flows])
api_call += "/flow/{}".format(",".join([str(int(i)) for i in flows]))
if runs is not None:
api_call += "/run/%s" % ",".join([str(int(i)) for i in runs])
api_call += "/run/{}".format(",".join([str(int(i)) for i in runs]))
if uploaders is not None:
api_call += "/uploader/%s" % ",".join([str(int(i)) for i in uploaders])
api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploaders]))
if study is not None:
api_call += "/study/%d" % study
if sort_order is not None:
api_call += "/sort_order/%s" % sort_order
api_call += f"/sort_order/{sort_order}"

return __list_evaluations(api_call, output_format=output_format)

@@ -236,7 +234,7 @@ def __list_evaluations(
# Minimalistic check if the XML is useful
if "oml:evaluations" not in evals_dict:
raise ValueError(
"Error in return XML, does not contain " '"oml:evaluations": %s' % str(evals_dict),
"Error in return XML, does not contain " f'"oml:evaluations": {evals_dict!s}',
)

assert isinstance(evals_dict["oml:evaluations"]["oml:evaluation"], list), type(
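
The `%`-formatting replaced above builds a REST-style filter path from whichever id lists were supplied. A small illustrative sketch of that construction (the endpoint prefix and parameter names follow the diff, but the function itself is hypothetical):

from __future__ import annotations


def build_evaluation_list_call(
    function: str,
    tasks: list[int] | None = None,
    flows: list[int] | None = None,
    sort_order: str | None = None,
) -> str:
    """Append a /filter/value segment only for the filters that were actually supplied."""
    api_call = f"evaluation/list/function/{function}"
    if tasks is not None:
        api_call += "/task/{}".format(",".join(str(int(i)) for i in tasks))
    if flows is not None:
        api_call += "/flow/{}".format(",".join(str(int(i)) for i in flows))
    if sort_order is not None:
        api_call += f"/sort_order/{sort_order}"
    return api_call


print(build_evaluation_list_call("predictive_accuracy", tasks=[1, 2], sort_order="desc"))
# evaluation/list/function/predictive_accuracy/task/1,2/sort_order/desc
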
(The diffs for the remaining 14 changed files are not shown here.)
