From 7ec7692effa9b80b76dadfb2b57b3b3a513d81a4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 7 Oct 2024 21:49:45 +0000
Subject: [PATCH 1/3] [pre-commit.ci] pre-commit autoupdate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/astral-sh/ruff-pre-commit: v0.1.14 → v0.6.9](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.14...v0.6.9)
- [github.com/pre-commit/mirrors-mypy: v1.8.0 → v1.11.2](https://github.com/pre-commit/mirrors-mypy/compare/v1.8.0...v1.11.2)
- [github.com/python-jsonschema/check-jsonschema: 0.27.3 → 0.29.3](https://github.com/python-jsonschema/check-jsonschema/compare/0.27.3...0.29.3)
- [github.com/pre-commit/pre-commit-hooks: v4.5.0 → v5.0.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.5.0...v5.0.0)
---
 .pre-commit-config.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5f13625a0..6598ae9e5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,20 +7,20 @@ files: |
   )/.*\.py$
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.14
+    rev: v0.6.9
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix, --no-cache]
       - id: ruff-format
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.8.0
+    rev: v1.11.2
     hooks:
       - id: mypy
         additional_dependencies:
           - types-requests
           - types-python-dateutil
   - repo: https://github.com/python-jsonschema/check-jsonschema
-    rev: 0.27.3
+    rev: 0.29.3
     hooks:
       - id: check-github-workflows
         files: '^github/workflows/.*\.ya?ml$'
@@ -28,7 +28,7 @@ repos:
       - id: check-dependabot
         files: '^\.github/dependabot\.ya?ml$'
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v5.0.0
     hooks:
       - id: check-added-large-files
         files: ".*"

From 232d9d1defee2ec7fa408edf83c297ba73a8c700 Mon Sep 17 00:00:00 2001
From: eddiebergman <eddiebergmanhs@gmail.com>
Date: Mon, 14 Oct 2024 10:47:53 +0200
Subject: [PATCH 2/3] fix(pre-commit): Minor fixes

---
 .pre-commit-config.yaml                |   2 +-
 openml/_api_calls.py                   |  19 ++--
 openml/cli.py                          |   3 +-
 openml/config.py                       |   8 +-
 openml/datasets/dataset.py             |   8 +-
 openml/datasets/functions.py           |  31 ++----
 openml/evaluations/functions.py        |  22 ++--
 openml/extensions/sklearn/extension.py |  78 ++++++++------
 openml/flows/flow.py                   |  10 +-
 openml/flows/functions.py              |  38 +++----
 openml/runs/functions.py               |  28 +++--
 openml/runs/run.py                     |   2 +-
 openml/runs/trace.py                   |  22 ++--
 openml/setups/functions.py             |   9 +-
 openml/study/functions.py              |  32 ++----
 openml/tasks/functions.py              |   9 +-
 openml/tasks/split.py                  |   6 +-
 openml/tasks/task.py                   |  14 +--
 openml/testing.py                      |   2 +-
 openml/utils.py                        |  21 ++--
 pyproject.toml                         | 140 +++++++++++++------------
 21 files changed, 236 insertions(+), 268 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6598ae9e5..e46a59318 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -20,7 +20,7 @@ repos:
           - types-requests
           - types-python-dateutil
   - repo: https://github.com/python-jsonschema/check-jsonschema
-    rev: 0.29.3
+    rev: 0.29.4
     hooks:
       - id: check-github-workflows
         files: '^github/workflows/.*\.ya?ml$'
diff --git a/openml/_api_calls.py b/openml/_api_calls.py
index 4f673186e..b74b50cb4 100644
--- a/openml/_api_calls.py
+++ b/openml/_api_calls.py
@@ -351,7 +351,7 @@ def __is_checksum_equal(downloaded_file_binary: bytes, md5_checksum: str | None
     return md5_checksum == md5_checksum_download
 
 
-def _send_request(  # noqa: C901
+def _send_request(  # noqa: C901, PLR0912
     request_method: str,
     url: str,
     data: DATA_TYPE,
@@ -387,18 +387,15 @@ def _send_request(  # noqa: C901
                     # -- Check if encoding is not UTF-8 perhaps
                     if __is_checksum_equal(response.content, md5_checksum):
                         raise OpenMLHashException(
-                            "Checksum of downloaded file is unequal to the expected checksum {}"
-                            "because the text encoding is not UTF-8 when downloading {}. "
-                            "There might be a sever-sided issue with the file, "
-                            "see: https://github.com/openml/openml-python/issues/1180.".format(
-                                md5_checksum,
-                                url,
-                            ),
+                            f"Checksum of downloaded file is unequal to the expected checksum "
+                            f"{md5_checksum} because the text encoding is not UTF-8 when "
+                            f"downloading {url}. There might be a server-side issue with the file, "
+                            "see: https://github.com/openml/openml-python/issues/1180.",
                         )
 
                     raise OpenMLHashException(
-                        "Checksum of downloaded file is unequal to the expected checksum {} "
-                        "when downloading {}.".format(md5_checksum, url),
+                        f"Checksum of downloaded file is unequal to the expected checksum "
+                        f"{md5_checksum} when downloading {url}.",
                     )
 
                 return response
@@ -464,7 +461,7 @@ def __parse_server_exception(
         server_exception = xmltodict.parse(response.text)
     except xml.parsers.expat.ExpatError as e:
         raise e
-    except Exception as e:  # noqa: BLE001
+    except Exception as e:
         # OpenML has a sophisticated error system
         # where information about failures is provided. try to parse this
         raise OpenMLServerError(
diff --git a/openml/cli.py b/openml/cli.py
index 5732442d0..d0a46e498 100644
--- a/openml/cli.py
+++ b/openml/cli.py
@@ -1,4 +1,5 @@
-""""Command Line Interface for `openml` to configure its settings."""
+"""Command Line Interface for `openml` to configure its settings."""
+
 from __future__ import annotations
 
 import argparse
diff --git a/openml/config.py b/openml/config.py
index 6a37537dc..b21c981e2 100644
--- a/openml/config.py
+++ b/openml/config.py
@@ -278,8 +278,8 @@ def _setup(config: _Config | None = None) -> None:
             _root_cache_directory.mkdir(exist_ok=True, parents=True)
     except PermissionError:
         openml_logger.warning(
-            "No permission to create openml cache directory at %s! This can result in "
-            "OpenML-Python not working properly." % _root_cache_directory,
+            f"No permission to create openml cache directory at {_root_cache_directory}!"
+            " This can result in OpenML-Python not working properly.",
         )
 
     if cache_exists:
@@ -287,8 +287,8 @@ def _setup(config: _Config | None = None) -> None:
     else:
         _create_log_handlers(create_file_handler=False)
         openml_logger.warning(
-            "No permission to create OpenML directory at %s! This can result in OpenML-Python "
-            "not working properly." % config_dir,
+            f"No permission to create OpenML directory at {config_dir}! This can result in "
+            "OpenML-Python not working properly.",
         )
 
 
diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py
index 30febcba5..c9064ba70 100644
--- a/openml/datasets/dataset.py
+++ b/openml/datasets/dataset.py
@@ -156,14 +156,14 @@ def find_invalid_characters(string: str, pattern: str) -> str:
             )
 
         if dataset_id is None:
-            pattern = "^[\x00-\x7F]*$"
+            pattern = "^[\x00-\x7f]*$"
             if description and not re.match(pattern, description):
                 # not basiclatin (XSD complains)
                 invalid_characters = find_invalid_characters(description, pattern)
                 raise ValueError(
                     f"Invalid symbols {invalid_characters} in description: {description}",
                 )
-            pattern = "^[\x00-\x7F]*$"
+            pattern = "^[\x00-\x7f]*$"
             if citation and not re.match(pattern, citation):
                 # not basiclatin (XSD complains)
                 invalid_characters = find_invalid_characters(citation, pattern)
@@ -574,7 +574,7 @@ def _parse_data_from_file(self, data_file: Path) -> tuple[list[str], list[bool],
     def _parse_data_from_pq(self, data_file: Path) -> tuple[list[str], list[bool], pd.DataFrame]:
         try:
             data = pd.read_parquet(data_file)
-        except Exception as e:  # noqa: BLE001
+        except Exception as e:
             raise Exception(f"File: {data_file}") from e
         categorical = [data[c].dtype.name == "category" for c in data.columns]
         attribute_names = list(data.columns)
@@ -816,7 +816,7 @@ def get_data(  # noqa: C901, PLR0912, PLR0915
                 to_exclude.extend(self.ignore_attribute)
 
         if len(to_exclude) > 0:
-            logger.info("Going to remove the following attributes: %s" % to_exclude)
+            logger.info(f"Going to remove the following attributes: {to_exclude}")
             keep = np.array([column not in to_exclude for column in attribute_names])
             data = data.loc[:, keep] if isinstance(data, pd.DataFrame) else data[:, keep]
 
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index 410867b01..f7eee98d6 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -6,6 +6,7 @@
 import warnings
 from collections import OrderedDict
 from pathlib import Path
+from pyexpat import ExpatError
 from typing import TYPE_CHECKING, Any, overload
 from typing_extensions import Literal
 
@@ -15,7 +16,6 @@
 import pandas as pd
 import urllib3
 import xmltodict
-from pyexpat import ExpatError
 from scipy.sparse import coo_matrix
 
 import openml._api_calls
@@ -85,8 +85,7 @@ def list_datasets(
     *,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...
 
 
 @overload
@@ -98,8 +97,7 @@ def list_datasets(
     tag: str | None,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...
 
 
 @overload
@@ -111,8 +109,7 @@ def list_datasets(
     tag: str | None = ...,
     output_format: Literal["dict"] = "dict",
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...
 
 
 def list_datasets(
@@ -207,8 +204,7 @@ def _list_datasets(
     data_id: list | None = ...,
     output_format: Literal["dict"] = "dict",
     **kwargs: Any,
-) -> dict:
-    ...
+) -> dict: ...
 
 
 @overload
@@ -216,8 +212,7 @@ def _list_datasets(
     data_id: list | None = ...,
     output_format: Literal["dataframe"] = "dataframe",
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...
 
 
 def _list_datasets(
@@ -256,18 +251,16 @@ def _list_datasets(
         for operator, value in kwargs.items():
             api_call += f"/{operator}/{value}"
     if data_id is not None:
-        api_call += "/data_id/%s" % ",".join([str(int(i)) for i in data_id])
+        api_call += "/data_id/{}".format(",".join([str(int(i)) for i in data_id]))
     return __list_datasets(api_call=api_call, output_format=output_format)
 
 
 @overload
-def __list_datasets(api_call: str, output_format: Literal["dict"] = "dict") -> dict:
-    ...
+def __list_datasets(api_call: str, output_format: Literal["dict"] = "dict") -> dict: ...
 
 
 @overload
-def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame:
-    ...
+def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: ...
 
 
 def __list_datasets(
@@ -785,10 +778,8 @@ def create_dataset(  # noqa: C901, PLR0912, PLR0915
         if not is_row_id_an_attribute:
             raise ValueError(
                 "'row_id_attribute' should be one of the data attribute. "
-                " Got '{}' while candidates are {}.".format(
-                    row_id_attribute,
-                    [attr[0] for attr in attributes_],
-                ),
+                f" Got '{row_id_attribute}' while candidates are"
+                f" {[attr[0] for attr in attributes_]}.",
             )
 
     if isinstance(data, pd.DataFrame):
diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py
index a854686d1..a39096a58 100644
--- a/openml/evaluations/functions.py
+++ b/openml/evaluations/functions.py
@@ -32,8 +32,7 @@ def list_evaluations(
     per_fold: bool | None = ...,
     sort_order: str | None = ...,
     output_format: Literal["dict", "object"] = "dict",
-) -> dict:
-    ...
+) -> dict: ...
 
 
 @overload
@@ -51,8 +50,7 @@ def list_evaluations(
     per_fold: bool | None = ...,
     sort_order: str | None = ...,
     output_format: Literal["dataframe"] = ...,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...
 
 
 def list_evaluations(
@@ -204,24 +202,24 @@ def _list_evaluations(
     -------
     dict of objects, or dataframe
     """
-    api_call = "evaluation/list/function/%s" % function
+    api_call = f"evaluation/list/function/{function}"
     if kwargs is not None:
         for operator, value in kwargs.items():
             api_call += f"/{operator}/{value}"
     if tasks is not None:
-        api_call += "/task/%s" % ",".join([str(int(i)) for i in tasks])
+        api_call += "/task/{}".format(",".join([str(int(i)) for i in tasks]))
     if setups is not None:
-        api_call += "/setup/%s" % ",".join([str(int(i)) for i in setups])
+        api_call += "/setup/{}".format(",".join([str(int(i)) for i in setups]))
     if flows is not None:
-        api_call += "/flow/%s" % ",".join([str(int(i)) for i in flows])
+        api_call += "/flow/{}".format(",".join([str(int(i)) for i in flows]))
     if runs is not None:
-        api_call += "/run/%s" % ",".join([str(int(i)) for i in runs])
+        api_call += "/run/{}".format(",".join([str(int(i)) for i in runs]))
     if uploaders is not None:
-        api_call += "/uploader/%s" % ",".join([str(int(i)) for i in uploaders])
+        api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploaders]))
     if study is not None:
         api_call += "/study/%d" % study
     if sort_order is not None:
-        api_call += "/sort_order/%s" % sort_order
+        api_call += f"/sort_order/{sort_order}"
 
     return __list_evaluations(api_call, output_format=output_format)
 
@@ -236,7 +234,7 @@ def __list_evaluations(
     # Minimalistic check if the XML is useful
     if "oml:evaluations" not in evals_dict:
         raise ValueError(
-            "Error in return XML, does not contain " '"oml:evaluations": %s' % str(evals_dict),
+            "Error in return XML, does not contain " f'"oml:evaluations": {evals_dict!s}',
         )
 
     assert isinstance(evals_dict["oml:evaluations"]["oml:evaluation"], list), type(
diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
index 02322196e..2d40d03b8 100644
--- a/openml/extensions/sklearn/extension.py
+++ b/openml/extensions/sklearn/extension.py
@@ -48,12 +48,27 @@
     r"(?P<version>(\d+\.)?(\d+\.)?(\d+)?(dev)?[0-9]*))?$",
 )
 
-sctypes = np.sctypes if Version(np.__version__) < Version("2.0") else np.core.sctypes
+# NOTE(eddiebergman): These types were previously collected from `np.sctypes`,
+# which has since been deprecated. The NumPy 2.0 migration guide recommends
+# listing them explicitly, so they are enumerated by hand here.
 SIMPLE_NUMPY_TYPES = [
-    nptype
-    for type_cat, nptypes in sctypes.items()
-    for nptype in nptypes  # type: ignore
-    if type_cat != "others"
+    np.int8,
+    np.int16,
+    np.int32,
+    np.int64,
+    np.longlong,
+    np.uint8,
+    np.uint16,
+    np.uint32,
+    np.uint64,
+    np.ulonglong,
+    np.float16,
+    np.float32,
+    np.float64,
+    np.longdouble,
+    np.complex64,
+    np.complex128,
+    np.clongdouble,
 ]
 SIMPLE_TYPES = (bool, int, float, str, *SIMPLE_NUMPY_TYPES)
 
@@ -312,7 +327,7 @@ def flow_to_model(
             strict_version=strict_version,
         )
 
-    def _deserialize_sklearn(  # noqa: PLR0915, C901, PLR0913, PLR0912
+    def _deserialize_sklearn(  # noqa: PLR0915, C901, PLR0912
         self,
         o: Any,
         components: dict | None = None,
@@ -419,7 +434,7 @@ def _deserialize_sklearn(  # noqa: PLR0915, C901, PLR0913, PLR0912
                         strict_version=strict_version,
                     )
                 else:
-                    raise ValueError("Cannot flow_to_sklearn %s" % serialized_type)
+                    raise ValueError(f"Cannot flow_to_sklearn {serialized_type}")
 
             else:
                 rval = OrderedDict(
@@ -979,17 +994,17 @@ def flatten_all(list_):
                         # length 2 is for {VotingClassifier.estimators,
                         # Pipeline.steps, FeatureUnion.transformer_list}
                         # length 3 is for ColumnTransformer
-                        msg = "Length of tuple of type {} does not match assumptions".format(
-                            sub_component_type,
+                        raise ValueError(
+                            f"Length of tuple of type {sub_component_type}"
+                            " does not match assumptions"
                         )
-                        raise ValueError(msg)
 
                     if isinstance(sub_component, str):
                         if sub_component not in SKLEARN_PIPELINE_STRING_COMPONENTS:
                             msg = (
                                 "Second item of tuple does not match assumptions. "
                                 "If string, can be only 'drop' or 'passthrough' but"
-                                "got %s" % sub_component
+                                f" got {sub_component}"
                             )
                             raise ValueError(msg)
                     elif sub_component is None:
@@ -1002,15 +1017,15 @@ def flatten_all(list_):
                     elif not isinstance(sub_component, OpenMLFlow):
                         msg = (
                             "Second item of tuple does not match assumptions. "
-                            "Expected OpenMLFlow, got %s" % type(sub_component)
+                            f"Expected OpenMLFlow, got {type(sub_component)}"
                         )
                         raise TypeError(msg)
 
                     if identifier in reserved_keywords:
                         parent_model = f"{model.__module__}.{model.__class__.__name__}"
-                        msg = "Found element shadowing official " "parameter for {}: {}".format(
-                            parent_model,
-                            identifier,
+                        msg = (
+                            "Found element shadowing official "
+                            f"parameter for {parent_model}: {identifier}"
                         )
                         raise PyOpenMLError(msg)
 
@@ -1035,9 +1050,9 @@ def flatten_all(list_):
                             model=None,
                         )
                         component_reference: OrderedDict[str, str | dict] = OrderedDict()
-                        component_reference[
-                            "oml-python:serialized_object"
-                        ] = COMPOSITION_STEP_CONSTANT
+                        component_reference["oml-python:serialized_object"] = (
+                            COMPOSITION_STEP_CONSTANT
+                        )
                         cr_value: dict[str, Any] = OrderedDict()
                         cr_value["key"] = identifier
                         cr_value["step_name"] = identifier
@@ -1218,7 +1233,7 @@ def _check_dependencies(
         for dependency_string in dependencies_list:
             match = DEPENDENCIES_PATTERN.match(dependency_string)
             if not match:
-                raise ValueError("Cannot parse dependency %s" % dependency_string)
+                raise ValueError(f"Cannot parse dependency {dependency_string}")
 
             dependency_name = match.group("name")
             operation = match.group("operation")
@@ -1237,7 +1252,7 @@ def _check_dependencies(
                     installed_version > required_version or installed_version == required_version
                 )
             else:
-                raise NotImplementedError("operation '%s' is not supported" % operation)
+                raise NotImplementedError(f"operation '{operation}' is not supported")
             message = (
                 "Trying to deserialize a model with dependency "
                 f"{dependency_string} not satisfied."
@@ -1363,7 +1378,7 @@ def _serialize_cross_validator(self, o: Any) -> OrderedDict[str, str | dict]:
             with warnings.catch_warnings(record=True) as w:
                 warnings.simplefilter("always", DeprecationWarning)
                 value = getattr(o, key, None)
-                if w is not None and len(w) and w[0].category == DeprecationWarning:
+                if w is not None and len(w) and w[0].category is DeprecationWarning:
                     # if the parameter is deprecated, don't show it
                     continue
 
@@ -1812,9 +1827,9 @@ def _prediction_to_probabilities(
                     # then we need to add a column full of zeros into the probabilities
                     # for class 3 because the rest of the library expects that the
                     # probabilities are ordered the same way as the classes are ordered).
-                    message = "Estimator only predicted for {}/{} classes!".format(
-                        proba_y.shape[1],
-                        len(task.class_labels),
+                    message = (
+                        f"Estimator only predicted for {proba_y.shape[1]}/{len(task.class_labels)}"
+                        " classes!"
                     )
                     warnings.warn(message, stacklevel=2)
                     openml.config.logger.warning(message)
@@ -2008,9 +2023,8 @@ def is_subcomponent_specification(values):
                                 pass
                             else:
                                 raise TypeError(
-                                    "Subcomponent flow should be of type flow, but is {}".format(
-                                        type(subcomponent_flow),
-                                    ),
+                                    "Subcomponent flow should be of type flow, but is"
+                                    f" {type(subcomponent_flow)}",
                                 )
 
                         current = {
@@ -2129,8 +2143,8 @@ def instantiate_model_from_hpo_class(
         """
         if not self._is_hpo_class(model):
             raise AssertionError(
-                "Flow model %s is not an instance of sklearn.model_selection._search.BaseSearchCV"
-                % model,
+                f"Flow model {model} is not an instance of"
+                " sklearn.model_selection._search.BaseSearchCV",
             )
         base_estimator = model.estimator
         base_estimator.set_params(**trace_iteration.get_parameters())
@@ -2197,8 +2211,8 @@ def _obtain_arff_trace(
         """
         if not self._is_hpo_class(model):
             raise AssertionError(
-                "Flow model %s is not an instance of sklearn.model_selection._search.BaseSearchCV"
-                % model,
+                f"Flow model {model} is not an instance of "
+                "sklearn.model_selection._search.BaseSearchCV",
             )
         if not hasattr(model, "cv_results_"):
             raise ValueError("model should contain `cv_results_`")
@@ -2235,7 +2249,7 @@ def _obtain_arff_trace(
                         # hyperparameter layer_sizes of MLPClassifier
                         type = "STRING"  # noqa: A001
                     else:
-                        raise TypeError("Unsupported param type in param grid: %s" % key)
+                        raise TypeError(f"Unsupported param type in param grid: {key}")
 
                 # renamed the attribute param to parameter, as this is a required
                 # OpenML convention - this also guards against name collisions
diff --git a/openml/flows/flow.py b/openml/flows/flow.py
index 4e437e35c..a3ff50ca1 100644
--- a/openml/flows/flow.py
+++ b/openml/flows/flow.py
@@ -135,15 +135,13 @@ def __init__(  # noqa: PLR0913
         keys_parameters_meta_info = set(parameters_meta_info.keys())
         if len(keys_parameters.difference(keys_parameters_meta_info)) > 0:
             raise ValueError(
-                "Parameter %s only in parameters, but not in "
-                "parameters_meta_info."
-                % str(keys_parameters.difference(keys_parameters_meta_info)),
+                f"Parameter {keys_parameters.difference(keys_parameters_meta_info)!s} only in "
+                "parameters, but not in parameters_meta_info.",
             )
         if len(keys_parameters_meta_info.difference(keys_parameters)) > 0:
             raise ValueError(
-                "Parameter %s only in parameters_meta_info, "
-                "but not in parameters."
-                % str(keys_parameters_meta_info.difference(keys_parameters)),
+                f"Parameter {keys_parameters_meta_info.difference(keys_parameters)!s} only in "
+                "parameters_meta_info, but not in parameters.",
             )
 
         self.external_version = external_version
diff --git a/openml/flows/functions.py b/openml/flows/functions.py
index b01e54b44..3d056ac60 100644
--- a/openml/flows/functions.py
+++ b/openml/flows/functions.py
@@ -140,8 +140,7 @@ def list_flows(
     tag: str | None = ...,
     output_format: Literal["dict"] = "dict",
     **kwargs: Any,
-) -> dict:
-    ...
+) -> dict: ...
 
 
 @overload
@@ -152,8 +151,7 @@ def list_flows(
     *,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...
 
 
 @overload
@@ -163,8 +161,7 @@ def list_flows(
     tag: str | None,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...
 
 
 def list_flows(
@@ -243,18 +240,15 @@ def list_flows(
 
 
 @overload
-def _list_flows(output_format: Literal["dict"] = ..., **kwargs: Any) -> dict:
-    ...
+def _list_flows(output_format: Literal["dict"] = ..., **kwargs: Any) -> dict: ...
 
 
 @overload
-def _list_flows(*, output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame:
-    ...
+def _list_flows(*, output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: ...
 
 
 @overload
-def _list_flows(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame:
-    ...
+def _list_flows(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: ...
 
 
 def _list_flows(
@@ -391,13 +385,11 @@ def get_flow_id(
 
 
 @overload
-def __list_flows(api_call: str, output_format: Literal["dict"] = "dict") -> dict:
-    ...
+def __list_flows(api_call: str, output_format: Literal["dict"] = "dict") -> dict: ...
 
 
 @overload
-def __list_flows(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame:
-    ...
+def __list_flows(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: ...
 
 
 def __list_flows(
@@ -453,7 +445,7 @@ def _check_flow_for_server_id(flow: OpenMLFlow) -> None:
     while len(stack) > 0:
         current = stack.pop()
         if current.flow_id is None:
-            raise ValueError("Flow %s has no flow_id!" % current.name)
+            raise ValueError(f"Flow {current.name} has no flow_id!")
 
         for component in current.components.values():
             stack.append(component)
@@ -492,10 +484,10 @@ def assert_flows_equal(  # noqa: C901, PLR0912, PLR0913, PLR0915
         Whether to ignore matching of flow descriptions.
     """
     if not isinstance(flow1, OpenMLFlow):
-        raise TypeError("Argument 1 must be of type OpenMLFlow, but is %s" % type(flow1))
+        raise TypeError(f"Argument 1 must be of type OpenMLFlow, but is {type(flow1)}")
 
     if not isinstance(flow2, OpenMLFlow):
-        raise TypeError("Argument 2 must be of type OpenMLFlow, but is %s" % type(flow2))
+        raise TypeError(f"Argument 2 must be of type OpenMLFlow, but is {type(flow2)}")
 
     # TODO as they are actually now saved during publish, it might be good to
     # check for the equality of these as well.
@@ -522,11 +514,11 @@ def assert_flows_equal(  # noqa: C901, PLR0912, PLR0913, PLR0915
             for name in set(attr1.keys()).union(attr2.keys()):
                 if name not in attr1:
                     raise ValueError(
-                        "Component %s only available in " "argument2, but not in argument1." % name,
+                        f"Component {name} only available in " "argument2, but not in argument1.",
                     )
                 if name not in attr2:
                     raise ValueError(
-                        "Component %s only available in " "argument2, but not in argument1." % name,
+                        f"Component {name} only available in " "argument1, but not in argument2.",
                     )
                 assert_flows_equal(
                     attr1[name],
@@ -549,9 +541,9 @@ def assert_flows_equal(  # noqa: C901, PLR0912, PLR0913, PLR0915
                     symmetric_difference = params_flow_1 ^ params_flow_2
                     if len(symmetric_difference) > 0:
                         raise ValueError(
-                            "Flow %s: parameter set of flow "
+                            f"Flow {flow1.name}: parameter set of flow "
                             "differs from the parameters stored "
-                            "on the server." % flow1.name,
+                            "on the server.",
                         )
 
                 if ignore_parameter_values_on_older_children:
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index f7963297d..510f767d5 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -679,9 +679,9 @@ def _calculate_local_measure(  # type: ignore
             user_defined_measures_per_fold[measure][rep_no][fold_no] = user_defined_measures_fold[
                 measure
             ]
-            user_defined_measures_per_sample[measure][rep_no][fold_no][
-                sample_no
-            ] = user_defined_measures_fold[measure]
+            user_defined_measures_per_sample[measure][rep_no][fold_no][sample_no] = (
+                user_defined_measures_fold[measure]
+            )
 
     trace: OpenMLRunTrace | None = None
     if len(traces) > 0:
@@ -783,13 +783,9 @@ def _run_task_get_arffcontent_parallel_helper(  # noqa: PLR0913
         raise NotImplementedError(task.task_type)
 
     config.logger.info(
-        "Going to run model {} on dataset {} for repeat {} fold {} sample {}".format(
-            str(model),
-            openml.datasets.get_dataset(task.dataset_id).name,
-            rep_no,
-            fold_no,
-            sample_no,
-        ),
+        f"Going to run model {model!s} on "
+        f"dataset {openml.datasets.get_dataset(task.dataset_id).name} "
+        f"for repeat {rep_no} fold {fold_no} sample {sample_no}"
     )
     (
         pred_y,
@@ -978,7 +974,7 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None):  # type: ignore
                 else:
                     raise ValueError(
                         'Could not find keys "value" or '
-                        '"array_data" in %s' % str(evaluation_dict.keys()),
+                        f'"array_data" in {evaluation_dict.keys()!s}',
                     )
                 if (
                     "@repeat" in evaluation_dict
@@ -1211,15 +1207,15 @@ def _list_runs(  # noqa: PLR0913
         for operator, value in kwargs.items():
             api_call += f"/{operator}/{value}"
     if id is not None:
-        api_call += "/run/%s" % ",".join([str(int(i)) for i in id])
+        api_call += "/run/{}".format(",".join([str(int(i)) for i in id]))
     if task is not None:
-        api_call += "/task/%s" % ",".join([str(int(i)) for i in task])
+        api_call += "/task/{}".format(",".join([str(int(i)) for i in task]))
     if setup is not None:
-        api_call += "/setup/%s" % ",".join([str(int(i)) for i in setup])
+        api_call += "/setup/{}".format(",".join([str(int(i)) for i in setup]))
     if flow is not None:
-        api_call += "/flow/%s" % ",".join([str(int(i)) for i in flow])
+        api_call += "/flow/{}".format(",".join([str(int(i)) for i in flow]))
     if uploader is not None:
-        api_call += "/uploader/%s" % ",".join([str(int(i)) for i in uploader])
+        api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploader]))
     if study is not None:
         api_call += "/study/%d" % study
     if display_errors:
diff --git a/openml/runs/run.py b/openml/runs/run.py
index 766f8c97f..945264131 100644
--- a/openml/runs/run.py
+++ b/openml/runs/run.py
@@ -480,7 +480,7 @@ def _generate_arff_dict(self) -> OrderedDict[str, Any]:
             ]
 
         else:
-            raise NotImplementedError("Task type %s is not yet supported." % str(task.task_type))
+            raise NotImplementedError(f"Task type {task.task_type!s} is not yet supported.")
 
         return arff_dict
 
diff --git a/openml/runs/trace.py b/openml/runs/trace.py
index 3b7d60c2f..bc9e1b5d6 100644
--- a/openml/runs/trace.py
+++ b/openml/runs/trace.py
@@ -80,8 +80,8 @@ def __post_init__(self) -> None:
 
         if self.parameters is not None and not isinstance(self.parameters, dict):
             raise TypeError(
-                "argument parameters is not an instance of OrderedDict, but %s"
-                % str(type(self.parameters)),
+                f"argument parameters is not an instance of OrderedDict, but"
+                f" {type(self.parameters)!s}",
             )
 
     def get_parameters(self) -> dict[str, Any]:
@@ -351,7 +351,7 @@ def _trace_from_arff_struct(
 
         for required_attribute in REQUIRED_ATTRIBUTES:
             if required_attribute not in attribute_idx:
-                raise ValueError("arff misses required attribute: %s" % required_attribute)
+                raise ValueError(f"arff misses required attribute: {required_attribute}")
         if "setup_string" in attribute_idx:
             raise ValueError(error_message)
 
@@ -383,7 +383,7 @@ def _trace_from_arff_struct(
             else:
                 raise ValueError(
                     'expected {"true", "false"} value for selected field, '
-                    "received: %s" % selected_value,
+                    f"received: {selected_value}",
                 )
 
             parameters = {
@@ -448,7 +448,7 @@ def trace_from_xml(cls, xml: str | Path | IO) -> OpenMLRunTrace:
             else:
                 raise ValueError(
                     'expected {"true", "false"} value for '
-                    "selected field, received: %s" % selected_value,
+                    f"selected field, received: {selected_value}",
                 )
 
             current = OpenMLTraceIteration(
@@ -504,10 +504,8 @@ def merge_traces(cls, traces: list[OpenMLRunTrace]) -> OpenMLRunTrace:
                     if list(param_keys) != list(trace_itr_keys):
                         raise ValueError(
                             "Cannot merge traces because the parameters are not equal: "
-                            "{} vs {}".format(
-                                list(trace_itr.parameters.keys()),
-                                list(iteration.parameters.keys()),
-                            ),
+                            f"{list(trace_itr.parameters.keys())} vs "
+                            f"{list(iteration.parameters.keys())}",
                         )
 
                 if key in merged_trace:
@@ -521,9 +519,9 @@ def merge_traces(cls, traces: list[OpenMLRunTrace]) -> OpenMLRunTrace:
         return cls(None, merged_trace)
 
     def __repr__(self) -> str:
-        return "[Run id: {}, {} trace iterations]".format(
-            -1 if self.run_id is None else self.run_id,
-            len(self.trace_iterations),
+        return (
+            f"[Run id: {-1 if self.run_id is None else self.run_id}, "
+            f"{len(self.trace_iterations)} trace iterations]"
         )
 
     def __iter__(self) -> Iterator[OpenMLTraceIteration]:
diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index ee0c6d707..0bcd2b4e2 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -212,7 +212,7 @@ def _list_setups(
     """
     api_call = "setup/list"
     if setup is not None:
-        api_call += "/setup/%s" % ",".join([str(int(i)) for i in setup])
+        api_call += "/setup/{}".format(",".join([str(int(i)) for i in setup]))
     if kwargs is not None:
         for operator, value in kwargs.items():
             api_call += f"/{operator}/{value}"
@@ -230,13 +230,12 @@ def __list_setups(
     # Minimalistic check if the XML is useful
     if "oml:setups" not in setups_dict:
         raise ValueError(
-            'Error in return XML, does not contain "oml:setups":' " %s" % str(setups_dict),
+            'Error in return XML, does not contain "oml:setups":' f" {setups_dict!s}",
         )
 
     if "@xmlns:oml" not in setups_dict["oml:setups"]:
         raise ValueError(
-            "Error in return XML, does not contain "
-            '"oml:setups"/@xmlns:oml: %s' % str(setups_dict),
+            "Error in return XML, does not contain " f'"oml:setups"/@xmlns:oml: {setups_dict!s}',
         )
 
     if setups_dict["oml:setups"]["@xmlns:oml"] != openml_uri:
@@ -364,7 +363,7 @@ def _create_setup_from_xml(
         else:
             raise ValueError(
                 "Expected None, list or dict, received "
-                "something else: %s" % str(type(xml_parameters)),
+                f"something else: {type(xml_parameters)!s}",
             )
 
     if _output_format in ["dataframe", "dict"]:
diff --git a/openml/study/functions.py b/openml/study/functions.py
index 9d726d286..7fdc6f636 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -90,7 +90,7 @@ def _get_study(id_: int | str, entity_type: str) -> BaseStudy:
     )
     result_dict = xmltodict.parse(xml_string, force_list=force_list_tags)["oml:study"]
     study_id = int(result_dict["oml:id"])
-    alias = result_dict["oml:alias"] if "oml:alias" in result_dict else None
+    alias = result_dict.get("oml:alias", None)
     main_entity_type = result_dict["oml:main_entity_type"]
 
     if entity_type != main_entity_type:
@@ -99,9 +99,7 @@ def _get_study(id_: int | str, entity_type: str) -> BaseStudy:
             f", expected '{entity_type}'"
         )
 
-    benchmark_suite = (
-        result_dict["oml:benchmark_suite"] if "oml:benchmark_suite" in result_dict else None
-    )
+    benchmark_suite = result_dict.get("oml:benchmark_suite", None)
     name = result_dict["oml:name"]
     description = result_dict["oml:description"]
     status = result_dict["oml:status"]
@@ -300,7 +298,7 @@ def update_study_status(study_id: int, status: str) -> None:
     """
     legal_status = {"active", "deactivated"}
     if status not in legal_status:
-        raise ValueError("Illegal status value. " "Legal values: %s" % legal_status)
+        raise ValueError("Illegal status value. " f"Legal values: {legal_status}")
     data = {"study_id": study_id, "status": status}  # type: openml._api_calls.DATA_TYPE
     result_xml = openml._api_calls._perform_api_call("study/status/update", "post", data=data)
     result = xmltodict.parse(result_xml)
@@ -442,8 +440,7 @@ def list_suites(
     status: str | None = ...,
     uploader: list[int] | None = ...,
     output_format: Literal["dict"] = "dict",
-) -> dict:
-    ...
+) -> dict: ...
 
 
 @overload
@@ -453,8 +450,7 @@ def list_suites(
     status: str | None = ...,
     uploader: list[int] | None = ...,
     output_format: Literal["dataframe"] = "dataframe",
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...
 
 
 def list_suites(
@@ -538,8 +534,7 @@ def list_studies(
     uploader: list[str] | None = ...,
     benchmark_suite: int | None = ...,
     output_format: Literal["dict"] = "dict",
-) -> dict:
-    ...
+) -> dict: ...
 
 
 @overload
@@ -550,8 +545,7 @@ def list_studies(
     uploader: list[str] | None = ...,
     benchmark_suite: int | None = ...,
     output_format: Literal["dataframe"] = "dataframe",
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...
 
 
 def list_studies(
@@ -637,13 +631,11 @@ def list_studies(
 
 
 @overload
-def _list_studies(output_format: Literal["dict"] = "dict", **kwargs: Any) -> dict:
-    ...
+def _list_studies(output_format: Literal["dict"] = "dict", **kwargs: Any) -> dict: ...
 
 
 @overload
-def _list_studies(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame:
-    ...
+def _list_studies(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: ...
 
 
 def _list_studies(
@@ -674,13 +666,11 @@ def _list_studies(
 
 
 @overload
-def __list_studies(api_call: str, output_format: Literal["dict"] = "dict") -> dict:
-    ...
+def __list_studies(api_call: str, output_format: Literal["dict"] = "dict") -> dict: ...
 
 
 @overload
-def __list_studies(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame:
-    ...
+def __list_studies(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: ...
 
 
 def __list_studies(
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
index 9fd2e4be1..54030422d 100644
--- a/openml/tasks/functions.py
+++ b/openml/tasks/functions.py
@@ -98,8 +98,9 @@ def _get_estimation_procedure_list() -> list[dict[str, Any]]:
         raise ValueError(
             "Error in return XML, value of "
             "oml:estimationprocedures/@xmlns:oml is not "
-            "http://openml.org/openml, but %s"
-            % str(procs_dict["oml:estimationprocedures"]["@xmlns:oml"]),
+            "http://openml.org/openml, but {}".format(
+                str(procs_dict["oml:estimationprocedures"]["@xmlns:oml"])
+            ),
         )
 
     procs: list[dict[str, Any]] = []
@@ -276,7 +277,7 @@ def __list_tasks(  # noqa: PLR0912, C901
         raise ValueError(
             "Error in return XML, value of  "
             '"oml:runs"/@xmlns:oml is not '
-            '"http://openml.org/openml": %s' % str(tasks_dict),
+            f'"http://openml.org/openml": {tasks_dict!s}',
         )
 
     assert isinstance(tasks_dict["oml:tasks"]["oml:task"], list), type(tasks_dict["oml:tasks"])
@@ -527,7 +528,7 @@ def _create_task_from_xml(xml: str) -> OpenMLTask:
         TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
     }.get(task_type)
     if cls is None:
-        raise NotImplementedError("Task type %s not supported." % common_kwargs["task_type"])
+        raise NotImplementedError("Task type {} not supported.".format(common_kwargs["task_type"]))
     return cls(**common_kwargs)  # type: ignore
 
 
diff --git a/openml/tasks/split.py b/openml/tasks/split.py
index 81105f1fd..ac538496e 100644
--- a/openml/tasks/split.py
+++ b/openml/tasks/split.py
@@ -177,9 +177,9 @@ def get(self, repeat: int = 0, fold: int = 0, sample: int = 0) -> tuple[np.ndarr
             If the specified repeat, fold, or sample is not known.
         """
         if repeat not in self.split:
-            raise ValueError("Repeat %s not known" % str(repeat))
+            raise ValueError(f"Repeat {repeat!s} not known")
         if fold not in self.split[repeat]:
-            raise ValueError("Fold %s not known" % str(fold))
+            raise ValueError(f"Fold {fold!s} not known")
         if sample not in self.split[repeat][fold]:
-            raise ValueError("Sample %s not known" % str(sample))
+            raise ValueError(f"Sample {sample!s} not known")
         return self.split[repeat][fold][sample]
diff --git a/openml/tasks/task.py b/openml/tasks/task.py
index 064b834ba..e7d19bdce 100644
--- a/openml/tasks/task.py
+++ b/openml/tasks/task.py
@@ -207,7 +207,7 @@ def _to_dict(self) -> dict[str, dict[str, int | str | list[dict[str, Any]]]]:
             {"@name": "source_data", "#text": str(self.dataset_id)},
             {"@name": "estimation_procedure", "#text": str(self.estimation_procedure_id)},
         ]
-        if self.evaluation_measure is not None:  #
+        if self.evaluation_measure is not None:
             oml_input.append({"@name": "evaluation_measures", "#text": self.evaluation_measure})
 
         return {
@@ -283,8 +283,7 @@ def get_X_and_y(
     ) -> tuple[
         np.ndarray | scipy.sparse.spmatrix,
         np.ndarray | None,
-    ]:
-        ...
+    ]: ...
 
     @overload
     def get_X_and_y(
@@ -292,8 +291,7 @@ def get_X_and_y(
     ) -> tuple[
         pd.DataFrame,
         pd.Series | pd.DataFrame | None,
-    ]:
-        ...
+    ]: ...
 
     # TODO(eddiebergman): Do all OpenMLSupervisedTask have a `y`?
     def get_X_and_y(
@@ -542,12 +540,10 @@ def __init__(  # noqa: PLR0913
     def get_X(
         self,
         dataset_format: Literal["array"] = "array",
-    ) -> np.ndarray | scipy.sparse.spmatrix:
-        ...
+    ) -> np.ndarray | scipy.sparse.spmatrix: ...
 
     @overload
-    def get_X(self, dataset_format: Literal["dataframe"]) -> pd.DataFrame:
-        ...
+    def get_X(self, dataset_format: Literal["dataframe"]) -> pd.DataFrame: ...
 
     def get_X(
         self,
diff --git a/openml/testing.py b/openml/testing.py
index 529a304d4..9016ff6a9 100644
--- a/openml/testing.py
+++ b/openml/testing.py
@@ -182,7 +182,7 @@ def _get_sentinel(self, sentinel: str | None = None) -> str:
             md5.update(str(time.time()).encode("utf-8"))
             md5.update(str(os.getpid()).encode("utf-8"))
             sentinel = md5.hexdigest()[:10]
-            sentinel = "TEST%s" % sentinel
+            sentinel = f"TEST{sentinel}"
         return sentinel
 
     def _add_sentinel_to_flow_name(
diff --git a/openml/utils.py b/openml/utils.py
index a03610512..66c4df800 100644
--- a/openml/utils.py
+++ b/openml/utils.py
@@ -35,8 +35,7 @@ def extract_xml_tags(
     node: Mapping[str, Any],
     *,
     allow_none: Literal[True] = ...,
-) -> Any | None:
-    ...
+) -> Any | None: ...
 
 
 @overload
@@ -45,8 +44,7 @@ def extract_xml_tags(
     node: Mapping[str, Any],
     *,
     allow_none: Literal[False],
-) -> Any:
-    ...
+) -> Any: ...
 
 
 def extract_xml_tags(
@@ -198,7 +196,7 @@ def _delete_entity(entity_type: str, entity_id: int) -> bool:
         "user",
     }
     if entity_type not in legal_entities:
-        raise ValueError("Can't delete a %s" % entity_type)
+        raise ValueError(f"Can't delete a {entity_type}")
 
     url_suffix = "%s/%d" % (entity_type, entity_id)
     try:
@@ -245,8 +243,7 @@ def _list_all(
     list_output_format: Literal["dict"] = ...,
     *args: P.args,
     **filters: P.kwargs,
-) -> dict:
-    ...
+) -> dict: ...
 
 
 @overload
@@ -255,8 +252,7 @@ def _list_all(
     list_output_format: Literal["object"],
     *args: P.args,
     **filters: P.kwargs,
-) -> dict:
-    ...
+) -> dict: ...
 
 
 @overload
@@ -265,8 +261,7 @@ def _list_all(
     list_output_format: Literal["dataframe"],
     *args: P.args,
     **filters: P.kwargs,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...
 
 
 def _list_all(  # noqa: C901, PLR0912
@@ -376,7 +371,7 @@ def _create_cache_directory(key: str) -> Path:
 
     try:
         cache_dir.mkdir(exist_ok=True, parents=True)
-    except Exception as e:  # noqa: BLE001
+    except Exception as e:
         raise openml.exceptions.OpenMLCacheException(
             f"Cannot create cache directory {cache_dir}."
         ) from e
@@ -412,7 +407,7 @@ def _create_cache_directory_for_id(key: str, id_: int) -> Path:
     """
     cache_dir = _get_cache_dir_for_id(key, id_, create=True)
     if cache_dir.exists() and not cache_dir.is_dir():
-        raise ValueError("%s cache dir exists but is not a directory!" % key)
+        raise ValueError(f"{key} cache dir exists but is not a directory!")
 
     cache_dir.mkdir(exist_ok=True, parents=True)
     return cache_dir
diff --git a/pyproject.toml b/pyproject.toml
index ffb1eb001..c5a3dac0e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -127,12 +127,79 @@ markers = [
 
 # https://github.com/charliermarsh/ruff
 [tool.ruff]
-target-version = "py37"
+target-version = "py38"
 line-length = 100
-show-source = true
+output-format = "grouped"
 src = ["openml", "tests", "examples"]
 unsafe-fixes = true
 
+exclude = [
+  # TODO(eddiebergman): Tests should be re-enabled after the refactor
+  "tests",
+  #
+  ".bzr",
+  ".direnv",
+  ".eggs",
+  ".git",
+  ".hg",
+  ".mypy_cache",
+  ".nox",
+  ".pants.d",
+  ".ruff_cache",
+  ".svn",
+  ".tox",
+  ".venv",
+  "__pypackages__",
+  "_build",
+  "buck-out",
+  "build",
+  "dist",
+  "node_modules",
+  "venv",
+  "docs",
+]
+
+# Exclude a variety of commonly ignored directories.
+[tool.ruff.lint.per-file-ignores]
+"tests/*.py" = [
+  "D100",   # Undocumented public module
+  "D101",   # Missing docstring in public class
+  "D102",   # Missing docstring in public method
+  "D103",   # Missing docstring in public function
+  "S101",   # Use of assert
+  "ANN201", # Missing return type annotation for public function
+  "FBT001", # Positional boolean argument
+  "PLR2004",# No use of magic numbers
+  "PD901",  #  X is a bad variable name. (pandas)
+  "TCH",    # https://docs.astral.sh/ruff/rules/#flake8-type-checking-tch
+  "N803",   # Argument name {name} should be lowercase
+]
+"openml/cli.py" = [
+  "T201",   # print found
+  "T203",   # pprint found
+]
+"openml/__version__.py" = [
+  "D100",   # Undocumented public module
+]
+"__init__.py" = [
+  "I002",   # Missing required import (i.e. from __future__ import annotations)
+]
+"examples/*.py" = [
+  "D101",   # Missing docstring in public class
+  "D102",   # Missing docstring in public method
+  "D103",   # Missing docstring in public function
+  "D415",   # First line should end with a . or ? or !
+  "INP001", # File is part of an implicit namespace package, add an __init__.py
+  "I002",   # Missing required import (i.e. from __future__ import annotations)
+  "E741",   # Ambiguous variable name
+  "T201",   # print found
+  "T203",   # pprint found
+  "ERA001", # Found commented-out code
+  "E402",   # Module level import not at top of cell
+  "E501",   # Line too long
+]
+
+[tool.ruff.lint]
 # Allow unused variables when underscore-prefixed.
 dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
 
@@ -212,74 +279,9 @@ ignore = [
   "N802",    # Public function name should be lower case (i.e. get_X())
 ]
 
-exclude = [
-  # TODO(eddiebergman): Tests should be re-enabled after the refactor
-  "tests",
-  #
-  ".bzr",
-  ".direnv",
-  ".eggs",
-  ".git",
-  ".hg",
-  ".mypy_cache",
-  ".nox",
-  ".pants.d",
-  ".ruff_cache",
-  ".svn",
-  ".tox",
-  ".venv",
-  "__pypackages__",
-  "_build",
-  "buck-out",
-  "build",
-  "dist",
-  "node_modules",
-  "venv",
-  "docs",
-]
-
-# Exclude a variety of commonly ignored directories.
-[tool.ruff.per-file-ignores]
-"tests/*.py" = [
-  "D100",   # Undocumented public module
-  "D101",   # Missing docstring in public class
-  "D102",   # Missing docstring in public method
-  "D103",   # Missing docstring in public function
-  "S101",   # Use of assert
-  "ANN201", # Missing return type annotation for public function
-  "FBT001", # Positional boolean argument
-  "PLR2004",# No use of magic numbers
-  "PD901",  #  X is a bad variable name. (pandas)
-  "TCH",    # https://docs.astral.sh/ruff/rules/#flake8-type-checking-tch
-  "N803",   # Argument name {name} should be lowercase
-]
-"openml/cli.py" = [
-  "T201",   # print found
-  "T203",   # pprint found
-]
-"openml/__version__.py" = [
-  "D100",   # Undocumented public module
-]
-"__init__.py" = [
-  "I002",   # Missing required import (i.e. from __future__ import annotations)
-]
-"examples/*.py" = [
-  "D101",   # Missing docstring in public class
-  "D102",   # Missing docstring in public method
-  "D103",   # Missing docstring in public function
-  "D415",   # First line should end with a . or ? or !
-  "INP001", # File is part of an implicit namespace package, add an __init__.py
-  "I002",   # Missing required import (i.e. from __future__ import annotations) 
-  "E741",   # Ambigiuous variable name
-  "T201",   # print found
-  "T203",   # pprint found
-  "ERA001", # found commeneted out code
-  "E402",   # Module level import not at top of cell
-  "E501",   # Line too long
-]
 
 
-[tool.ruff.isort]
+[tool.ruff.lint.isort]
 known-first-party = ["openml"]
 no-lines-before = ["future"]
 required-imports = ["from __future__ import annotations"]
@@ -287,7 +289,7 @@ combine-as-imports = true
 extra-standard-library = ["typing_extensions"]
 force-wrap-aliases = true
 
-[tool.ruff.pydocstyle]
+[tool.ruff.lint.pydocstyle]
 convention = "numpy"
 
 [tool.mypy]

From b830d7c4a9834341acd9545451757e3f461f8148 Mon Sep 17 00:00:00 2001
From: eddiebergman <eddiebergmanhs@gmail.com>
Date: Mon, 14 Oct 2024 10:49:15 +0200
Subject: [PATCH 3/3] maint: Update to 3.8 min

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index c5a3dac0e..0496bf23d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -293,7 +293,7 @@ force-wrap-aliases = true
 convention = "numpy"
 
 [tool.mypy]
-python_version = "3.7"
+python_version = "3.8"
 packages = ["openml", "tests"]
 
 show_error_codes = true