diff --git a/openml/_api_calls.py b/openml/_api_calls.py
index 9865c86df..976d14c64 100644
--- a/openml/_api_calls.py
+++ b/openml/_api_calls.py
@@ -366,18 +366,15 @@ def _send_request(  # noqa: C901
         # -- Check if encoding is not UTF-8 perhaps
         if __is_checksum_equal(response.content, md5_checksum):
             raise OpenMLHashException(
-                "Checksum of downloaded file is unequal to the expected checksum {}"
-                "because the text encoding is not UTF-8 when downloading {}. "
+                f"Checksum of downloaded file is unequal to the expected checksum {md5_checksum} "
+                f"because the text encoding is not UTF-8 when downloading {url}. "
                 "There might be a sever-sided issue with the file, "
-                "see: https://github.com/openml/openml-python/issues/1180.".format(
-                    md5_checksum,
-                    url,
-                ),
+                "see: https://github.com/openml/openml-python/issues/1180.",
             )

         raise OpenMLHashException(
-            "Checksum of downloaded file is unequal to the expected checksum {} "
-            "when downloading {}.".format(md5_checksum, url),
+            f"Checksum of downloaded file is unequal to the expected checksum {md5_checksum} "
+            f"when downloading {url}.",
         )

     return response
diff --git a/openml/cli.py b/openml/cli.py
index 5732442d0..5def8ef4f 100644
--- a/openml/cli.py
+++ b/openml/cli.py
@@ -1,4 +1,5 @@
-""""Command Line Interface for `openml` to configure its settings."""
+"""Command Line Interface for `openml` to configure its settings."""
+
 from __future__ import annotations

 import argparse
diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py
index 0c9da1caf..d1f4d6092 100644
--- a/openml/datasets/dataset.py
+++ b/openml/datasets/dataset.py
@@ -156,14 +156,14 @@ def find_invalid_characters(string: str, pattern: str) -> str:
             )

         if dataset_id is None:
-            pattern = "^[\x00-\x7F]*$"
+            pattern = "^[\x00-\x7f]*$"
             if description and not re.match(pattern, description):
                 # not basiclatin (XSD complains)
                 invalid_characters = find_invalid_characters(description, pattern)
                 raise ValueError(
                     f"Invalid symbols {invalid_characters} in description: {description}",
                 )
-            pattern = "^[\x00-\x7F]*$"
+            pattern = "^[\x00-\x7f]*$"
             if citation and not re.match(pattern, citation):
                 # not basiclatin (XSD complains)
                 invalid_characters = find_invalid_characters(citation, pattern)
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index a797588d4..50a6ec2d6 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -85,8 +85,7 @@ def list_datasets(
     *,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 @overload
@@ -98,8 +97,7 @@ def list_datasets(
     tag: str | None,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 @overload
@@ -111,8 +109,7 @@ def list_datasets(
     tag: str | None = ...,
     output_format: Literal["dict"] = "dict",
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def list_datasets(
@@ -207,8 +204,7 @@ def _list_datasets(
     data_id: list | None = ...,
     output_format: Literal["dict"] = "dict",
     **kwargs: Any,
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -216,8 +212,7 @@ def _list_datasets(
     data_id: list | None = ...,
     output_format: Literal["dataframe"] = "dataframe",
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def _list_datasets(
@@ -261,13 +256,11 @@ def _list_datasets(


 @overload
-def __list_datasets(api_call: str, output_format: Literal["dict"] = "dict") -> dict:
-    ...
+def __list_datasets(api_call: str, output_format: Literal["dict"] = "dict") -> dict: ...


 @overload
-def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame:
-    ...
+def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: ...


 def __list_datasets(
@@ -804,10 +797,7 @@ def create_dataset(  # noqa: C901, PLR0912, PLR0915
         if not is_row_id_an_attribute:
             raise ValueError(
                 "'row_id_attribute' should be one of the data attribute. "
-                " Got '{}' while candidates are {}.".format(
-                    row_id_attribute,
-                    [attr[0] for attr in attributes_],
-                ),
+                f"Got '{row_id_attribute}' while candidates are {[attr[0] for attr in attributes_]}.",
             )

     if isinstance(data, pd.DataFrame):
diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py
index a854686d1..5edaad6c6 100644
--- a/openml/evaluations/functions.py
+++ b/openml/evaluations/functions.py
@@ -32,8 +32,7 @@ def list_evaluations(
     per_fold: bool | None = ...,
     sort_order: str | None = ...,
     output_format: Literal["dict", "object"] = "dict",
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -51,8 +50,7 @@ def list_evaluations(
     per_fold: bool | None = ...,
     sort_order: str | None = ...,
     output_format: Literal["dataframe"] = ...,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def list_evaluations(
diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
index 3427ca7c9..98b6df8b7 100644
--- a/openml/extensions/sklearn/extension.py
+++ b/openml/extensions/sklearn/extension.py
@@ -979,9 +979,7 @@ def flatten_all(list_):
                 # length 2 is for {VotingClassifier.estimators,
                 # Pipeline.steps, FeatureUnion.transformer_list}
                 # length 3 is for ColumnTransformer
-                msg = "Length of tuple of type {} does not match assumptions".format(
-                    sub_component_type,
-                )
+                msg = f"Length of tuple of type {sub_component_type} does not match assumptions"
                 raise ValueError(msg)

             if isinstance(sub_component, str):
@@ -1008,9 +1006,9 @@ def flatten_all(list_):

                 if identifier in reserved_keywords:
                     parent_model = f"{model.__module__}.{model.__class__.__name__}"
-                    msg = "Found element shadowing official " "parameter for {}: {}".format(
-                        parent_model,
-                        identifier,
+                    msg = (
+                        "Found element shadowing official "
+                        f"parameter for {parent_model}: {identifier}"
                     )
                     raise PyOpenMLError(msg)

@@ -1035,9 +1033,9 @@ def flatten_all(list_):
                     model=None,
                 )
                 component_reference: OrderedDict[str, str | dict] = OrderedDict()
-                component_reference[
-                    "oml-python:serialized_object"
-                ] = COMPOSITION_STEP_CONSTANT
+                component_reference["oml-python:serialized_object"] = (
+                    COMPOSITION_STEP_CONSTANT
+                )
                 cr_value: dict[str, Any] = OrderedDict()
                 cr_value["key"] = identifier
                 cr_value["step_name"] = identifier
@@ -1812,10 +1810,7 @@ def _prediction_to_probabilities(
                 # then we need to add a column full of zeros into the probabilities
                 # for class 3 because the rest of the library expects that the
                 # probabilities are ordered the same way as the classes are ordered).
-                message = "Estimator only predicted for {}/{} classes!".format(
-                    proba_y.shape[1],
-                    len(task.class_labels),
-                )
+                message = f"Estimator only predicted for {proba_y.shape[1]}/{len(task.class_labels)} classes!"
                 warnings.warn(message, stacklevel=2)
                 openml.config.logger.warning(message)

@@ -2008,9 +2003,7 @@ def is_subcomponent_specification(values):
                     pass
                 else:
                     raise TypeError(
-                        "Subcomponent flow should be of type flow, but is {}".format(
-                            type(subcomponent_flow),
-                        ),
+                        f"Subcomponent flow should be of type flow, but is {type(subcomponent_flow)}",
                     )

             current = {
diff --git a/openml/flows/functions.py b/openml/flows/functions.py
index b01e54b44..b1c3e0911 100644
--- a/openml/flows/functions.py
+++ b/openml/flows/functions.py
@@ -140,8 +140,7 @@ def list_flows(
     tag: str | None = ...,
     output_format: Literal["dict"] = "dict",
     **kwargs: Any,
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -152,8 +151,7 @@ def list_flows(
     *,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 @overload
@@ -163,8 +161,7 @@ def list_flows(
     tag: str | None,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def list_flows(
@@ -243,18 +240,15 @@ def list_flows(


 @overload
-def _list_flows(output_format: Literal["dict"] = ..., **kwargs: Any) -> dict:
-    ...
+def _list_flows(output_format: Literal["dict"] = ..., **kwargs: Any) -> dict: ...


 @overload
-def _list_flows(*, output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame:
-    ...
+def _list_flows(*, output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: ...


 @overload
-def _list_flows(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame:
-    ...
+def _list_flows(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: ...


 def _list_flows(
@@ -391,13 +385,11 @@ def get_flow_id(


 @overload
-def __list_flows(api_call: str, output_format: Literal["dict"] = "dict") -> dict:
-    ...
+def __list_flows(api_call: str, output_format: Literal["dict"] = "dict") -> dict: ...


 @overload
-def __list_flows(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame:
-    ...
+def __list_flows(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: ...


 def __list_flows(
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 7a082e217..d35492359 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -680,9 +680,9 @@ def _calculate_local_measure(  # type: ignore
                 user_defined_measures_per_fold[measure][rep_no][fold_no] = user_defined_measures_fold[
                     measure
                 ]
-                user_defined_measures_per_sample[measure][rep_no][fold_no][
-                    sample_no
-                ] = user_defined_measures_fold[measure]
+                user_defined_measures_per_sample[measure][rep_no][fold_no][sample_no] = (
+                    user_defined_measures_fold[measure]
+                )

     trace: OpenMLRunTrace | None = None
     if len(traces) > 0:
@@ -784,13 +784,7 @@ def _run_task_get_arffcontent_parallel_helper(  # noqa: PLR0913
         raise NotImplementedError(task.task_type)

     config.logger.info(
-        "Going to run model {} on dataset {} for repeat {} fold {} sample {}".format(
-            str(model),
-            openml.datasets.get_dataset(task.dataset_id).name,
-            rep_no,
-            fold_no,
-            sample_no,
-        ),
+        f"Going to run model {model!s} on dataset {openml.datasets.get_dataset(task.dataset_id).name} for repeat {rep_no} fold {fold_no} sample {sample_no}",
     )
     (
         pred_y,
diff --git a/openml/runs/trace.py b/openml/runs/trace.py
index 3b7d60c2f..450e4e486 100644
--- a/openml/runs/trace.py
+++ b/openml/runs/trace.py
@@ -504,10 +504,7 @@ def merge_traces(cls, traces: list[OpenMLRunTrace]) -> OpenMLRunTrace:
             if list(param_keys) != list(trace_itr_keys):
                 raise ValueError(
                     "Cannot merge traces because the parameters are not equal: "
-                    "{} vs {}".format(
-                        list(trace_itr.parameters.keys()),
-                        list(iteration.parameters.keys()),
-                    ),
+                    f"{list(trace_itr.parameters.keys())} vs {list(iteration.parameters.keys())}",
                 )

             if key in merged_trace:
@@ -521,10 +518,7 @@ def merge_traces(cls, traces: list[OpenMLRunTrace]) -> OpenMLRunTrace:
         return cls(None, merged_trace)

     def __repr__(self) -> str:
-        return "[Run id: {}, {} trace iterations]".format(
-            -1 if self.run_id is None else self.run_id,
-            len(self.trace_iterations),
-        )
+        return f"[Run id: {-1 if self.run_id is None else self.run_id}, {len(self.trace_iterations)} trace iterations]"

     def __iter__(self) -> Iterator[OpenMLTraceIteration]:
         yield from self.trace_iterations.values()
diff --git a/openml/study/functions.py b/openml/study/functions.py
index 9d726d286..445a83c46 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -90,7 +90,7 @@ def _get_study(id_: int | str, entity_type: str) -> BaseStudy:
     )
     result_dict = xmltodict.parse(xml_string, force_list=force_list_tags)["oml:study"]
     study_id = int(result_dict["oml:id"])
-    alias = result_dict["oml:alias"] if "oml:alias" in result_dict else None
+    alias = result_dict.get("oml:alias", None)
     main_entity_type = result_dict["oml:main_entity_type"]

     if entity_type != main_entity_type:
@@ -99,9 +99,7 @@
             f", expected '{entity_type}'"
         )

-    benchmark_suite = (
-        result_dict["oml:benchmark_suite"] if "oml:benchmark_suite" in result_dict else None
-    )
+    benchmark_suite = result_dict.get("oml:benchmark_suite", None)
     name = result_dict["oml:name"]
     description = result_dict["oml:description"]
     status = result_dict["oml:status"]
@@ -442,8 +440,7 @@ def list_suites(
     status: str | None = ...,
     uploader: list[int] | None = ...,
     output_format: Literal["dict"] = "dict",
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -453,8 +450,7 @@ def list_suites(
     status: str | None = ...,
     uploader: list[int] | None = ...,
     output_format: Literal["dataframe"] = "dataframe",
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def list_suites(
@@ -538,8 +534,7 @@ def list_studies(
     uploader: list[str] | None = ...,
     benchmark_suite: int | None = ...,
     output_format: Literal["dict"] = "dict",
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -550,8 +545,7 @@ def list_studies(
     uploader: list[str] | None = ...,
     benchmark_suite: int | None = ...,
     output_format: Literal["dataframe"] = "dataframe",
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def list_studies(
@@ -637,13 +631,11 @@ def list_studies(


 @overload
-def _list_studies(output_format: Literal["dict"] = "dict", **kwargs: Any) -> dict:
-    ...
+def _list_studies(output_format: Literal["dict"] = "dict", **kwargs: Any) -> dict: ...


 @overload
-def _list_studies(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame:
-    ...
+def _list_studies(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: ...


 def _list_studies(
@@ -674,13 +666,11 @@ def _list_studies(


 @overload
-def __list_studies(api_call: str, output_format: Literal["dict"] = "dict") -> dict:
-    ...
+def __list_studies(api_call: str, output_format: Literal["dict"] = "dict") -> dict: ...


 @overload
-def __list_studies(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame:
-    ...
+def __list_studies(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: ...


 def __list_studies(
diff --git a/openml/tasks/task.py b/openml/tasks/task.py
index 4ad4cec62..c64fb987c 100644
--- a/openml/tasks/task.py
+++ b/openml/tasks/task.py
@@ -280,8 +280,7 @@ def get_X_and_y(
     ) -> tuple[
         np.ndarray | scipy.sparse.spmatrix,
         np.ndarray | None,
-    ]:
-        ...
+    ]: ...

     @overload
     def get_X_and_y(
@@ -289,8 +288,7 @@ def get_X_and_y(
     ) -> tuple[
         pd.DataFrame,
         pd.Series | pd.DataFrame | None,
-    ]:
-        ...
+    ]: ...

     # TODO(eddiebergman): Do all OpenMLSupervisedTask have a `y`?
     def get_X_and_y(
@@ -539,12 +537,10 @@ def __init__(  # noqa: PLR0913
     def get_X(
         self,
         dataset_format: Literal["array"] = "array",
-    ) -> np.ndarray | scipy.sparse.spmatrix:
-        ...
+    ) -> np.ndarray | scipy.sparse.spmatrix: ...

     @overload
-    def get_X(self, dataset_format: Literal["dataframe"]) -> pd.DataFrame:
-        ...
+    def get_X(self, dataset_format: Literal["dataframe"]) -> pd.DataFrame: ...

     def get_X(
         self,
diff --git a/openml/utils.py b/openml/utils.py
index 80d7caaae..b238acfbb 100644
--- a/openml/utils.py
+++ b/openml/utils.py
@@ -33,8 +33,7 @@ def extract_xml_tags(
     node: Mapping[str, Any],
     *,
     allow_none: Literal[True] = ...,
-) -> Any | None:
-    ...
+) -> Any | None: ...


 @overload
@@ -43,8 +42,7 @@ def extract_xml_tags(
     node: Mapping[str, Any],
     *,
     allow_none: Literal[False],
-) -> Any:
-    ...
+) -> Any: ...


 def extract_xml_tags(
@@ -243,8 +241,7 @@ def _list_all(
     list_output_format: Literal["dict"] = ...,
     *args: P.args,
     **filters: P.kwargs,
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -253,8 +250,7 @@ def _list_all(
     list_output_format: Literal["object"],
     *args: P.args,
     **filters: P.kwargs,
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -263,8 +259,7 @@ def _list_all(
     list_output_format: Literal["dataframe"],
     *args: P.args,
     **filters: P.kwargs,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def _list_all(  # noqa: C901, PLR0912