diff --git a/openml/_api_calls.py b/openml/_api_calls.py
index 9865c86df..e10ef0b65 100644
--- a/openml/_api_calls.py
+++ b/openml/_api_calls.py
@@ -366,18 +366,15 @@ def _send_request(  # noqa: C901
             # -- Check if encoding is not UTF-8 perhaps
             if __is_checksum_equal(response.content, md5_checksum):
                 raise OpenMLHashException(
-                    "Checksum of downloaded file is unequal to the expected checksum {}"
-                    "because the text encoding is not UTF-8 when downloading {}. "
+                    f"Checksum of downloaded file is unequal to the expected checksum {md5_checksum} "
+                    f"because the text encoding is not UTF-8 when downloading {url}. "
                     "There might be a sever-sided issue with the file, "
-                    "see: https://github.com/openml/openml-python/issues/1180.".format(
-                        md5_checksum,
-                        url,
-                    ),
+                    "see: https://github.com/openml/openml-python/issues/1180.",
                 )

             raise OpenMLHashException(
-                "Checksum of downloaded file is unequal to the expected checksum {} "
-                "when downloading {}.".format(md5_checksum, url),
+                f"Checksum of downloaded file is unequal to the expected checksum {md5_checksum} "
+                f"when downloading {url}.",
             )

     return response
@@ -443,7 +440,7 @@ def __parse_server_exception(
         server_exception = xmltodict.parse(response.text)
     except xml.parsers.expat.ExpatError as e:
         raise e
-    except Exception as e:  # noqa: BLE001
+    except Exception as e:
         # OpenML has a sophisticated error system
         # where information about failures is provided. try to parse this
        raise OpenMLServerError(
diff --git a/openml/cli.py b/openml/cli.py
index 5732442d0..5def8ef4f 100644
--- a/openml/cli.py
+++ b/openml/cli.py
@@ -1,4 +1,5 @@
-""""Command Line Interface for `openml` to configure its settings."""
+"""Command Line Interface for `openml` to configure its settings."""
+
 from __future__ import annotations

 import argparse
diff --git a/openml/config.py b/openml/config.py
index 4744dbe86..7541b54ee 100644
--- a/openml/config.py
+++ b/openml/config.py
@@ -273,8 +273,8 @@ def _setup(config: _Config | None = None) -> None:
         _root_cache_directory.mkdir(exist_ok=True, parents=True)
     except PermissionError:
         openml_logger.warning(
-            "No permission to create openml cache directory at %s! This can result in "
-            "OpenML-Python not working properly." % _root_cache_directory,
+            f"No permission to create openml cache directory at {_root_cache_directory}! This can result in "
+            "OpenML-Python not working properly.",
         )

     if cache_exists:
@@ -282,8 +282,8 @@ def _setup(config: _Config | None = None) -> None:
     else:
         _create_log_handlers(create_file_handler=False)
         openml_logger.warning(
-            "No permission to create OpenML directory at %s! This can result in OpenML-Python "
-            "not working properly." % config_dir,
+            f"No permission to create OpenML directory at {config_dir}! This can result in OpenML-Python "
+            "not working properly.",
         )
diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py
index 0c9da1caf..f3c49a178 100644
--- a/openml/datasets/dataset.py
+++ b/openml/datasets/dataset.py
@@ -156,14 +156,14 @@ def find_invalid_characters(string: str, pattern: str) -> str:
         )

     if dataset_id is None:
-        pattern = "^[\x00-\x7F]*$"
+        pattern = "^[\x00-\x7f]*$"
         if description and not re.match(pattern, description):
             # not basiclatin (XSD complains)
             invalid_characters = find_invalid_characters(description, pattern)
             raise ValueError(
                 f"Invalid symbols {invalid_characters} in description: {description}",
             )
-        pattern = "^[\x00-\x7F]*$"
+        pattern = "^[\x00-\x7f]*$"
         if citation and not re.match(pattern, citation):
             # not basiclatin (XSD complains)
             invalid_characters = find_invalid_characters(citation, pattern)
@@ -540,7 +540,7 @@ def _cache_compressed_file_from_file(
     elif data_file.suffix == ".pq":
         try:
             data = pd.read_parquet(data_file)
-        except Exception as e:  # noqa: BLE001
+        except Exception as e:
             raise Exception(f"File: {data_file}") from e

         categorical = [data[c].dtype.name == "category" for c in data.columns]
@@ -806,7 +806,7 @@ def get_data(  # noqa: C901, PLR0912, PLR0915
             to_exclude.extend(self.ignore_attribute)

         if len(to_exclude) > 0:
-            logger.info("Going to remove the following attributes: %s" % to_exclude)
+            logger.info(f"Going to remove the following attributes: {to_exclude}")
             keep = np.array([column not in to_exclude for column in attribute_names])
             data = data.loc[:, keep] if isinstance(data, pd.DataFrame) else data[:, keep]
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index a797588d4..18335ed8d 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -85,8 +85,7 @@ def list_datasets(
     *,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 @overload
@@ -98,8 +97,7 @@ def list_datasets(
     tag: str | None,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 @overload
@@ -111,8 +109,7 @@ def list_datasets(
     tag: str | None = ...,
     output_format: Literal["dict"] = "dict",
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def list_datasets(
@@ -207,8 +204,7 @@ def _list_datasets(
     data_id: list | None = ...,
     output_format: Literal["dict"] = "dict",
     **kwargs: Any,
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -216,8 +212,7 @@ def _list_datasets(
     data_id: list | None = ...,
     output_format: Literal["dataframe"] = "dataframe",
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def _list_datasets(
@@ -256,18 +251,16 @@ def _list_datasets(
     for operator, value in kwargs.items():
         api_call += f"/{operator}/{value}"
     if data_id is not None:
-        api_call += "/data_id/%s" % ",".join([str(int(i)) for i in data_id])
+        api_call += "/data_id/{}".format(",".join([str(int(i)) for i in data_id]))

     return __list_datasets(api_call=api_call, output_format=output_format)


 @overload
-def __list_datasets(api_call: str, output_format: Literal["dict"] = "dict") -> dict:
-    ...
+def __list_datasets(api_call: str, output_format: Literal["dict"] = "dict") -> dict: ...


 @overload
-def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame:
-    ...
+def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: ...


 def __list_datasets(
@@ -804,10 +797,7 @@ def create_dataset(  # noqa: C901, PLR0912, PLR0915
         if not is_row_id_an_attribute:
             raise ValueError(
                 "'row_id_attribute' should be one of the data attribute. "
-                " Got '{}' while candidates are {}.".format(
-                    row_id_attribute,
-                    [attr[0] for attr in attributes_],
-                ),
+                f" Got '{row_id_attribute}' while candidates are {[attr[0] for attr in attributes_]}.",
             )

     if isinstance(data, pd.DataFrame):
diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py
index a854686d1..a39096a58 100644
--- a/openml/evaluations/functions.py
+++ b/openml/evaluations/functions.py
@@ -32,8 +32,7 @@ def list_evaluations(
     per_fold: bool | None = ...,
     sort_order: str | None = ...,
     output_format: Literal["dict", "object"] = "dict",
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -51,8 +50,7 @@ def list_evaluations(
     per_fold: bool | None = ...,
     sort_order: str | None = ...,
     output_format: Literal["dataframe"] = ...,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def list_evaluations(
@@ -204,24 +202,24 @@ def _list_evaluations(
     -------
     dict of objects, or dataframe
     """
-    api_call = "evaluation/list/function/%s" % function
+    api_call = f"evaluation/list/function/{function}"
     if kwargs is not None:
         for operator, value in kwargs.items():
             api_call += f"/{operator}/{value}"
     if tasks is not None:
-        api_call += "/task/%s" % ",".join([str(int(i)) for i in tasks])
+        api_call += "/task/{}".format(",".join([str(int(i)) for i in tasks]))
     if setups is not None:
-        api_call += "/setup/%s" % ",".join([str(int(i)) for i in setups])
+        api_call += "/setup/{}".format(",".join([str(int(i)) for i in setups]))
     if flows is not None:
-        api_call += "/flow/%s" % ",".join([str(int(i)) for i in flows])
+        api_call += "/flow/{}".format(",".join([str(int(i)) for i in flows]))
     if runs is not None:
-        api_call += "/run/%s" % ",".join([str(int(i)) for i in runs])
+        api_call += "/run/{}".format(",".join([str(int(i)) for i in runs]))
     if uploaders is not None:
-        api_call += "/uploader/%s" % ",".join([str(int(i)) for i in uploaders])
+        api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploaders]))
     if study is not None:
         api_call += "/study/%d" % study
     if sort_order is not None:
-        api_call += "/sort_order/%s" % sort_order
+        api_call += f"/sort_order/{sort_order}"

     return __list_evaluations(api_call, output_format=output_format)
@@ -236,7 +234,7 @@ def __list_evaluations(
     # Minimalistic check if the XML is useful
     if "oml:evaluations" not in evals_dict:
         raise ValueError(
-            "Error in return XML, does not contain " '"oml:evaluations": %s' % str(evals_dict),
+            "Error in return XML, does not contain " f'"oml:evaluations": {evals_dict!s}',
         )

     assert isinstance(evals_dict["oml:evaluations"]["oml:evaluation"], list), type(
diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
index 3427ca7c9..075f1cdce 100644
--- a/openml/extensions/sklearn/extension.py
+++ b/openml/extensions/sklearn/extension.py
@@ -419,7 +419,7 @@ def _deserialize_sklearn(  # noqa: PLR0915, C901, PLR0913, PLR0912
                     strict_version=strict_version,
                 )
             else:
-                raise ValueError("Cannot flow_to_sklearn %s" % serialized_type)
+                raise ValueError(f"Cannot flow_to_sklearn {serialized_type}")

         else:
             rval = OrderedDict(
@@ -979,9 +977,7 @@ def flatten_all(list_):
                 # length 2 is for {VotingClassifier.estimators,
                 # Pipeline.steps, FeatureUnion.transformer_list}
                 # length 3 is for ColumnTransformer
-                msg = "Length of tuple of type {} does not match assumptions".format(
-                    sub_component_type,
-                )
+                msg = f"Length of tuple of type {sub_component_type} does not match assumptions"
                 raise ValueError(msg)

             if isinstance(sub_component, str):
@@ -989,7 +987,7 @@ def flatten_all(list_):
                 msg = (
                     "Second item of tuple does not match assumptions. "
                     "If string, can be only 'drop' or 'passthrough' but"
-                    "got %s" % sub_component
+                    f" got {sub_component}"
                 )
                 raise ValueError(msg)
             elif sub_component is None:
@@ -1002,15 +1000,15 @@ def flatten_all(list_):
             elif not isinstance(sub_component, OpenMLFlow):
                 msg = (
                     "Second item of tuple does not match assumptions. "
-                    "Expected OpenMLFlow, got %s" % type(sub_component)
+                    f"Expected OpenMLFlow, got {type(sub_component)}"
                 )
                 raise TypeError(msg)

             if identifier in reserved_keywords:
                 parent_model = f"{model.__module__}.{model.__class__.__name__}"
-                msg = "Found element shadowing official " "parameter for {}: {}".format(
-                    parent_model,
-                    identifier,
+                msg = (
+                    "Found element shadowing official "
+                    f"parameter for {parent_model}: {identifier}"
                 )
                 raise PyOpenMLError(msg)
@@ -1035,9 +1033,9 @@ def flatten_all(list_):
                     model=None,
                 )
                 component_reference: OrderedDict[str, str | dict] = OrderedDict()
-                component_reference[
-                    "oml-python:serialized_object"
-                ] = COMPOSITION_STEP_CONSTANT
+                component_reference["oml-python:serialized_object"] = (
+                    COMPOSITION_STEP_CONSTANT
+                )
                 cr_value: dict[str, Any] = OrderedDict()
                 cr_value["key"] = identifier
                 cr_value["step_name"] = identifier
@@ -1218,7 +1216,7 @@ def _check_dependencies(
         for dependency_string in dependencies_list:
             match = DEPENDENCIES_PATTERN.match(dependency_string)
             if not match:
-                raise ValueError("Cannot parse dependency %s" % dependency_string)
+                raise ValueError(f"Cannot parse dependency {dependency_string}")

             dependency_name = match.group("name")
             operation = match.group("operation")
@@ -1237,7 +1235,7 @@ def _check_dependencies(
                     installed_version > required_version
                     or installed_version == required_version
                 )
             else:
-                raise NotImplementedError("operation '%s' is not supported" % operation)
+                raise NotImplementedError(f"operation '{operation}' is not supported")
             message = (
                 "Trying to deserialize a model with dependency "
                 f"{dependency_string} not satisfied."
@@ -1812,10 +1810,7 @@ def _prediction_to_probabilities(
             # then we need to add a column full of zeros into the probabilities
             # for class 3 because the rest of the library expects that the
             # probabilities are ordered the same way as the classes are ordered).
-            message = "Estimator only predicted for {}/{} classes!".format(
-                proba_y.shape[1],
-                len(task.class_labels),
-            )
+            message = f"Estimator only predicted for {proba_y.shape[1]}/{len(task.class_labels)} classes!"
             warnings.warn(message, stacklevel=2)
             openml.config.logger.warning(message)
@@ -2008,9 +2003,7 @@ def is_subcomponent_specification(values):
                 pass
             else:
                 raise TypeError(
-                    "Subcomponent flow should be of type flow, but is {}".format(
-                        type(subcomponent_flow),
-                    ),
+                    f"Subcomponent flow should be of type flow, but is {type(subcomponent_flow)}",
                 )

             current = {
@@ -2129,8 +2122,7 @@ def instantiate_model_from_hpo_class(
         """
         if not self._is_hpo_class(model):
             raise AssertionError(
-                "Flow model %s is not an instance of sklearn.model_selection._search.BaseSearchCV"
-                % model,
+                f"Flow model {model} is not an instance of sklearn.model_selection._search.BaseSearchCV",
             )
         base_estimator = model.estimator
         base_estimator.set_params(**trace_iteration.get_parameters())
@@ -2192,8 +2184,7 @@ def _obtain_arff_trace(
         """
         if not self._is_hpo_class(model):
             raise AssertionError(
-                "Flow model %s is not an instance of sklearn.model_selection._search.BaseSearchCV"
-                % model,
+                f"Flow model {model} is not an instance of sklearn.model_selection._search.BaseSearchCV",
             )
         if not hasattr(model, "cv_results_"):
             raise ValueError("model should contain `cv_results_`")
@@ -2228,7 +2219,7 @@ def _obtain_arff_trace(
                 # hyperparameter layer_sizes of MLPClassifier
                 type = "STRING"  # noqa: A001
             else:
-                raise TypeError("Unsupported param type in param grid: %s" % key)
+                raise TypeError(f"Unsupported param type in param grid: {key}")

             # renamed the attribute param to parameter, as this is a required
             # OpenML convention - this also guards against name collisions
diff --git a/openml/flows/flow.py b/openml/flows/flow.py
index 4e437e35c..8b9f584fb 100644
--- a/openml/flows/flow.py
+++ b/openml/flows/flow.py
@@ -135,15 +135,13 @@ def __init__(  # noqa: PLR0913
             keys_parameters_meta_info = set(parameters_meta_info.keys())
             if len(keys_parameters.difference(keys_parameters_meta_info)) > 0:
                 raise ValueError(
-                    "Parameter %s only in parameters, but not in "
-                    "parameters_meta_info."
-                    % str(keys_parameters.difference(keys_parameters_meta_info)),
+                    f"Parameter {keys_parameters.difference(keys_parameters_meta_info)!s} only in parameters, but not in "
+                    "parameters_meta_info.",
                 )
             if len(keys_parameters_meta_info.difference(keys_parameters)) > 0:
                 raise ValueError(
-                    "Parameter %s only in parameters_meta_info, "
-                    "but not in parameters."
-                    % str(keys_parameters_meta_info.difference(keys_parameters)),
+                    f"Parameter {keys_parameters_meta_info.difference(keys_parameters)!s} only in parameters_meta_info, "
+                    "but not in parameters.",
                 )

         self.external_version = external_version
diff --git a/openml/flows/functions.py b/openml/flows/functions.py
index b01e54b44..3d056ac60 100644
--- a/openml/flows/functions.py
+++ b/openml/flows/functions.py
@@ -140,8 +140,7 @@ def list_flows(
     tag: str | None = ...,
     output_format: Literal["dict"] = "dict",
     **kwargs: Any,
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -152,8 +151,7 @@ def list_flows(
     *,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 @overload
@@ -163,8 +161,7 @@ def list_flows(
     tag: str | None,
     output_format: Literal["dataframe"],
     **kwargs: Any,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def list_flows(
@@ -243,18 +240,15 @@ def list_flows(


 @overload
-def _list_flows(output_format: Literal["dict"] = ..., **kwargs: Any) -> dict:
-    ...
+def _list_flows(output_format: Literal["dict"] = ..., **kwargs: Any) -> dict: ...


 @overload
-def _list_flows(*, output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame:
-    ...
+def _list_flows(*, output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: ...


 @overload
-def _list_flows(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame:
-    ...
+def _list_flows(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: ...


 def _list_flows(
@@ -391,13 +385,11 @@ def get_flow_id(


 @overload
-def __list_flows(api_call: str, output_format: Literal["dict"] = "dict") -> dict:
-    ...
+def __list_flows(api_call: str, output_format: Literal["dict"] = "dict") -> dict: ...


 @overload
-def __list_flows(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame:
-    ...
+def __list_flows(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: ...


 def __list_flows(
@@ -453,7 +445,7 @@ def _check_flow_for_server_id(flow: OpenMLFlow) -> None:
     while len(stack) > 0:
         current = stack.pop()
         if current.flow_id is None:
-            raise ValueError("Flow %s has no flow_id!" % current.name)
+            raise ValueError(f"Flow {current.name} has no flow_id!")

         for component in current.components.values():
             stack.append(component)
@@ -492,10 +484,10 @@ def assert_flows_equal(  # noqa: C901, PLR0912, PLR0913, PLR0915
         Whether to ignore matching of flow descriptions.
     """
     if not isinstance(flow1, OpenMLFlow):
-        raise TypeError("Argument 1 must be of type OpenMLFlow, but is %s" % type(flow1))
+        raise TypeError(f"Argument 1 must be of type OpenMLFlow, but is {type(flow1)}")

     if not isinstance(flow2, OpenMLFlow):
-        raise TypeError("Argument 2 must be of type OpenMLFlow, but is %s" % type(flow2))
+        raise TypeError(f"Argument 2 must be of type OpenMLFlow, but is {type(flow2)}")

     # TODO as they are actually now saved during publish, it might be good to
     # check for the equality of these as well.
@@ -522,11 +514,11 @@ def assert_flows_equal(  # noqa: C901, PLR0912, PLR0913, PLR0915
             for name in set(attr1.keys()).union(attr2.keys()):
                 if name not in attr1:
                     raise ValueError(
-                        "Component %s only available in " "argument2, but not in argument1." % name,
+                        f"Component {name} only available in " "argument2, but not in argument1.",
                     )
                 if name not in attr2:
                     raise ValueError(
-                        "Component %s only available in " "argument2, but not in argument1." % name,
+                        f"Component {name} only available in " "argument1, but not in argument2.",
                     )
                 assert_flows_equal(
                     attr1[name],
@@ -549,9 +541,9 @@ def assert_flows_equal(  # noqa: C901, PLR0912, PLR0913, PLR0915
             symmetric_difference = params_flow_1 ^ params_flow_2
             if len(symmetric_difference) > 0:
                 raise ValueError(
-                    "Flow %s: parameter set of flow "
+                    f"Flow {flow1.name}: parameter set of flow "
                     "differs from the parameters stored "
-                    "on the server." % flow1.name,
+                    "on the server.",
                 )

     if ignore_parameter_values_on_older_children:
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 7a082e217..e42097cee 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -680,9 +680,9 @@ def _calculate_local_measure(  # type: ignore
             user_defined_measures_per_fold[measure][rep_no][fold_no] = user_defined_measures_fold[
                 measure
             ]
-            user_defined_measures_per_sample[measure][rep_no][fold_no][
-                sample_no
-            ] = user_defined_measures_fold[measure]
+            user_defined_measures_per_sample[measure][rep_no][fold_no][sample_no] = (
+                user_defined_measures_fold[measure]
+            )

     trace: OpenMLRunTrace | None = None
     if len(traces) > 0:
@@ -784,13 +784,7 @@ def _run_task_get_arffcontent_parallel_helper(  # noqa: PLR0913
         raise NotImplementedError(task.task_type)

     config.logger.info(
-        "Going to run model {} on dataset {} for repeat {} fold {} sample {}".format(
-            str(model),
-            openml.datasets.get_dataset(task.dataset_id).name,
-            rep_no,
-            fold_no,
-            sample_no,
-        ),
+        f"Going to run model {model!s} on dataset {openml.datasets.get_dataset(task.dataset_id).name} for repeat {rep_no} fold {fold_no} sample {sample_no}",
     )
     (
         pred_y,
@@ -979,7 +973,7 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None):  # type: ignore
             else:
                 raise ValueError(
                     'Could not find keys "value" or '
-                    '"array_data" in %s' % str(evaluation_dict.keys()),
+                    f'"array_data" in {evaluation_dict.keys()!s}',
                 )
             if (
                 "@repeat" in evaluation_dict
@@ -1212,15 +1206,15 @@ def _list_runs(  # noqa: PLR0913
         for operator, value in kwargs.items():
             api_call += f"/{operator}/{value}"
     if id is not None:
-        api_call += "/run/%s" % ",".join([str(int(i)) for i in id])
+        api_call += "/run/{}".format(",".join([str(int(i)) for i in id]))
     if task is not None:
-        api_call += "/task/%s" % ",".join([str(int(i)) for i in task])
+        api_call += "/task/{}".format(",".join([str(int(i)) for i in task]))
     if setup is not None:
-        api_call += "/setup/%s" % ",".join([str(int(i)) for i in setup])
+        api_call += "/setup/{}".format(",".join([str(int(i)) for i in setup]))
     if flow is not None:
-        api_call += "/flow/%s" % ",".join([str(int(i)) for i in flow])
+        api_call += "/flow/{}".format(",".join([str(int(i)) for i in flow]))
     if uploader is not None:
-        api_call += "/uploader/%s" % ",".join([str(int(i)) for i in uploader])
+        api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploader]))
     if study is not None:
         api_call += "/study/%d" % study
     if display_errors:
diff --git a/openml/runs/run.py b/openml/runs/run.py
index 766f8c97f..945264131 100644
--- a/openml/runs/run.py
+++ b/openml/runs/run.py
@@ -480,7 +480,7 @@ def _generate_arff_dict(self) -> OrderedDict[str, Any]:
             ]

         else:
-            raise NotImplementedError("Task type %s is not yet supported." % str(task.task_type))
+            raise NotImplementedError(f"Task type {task.task_type!s} is not yet supported.")

         return arff_dict
diff --git a/openml/runs/trace.py b/openml/runs/trace.py
index 3b7d60c2f..d9a703645 100644
--- a/openml/runs/trace.py
+++ b/openml/runs/trace.py
@@ -80,8 +80,7 @@ def __post_init__(self) -> None:

         if self.parameters is not None and not isinstance(self.parameters, dict):
             raise TypeError(
-                "argument parameters is not an instance of OrderedDict, but %s"
-                % str(type(self.parameters)),
+                f"argument parameters is not an instance of OrderedDict, but {type(self.parameters)!s}",
             )

     def get_parameters(self) -> dict[str, Any]:
@@ -351,7 +350,7 @@ def _trace_from_arff_struct(

         for required_attribute in REQUIRED_ATTRIBUTES:
             if required_attribute not in attribute_idx:
-                raise ValueError("arff misses required attribute: %s" % required_attribute)
+                raise ValueError(f"arff misses required attribute: {required_attribute}")
         if "setup_string" in attribute_idx:
             raise ValueError(error_message)
@@ -383,7 +382,7 @@ def _trace_from_arff_struct(
             else:
                 raise ValueError(
                     'expected {"true", "false"} value for selected field, '
-                    "received: %s" % selected_value,
+                    f"received: {selected_value}",
                 )

             parameters = {
@@ -448,7 +447,7 @@ def trace_from_xml(cls, xml: str | Path | IO) -> OpenMLRunTrace:
             else:
                 raise ValueError(
                     'expected {"true", "false"} value for '
-                    "selected field, received: %s" % selected_value,
+                    f"selected field, received: {selected_value}",
                 )

             current = OpenMLTraceIteration(
@@ -504,10 +503,7 @@ def merge_traces(cls, traces: list[OpenMLRunTrace]) -> OpenMLRunTrace:
                 if list(param_keys) != list(trace_itr_keys):
                     raise ValueError(
                         "Cannot merge traces because the parameters are not equal: "
-                        "{} vs {}".format(
-                            list(trace_itr.parameters.keys()),
-                            list(iteration.parameters.keys()),
-                        ),
+                        f"{list(trace_itr.parameters.keys())} vs {list(iteration.parameters.keys())}",
                     )

                 if key in merged_trace:
@@ -521,10 +517,7 @@ def merge_traces(cls, traces: list[OpenMLRunTrace]) -> OpenMLRunTrace:
         return cls(None, merged_trace)

     def __repr__(self) -> str:
-        return "[Run id: {}, {} trace iterations]".format(
-            -1 if self.run_id is None else self.run_id,
-            len(self.trace_iterations),
-        )
+        return f"[Run id: {-1 if self.run_id is None else self.run_id}, {len(self.trace_iterations)} trace iterations]"

     def __iter__(self) -> Iterator[OpenMLTraceIteration]:
         yield from self.trace_iterations.values()
diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index ee0c6d707..0bcd2b4e2 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -212,7 +212,7 @@ def _list_setups(
     """
     api_call = "setup/list"
     if setup is not None:
-        api_call += "/setup/%s" % ",".join([str(int(i)) for i in setup])
+        api_call += "/setup/{}".format(",".join([str(int(i)) for i in setup]))
     if kwargs is not None:
         for operator, value in kwargs.items():
             api_call += f"/{operator}/{value}"
@@ -230,13 +230,12 @@ def __list_setups(
     # Minimalistic check if the XML is useful
     if "oml:setups" not in setups_dict:
         raise ValueError(
-            'Error in return XML, does not contain "oml:setups":' " %s" % str(setups_dict),
+            'Error in return XML, does not contain "oml:setups":' f" {setups_dict!s}",
         )

     if "@xmlns:oml" not in setups_dict["oml:setups"]:
         raise ValueError(
-            "Error in return XML, does not contain "
-            '"oml:setups"/@xmlns:oml: %s' % str(setups_dict),
+            "Error in return XML, does not contain " f'"oml:setups"/@xmlns:oml: {setups_dict!s}',
         )

     if setups_dict["oml:setups"]["@xmlns:oml"] != openml_uri:
@@ -364,7 +363,7 @@ def _create_setup_from_xml(
     else:
         raise ValueError(
             "Expected None, list or dict, received "
-            "something else: %s" % str(type(xml_parameters)),
+            f"something else: {type(xml_parameters)!s}",
         )

     if _output_format in ["dataframe", "dict"]:
diff --git a/openml/study/functions.py b/openml/study/functions.py
index 9d726d286..7fdc6f636 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -90,7 +90,7 @@ def _get_study(id_: int | str, entity_type: str) -> BaseStudy:
     )
     result_dict = xmltodict.parse(xml_string, force_list=force_list_tags)["oml:study"]
     study_id = int(result_dict["oml:id"])
-    alias = result_dict["oml:alias"] if "oml:alias" in result_dict else None
+    alias = result_dict.get("oml:alias", None)
     main_entity_type = result_dict["oml:main_entity_type"]

     if entity_type != main_entity_type:
@@ -99,9 +99,7 @@ def _get_study(id_: int | str, entity_type: str) -> BaseStudy:
             f", expected '{entity_type}'"
         )

-    benchmark_suite = (
-        result_dict["oml:benchmark_suite"] if "oml:benchmark_suite" in result_dict else None
-    )
+    benchmark_suite = result_dict.get("oml:benchmark_suite", None)
     name = result_dict["oml:name"]
     description = result_dict["oml:description"]
     status = result_dict["oml:status"]
@@ -300,7 +298,7 @@ def update_study_status(study_id: int, status: str) -> None:
     """
     legal_status = {"active", "deactivated"}
     if status not in legal_status:
-        raise ValueError("Illegal status value. " "Legal values: %s" % legal_status)
+        raise ValueError("Illegal status value. " f"Legal values: {legal_status}")
     data = {"study_id": study_id, "status": status}  # type: openml._api_calls.DATA_TYPE
     result_xml = openml._api_calls._perform_api_call("study/status/update", "post", data=data)
     result = xmltodict.parse(result_xml)
@@ -442,8 +440,7 @@ def list_suites(
     status: str | None = ...,
     uploader: list[int] | None = ...,
     output_format: Literal["dict"] = "dict",
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -453,8 +450,7 @@ def list_suites(
     status: str | None = ...,
     uploader: list[int] | None = ...,
     output_format: Literal["dataframe"] = "dataframe",
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def list_suites(
@@ -538,8 +534,7 @@ def list_studies(
     uploader: list[str] | None = ...,
     benchmark_suite: int | None = ...,
     output_format: Literal["dict"] = "dict",
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -550,8 +545,7 @@ def list_studies(
     uploader: list[str] | None = ...,
     benchmark_suite: int | None = ...,
     output_format: Literal["dataframe"] = "dataframe",
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def list_studies(
@@ -637,13 +631,11 @@ def list_studies(


 @overload
-def _list_studies(output_format: Literal["dict"] = "dict", **kwargs: Any) -> dict:
-    ...
+def _list_studies(output_format: Literal["dict"] = "dict", **kwargs: Any) -> dict: ...


 @overload
-def _list_studies(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame:
-    ...
+def _list_studies(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: ...


 def _list_studies(
@@ -674,13 +666,11 @@ def _list_studies(


 @overload
-def __list_studies(api_call: str, output_format: Literal["dict"] = "dict") -> dict:
-    ...
+def __list_studies(api_call: str, output_format: Literal["dict"] = "dict") -> dict: ...


 @overload
-def __list_studies(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame:
-    ...
+def __list_studies(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: ...


 def __list_studies(
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
index c763714bf..ec87d1c36 100644
--- a/openml/tasks/functions.py
+++ b/openml/tasks/functions.py
@@ -98,8 +98,9 @@ def _get_estimation_procedure_list() -> list[dict[str, Any]]:
         raise ValueError(
             "Error in return XML, value of "
             "oml:estimationprocedures/@xmlns:oml is not "
-            "http://openml.org/openml, but %s"
-            % str(procs_dict["oml:estimationprocedures"]["@xmlns:oml"]),
+            "http://openml.org/openml, but {}".format(
+                str(procs_dict["oml:estimationprocedures"]["@xmlns:oml"])
+            ),
         )

     procs: list[dict[str, Any]] = []
@@ -276,7 +277,7 @@ def __list_tasks(  # noqa: PLR0912, C901
         raise ValueError(
             "Error in return XML, value of "
             '"oml:runs"/@xmlns:oml is not '
-            '"http://openml.org/openml": %s' % str(tasks_dict),
+            f'"http://openml.org/openml": {tasks_dict!s}',
        )

     assert isinstance(tasks_dict["oml:tasks"]["oml:task"], list), type(tasks_dict["oml:tasks"])
@@ -541,7 +542,7 @@ def _create_task_from_xml(xml: str) -> OpenMLTask:
         TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
     }.get(task_type)
     if cls is None:
-        raise NotImplementedError("Task type %s not supported." % common_kwargs["task_type"])
+        raise NotImplementedError("Task type {} not supported.".format(common_kwargs["task_type"]))
     return cls(**common_kwargs)  # type: ignore
diff --git a/openml/tasks/split.py b/openml/tasks/split.py
index 81105f1fd..ac538496e 100644
--- a/openml/tasks/split.py
+++ b/openml/tasks/split.py
@@ -177,9 +177,9 @@ def get(self, repeat: int = 0, fold: int = 0, sample: int = 0) -> tuple[np.ndarr
             If the specified repeat, fold, or sample is not known.
         """
         if repeat not in self.split:
-            raise ValueError("Repeat %s not known" % str(repeat))
+            raise ValueError(f"Repeat {repeat!s} not known")
         if fold not in self.split[repeat]:
-            raise ValueError("Fold %s not known" % str(fold))
+            raise ValueError(f"Fold {fold!s} not known")
         if sample not in self.split[repeat][fold]:
-            raise ValueError("Sample %s not known" % str(sample))
+            raise ValueError(f"Sample {sample!s} not known")
         return self.split[repeat][fold][sample]
diff --git a/openml/tasks/task.py b/openml/tasks/task.py
index 4ad4cec62..c64fb987c 100644
--- a/openml/tasks/task.py
+++ b/openml/tasks/task.py
@@ -280,8 +280,7 @@ def get_X_and_y(
     ) -> tuple[
         np.ndarray | scipy.sparse.spmatrix,
         np.ndarray | None,
-    ]:
-        ...
+    ]: ...

     @overload
     def get_X_and_y(
@@ -289,8 +288,7 @@ def get_X_and_y(
     ) -> tuple[
         pd.DataFrame,
         pd.Series | pd.DataFrame | None,
-    ]:
-        ...
+    ]: ...

     # TODO(eddiebergman): Do all OpenMLSupervisedTask have a `y`?
     def get_X_and_y(
@@ -539,12 +537,10 @@ def __init__(  # noqa: PLR0913
     def get_X(
         self,
         dataset_format: Literal["array"] = "array",
-    ) -> np.ndarray | scipy.sparse.spmatrix:
-        ...
+    ) -> np.ndarray | scipy.sparse.spmatrix: ...

     @overload
-    def get_X(self, dataset_format: Literal["dataframe"]) -> pd.DataFrame:
-        ...
+    def get_X(self, dataset_format: Literal["dataframe"]) -> pd.DataFrame: ...

     def get_X(
         self,
diff --git a/openml/testing.py b/openml/testing.py
index 4af361507..4c9520b3a 100644
--- a/openml/testing.py
+++ b/openml/testing.py
@@ -182,7 +182,7 @@ def _get_sentinel(self, sentinel: str | None = None) -> str:
             md5.update(str(time.time()).encode("utf-8"))
             md5.update(str(os.getpid()).encode("utf-8"))
             sentinel = md5.hexdigest()[:10]
-            sentinel = "TEST%s" % sentinel
+            sentinel = f"TEST{sentinel}"
         return sentinel

     def _add_sentinel_to_flow_name(
diff --git a/openml/utils.py b/openml/utils.py
index 80d7caaae..94248d8f5 100644
--- a/openml/utils.py
+++ b/openml/utils.py
@@ -33,8 +33,7 @@ def extract_xml_tags(
     node: Mapping[str, Any],
     *,
     allow_none: Literal[True] = ...,
-) -> Any | None:
-    ...
+) -> Any | None: ...


 @overload
@@ -43,8 +42,7 @@ def extract_xml_tags(
     node: Mapping[str, Any],
     *,
     allow_none: Literal[False],
-) -> Any:
-    ...
+) -> Any: ...


 def extract_xml_tags(
@@ -196,7 +194,7 @@ def _delete_entity(entity_type: str, entity_id: int) -> bool:
         "user",
     }
     if entity_type not in legal_entities:
-        raise ValueError("Can't delete a %s" % entity_type)
+        raise ValueError(f"Can't delete a {entity_type}")

     url_suffix = "%s/%d" % (entity_type, entity_id)
     try:
@@ -243,8 +241,7 @@ def _list_all(
     list_output_format: Literal["dict"] = ...,
     *args: P.args,
     **filters: P.kwargs,
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -253,8 +250,7 @@ def _list_all(
     list_output_format: Literal["object"],
     *args: P.args,
     **filters: P.kwargs,
-) -> dict:
-    ...
+) -> dict: ...


 @overload
@@ -263,8 +259,7 @@ def _list_all(
     list_output_format: Literal["dataframe"],
     *args: P.args,
     **filters: P.kwargs,
-) -> pd.DataFrame:
-    ...
+) -> pd.DataFrame: ...


 def _list_all(  # noqa: C901, PLR0912
@@ -374,7 +369,7 @@ def _create_cache_directory(key: str) -> Path:
     try:
         cache_dir.mkdir(exist_ok=True, parents=True)
-    except Exception as e:  # noqa: BLE001
+    except Exception as e:
         raise openml.exceptions.OpenMLCacheException(
             f"Cannot create cache directory {cache_dir}."
         ) from e
@@ -410,7 +405,7 @@ def _create_cache_directory_for_id(key: str, id_: int) -> Path:
     """
     cache_dir = _get_cache_dir_for_id(key, id_, create=True)
     if cache_dir.exists() and not cache_dir.is_dir():
-        raise ValueError("%s cache dir exists but is not a directory!" % key)
+        raise ValueError(f"{key} cache dir exists but is not a directory!")
     cache_dir.mkdir(exist_ok=True, parents=True)
     return cache_dir
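
Note on the patterns above: the bulk of this patch is a mechanical migration from percent-formatting and str.format() to f-strings (with !s forcing str() conversion), plus formatter-driven layout changes such as collapsing overload stub bodies onto the signature line (`) -> dict: ...`). A minimal, self-contained sketch of the string-formatting equivalence, with hypothetical values, not part of the patch itself:

    # Hypothetical values for illustration only.
    md5_checksum = "0f8b"
    url = "https://www.openml.org/data/download/61"
    data_id = [3, 14, 15]

    # Old style, as on the removed lines: percent formatting.
    old = "Checksum of downloaded file is unequal to the expected checksum %s when downloading %s." % (md5_checksum, url)

    # New style, as on the added lines: f-strings interpolate at the call site;
    # {value!s} applies str() the way several added lines do.
    new = f"Checksum of downloaded file is unequal to the expected checksum {md5_checksum} when downloading {url}."
    assert old == new

    # Where the interpolated expression itself contains string quotes, the patch
    # keeps str.format() (as in the "/data_id/{}" lines), presumably to avoid
    # nesting quotes inside f-string braces, which is awkward before Python 3.12.
    api_call = "/data_id/{}".format(",".join(str(int(i)) for i in data_id))
    assert api_call == "/data_id/3,14,15"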