diff --git a/amlb/datasets/file.py b/amlb/datasets/file.py index 68d8c6162..abc51fc82 100644 --- a/amlb/datasets/file.py +++ b/amlb/datasets/file.py @@ -364,6 +364,8 @@ def __init__(self, path, fold, target, features, cache_dir, config): self.id_column = config['id_column'] self.timestamp_column = config['timestamp_column'] + # Ensure that id_column is parsed as string to avoid incorrect sorting + full_data[self.id_column] = full_data[self.id_column].astype(str) full_data[self.timestamp_column] = pd.to_datetime(full_data[self.timestamp_column]) if config['name'] is not None: file_name = config['name'] @@ -374,11 +376,11 @@ def __init__(self, path, fold, target, features, cache_dir, config): self._train = CsvDatasplit(self, train_path, timestamp_column=self.timestamp_column) self._test = CsvDatasplit(self, test_path, timestamp_column=self.timestamp_column) - self._dtypes = None + self._dtypes = full_data.dtypes # Store repeated item_id & in-sample seasonal error for each time step in the forecast horizon - needed later for metrics like MASE. # We need to store this information here because Result object has no access to past time series values. - self.repeated_item_id = self.test.data[self.id_column].cat.codes.to_numpy() + self.repeated_item_id = self.test.data[self.id_column].astype("category").cat.codes.to_numpy() self.repeated_abs_seasonal_error = self.compute_seasonal_error() def save_train_and_test_splits(self, full_data, fold, save_dir): diff --git a/amlb/datasets/openml.py b/amlb/datasets/openml.py index 803815275..88da77d80 100644 --- a/amlb/datasets/openml.py +++ b/amlb/datasets/openml.py @@ -26,6 +26,12 @@ from ..utils import as_list, lazy_property, path_from_split, profile, split_path, unsparsify +# https://github.com/openml/automlbenchmark/pull/574#issuecomment-1646179921 +try: + set_openml_cache = oml.config.set_cache_directory +except AttributeError: + set_openml_cache = oml.config.set_root_cache_directory + log = logging.getLogger(__name__) # hack (only adding a ? to the regexp pattern) to ensure that '?' values remain quoted when we save dataplits in arff format. @@ -40,7 +46,7 @@ class OpenmlLoader: def __init__(self, api_key, cache_dir=None): oml.config.apikey = api_key if cache_dir: - oml.config.set_cache_directory(cache_dir) + set_openml_cache(cache_dir) if oml.config.retry_policy != "robot": log.debug("Setting openml retry_policy from '%s' to 'robot'." 
% oml.config.retry_policy) diff --git a/amlb/defaults.py b/amlb/defaults.py index 6d0bf35c5..3031be71b 100644 --- a/amlb/defaults.py +++ b/amlb/defaults.py @@ -1,9 +1,15 @@ import pathlib -from openml.config import cache_directory +import openml from amlb.utils import Namespace as ns +# https://github.com/openml/automlbenchmark/pull/574#issuecomment-1646179921 +try: + cache_directory = openml.config.cache_directory +except AttributeError: + cache_directory = openml.config.get_cache_directory() + default_dirs = ns( input_dir=cache_directory, output_dir=str(pathlib.Path(__file__).parent.parent / "results"), diff --git a/amlb/results.py b/amlb/results.py index 07bb48b25..4ce41a1a9 100644 --- a/amlb/results.py +++ b/amlb/results.py @@ -18,6 +18,7 @@ from numpy import nan, sort import pandas as pd import scipy as sci +import scipy.sparse from .data import Dataset, DatasetType, Feature from .datautils import accuracy_score, auc, average_precision_score, balanced_accuracy_score, confusion_matrix, fbeta_score, log_loss, \ @@ -295,6 +296,8 @@ def save_predictions(dataset: Dataset, output_file: str, predictions = predictions.squeeze() if isinstance(predictions, S): predictions = predictions.values + if scipy.sparse.issparse(truth) and truth.shape[1] == 1: + truth = pd.DataFrame(truth.todense()) if isinstance(truth, DF): truth = truth.squeeze() if isinstance(truth, S): diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md new file mode 100644 index 000000000..a1e52f517 --- /dev/null +++ b/docs/CONTRIBUTING.md @@ -0,0 +1,130 @@ +# Contributing to the AutoML Benchmark +We appreciate that you are considering contributing to the AutoML Benchmark. +Remote collaboration can sometimes be hard, so we provide guidelines in this document +to make the experience as smooth as possible. + +This document contains information on: + + - [Reporting a Bug](#reporting-a-bug) + - [Suggesting a Feature](#features) + - [Suggesting a Dataset](#datasets) + - [Suggesting Ideas on Benchmark Design](#ideas) + - [Contributing Code or Documentation Changes](#contributing-changes) + +## Reporting a Bug +If you find a bug in the software, please first search our [issue tracker](https://github.com/openml/automlbenchmark/issues) to see if it has been reported before. +If it has, please check whether any relevant information that may help reproduce the issue is missing, and add it if necessary. +If there is nothing to add, simply leave a 👍 on the issue. This lets us know more people are affected by it. + +### Creating a Bug Report +After confirming your bug isn't reported on our issue tracker, please open a new issue to make a bug report. +A good bug report should describe the error and also provide: + + * A minimal script (and/or configuration) to reproduce the issue. + * The _observed_ behavior, for example a stack trace with the error. + * The _expected_ behavior. What did you expect to happen? + * Any additional information you may have. + * Information on your installed versions. If applicable, please provide information about both the `runbenchmark` environment and the `framework` environment (typically in `frameworks/FRAMEWORK/venv`). + +Following these guidelines greatly improves the chance that we can help you. +It also allows us to address the issue more quickly, which means we can help more people. + +## Features +If you want to suggest a new feature for the benchmark software, please [open an issue](https://github.com/openml/automlbenchmark/issues/new).
+Please motivate why we should consider adding the feature and how the user is expected to use it. + +## Datasets +If you have a suggestion for a new dataset to include in the benchmark, +please [open a discussion on the datasets board](https://github.com/openml/automlbenchmark/discussions/new?category=datasets). +Please motivate why the dataset is a good inclusion for the benchmark. +Examples of good motivations may include: + + * Evidence that it produces interesting results, for example by reporting a small-scale benchmark on the dataset. + * Evidence that it represents a very relevant problem, e.g., because it is frequently used in the scientific literature. + +Additionally, please provide a link to the data, preferably on [OpenML](https://www.openml.org), and indicate its license (if known). +Please note that the benchmark currently supports limited data types. +Suggestions for datasets with data types which are not yet supported are still welcome, +as they may help us create a better benchmark later when support is added. + +## Ideas +If you have other suggestions about benchmark design, [please open a suggestion on the general board](https://github.com/openml/automlbenchmark/discussions/new?category=general). +Please motivate why we should consider changing (or adding to) the benchmark design. + + +## Contributing Changes +We welcome all contributions from the community. To contribute changes to the +code or documentation, we follow a standard git workflow, which is outlined below. + +!!! note "For text changes" + + If you only want to contribute minor text changes, it is possible to do so + directly on Github. Click the pencil icon on the relevant file(s) to edit the documents, + and Github should allow you to automatically commit to your own fork. + After that, set up a pull request as described below under 'Open a Pull Request'. + +### Volunteering an Issue +To avoid multiple people doing the same work, the first thing +to do is to make sure we (and other contributors) know you are working on a particular issue or feature. +Please ensure that a related issue is open on the issue board (or open one if necessary), and ask to be assigned to that issue. +This lets all collaborators know that they should not work on that issue, so we avoid duplicate work. +It also gives us a chance to indicate whether we are (still) interested in the proposed changes. +If it is unclear how to add the feature, or if you are unsure which fix to apply to remove a bug, please discuss this in the issue. + +### Setting up the Development Environment +Fork the repository by clicking the `fork` button on the top right of our [Github](https://github.com/openml/automlbenchmark) page. +This should create a repository named `automlbenchmark` under your Github account. +Clone this repository (replace `GITHUB_USERNAME`): + +```text +git clone https://github.com/GITHUB_USERNAME/automlbenchmark.git +``` + +!!! warning "Use Python 3.9" + + The AutoML benchmark currently only officially supports Python 3.9. + We advise you to use that version when developing locally. + +then set up your local virtual environment: + +```text +cd automlbenchmark +python -m venv venv +source venv/bin/activate +python -m pip install -r requirements.txt +python -m pip install -r requirements-dev.txt +``` + +this should set up the minimum requirements for running the benchmark and running our developer tools.
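+!!! note "Developing on Windows"
+
+    The activation command above assumes a Unix-like shell. If you develop on Windows, the
+    standard `venv` layout places the activation script under `Scripts` rather than `bin`,
+    so the equivalent step would typically be `venv\Scripts\activate` (Command Prompt) or
+    `venv\Scripts\Activate.ps1` (PowerShell); the remaining commands are unchanged.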
+The following commands should now all run successfully: + +```text +python runbenchmark.py constantpredictor -f 0 +python -m pytest +python -m mkdocs serve +``` + +When `python -m mkdocs serve` is running, you should be able to navigate to the +local documentation server (by default at `127.0.0.1:8000`) and see the documentation. + +### Make Code Changes +Please make sure that: + + * All added code has annotated type hints and functions have docstrings. + * Changed or added code is covered by unit tests. + * The pull request does not add/change more than it has to in order to fix the bug/add the feature and meet the above criteria. + * The tests and `runbenchmark.py` script still work the same as above. + +In case the PR is a bug fix, please try to convert the minimal reproducing example of +the original issue to a unit test and include it in the test suite to help avoid future regressions. +Finally, commit the changes with a meaningful commit message about what was changed and why. + +### Make Documentation Changes +The software documentation pages are written with `mkdocs` using [`mkdocs-material`](https://squidfunk.github.io/mkdocs-material/getting-started/). +When editing these pages, you can see live updates while the `python -m mkdocs serve` command is running. +The main landing page with information about the project is written in pure `html` and `css`. + +### Open a Pull Request +When opening a pull request, reference the issue that it closes. +Please also provide any additional context that helps with reviewing the pull request but that was not appropriate to include as code comments. + diff --git a/docs/readme.md b/docs/readme.md new file mode 100644 index 000000000..735022066 --- /dev/null +++ b/docs/readme.md @@ -0,0 +1,30 @@ +# AutoML Benchmark +The OpenML AutoML Benchmark provides a framework for evaluating and comparing open-source AutoML systems. +The system is *extensible* because you can [add your own](https://openml.github.io/automlbenchmark/docs/extending/) +AutoML frameworks and datasets. For a thorough explanation of the benchmark and an evaluation of results, +you can read our [paper](https://arxiv.org/abs/2207.12560). + +Automatic Machine Learning (AutoML) systems automatically build machine learning pipelines +or neural architectures in a data-driven, objective, and automatic way. They automate a lot +of drudge work in designing machine learning systems, so that better systems can be developed, +faster. However, AutoML research is also slowed down by two factors: + +* We currently lack standardized, easily-accessible benchmarking suites of tasks (datasets) that are curated to reflect important problem domains, practical to use, and sufficiently challenging to support a rigorous analysis of performance results. + +* Subtle differences in the problem definition, such as the design of the hyperparameter search space or the way time budgets are defined, can drastically alter a task’s difficulty. This issue makes it difficult to reproduce published research and compare results from different papers. + +This toolkit aims to address these problems by setting up standardized environments for in-depth experimentation with a wide range of AutoML systems. + +Website: + +Documentation: + +Installation: + +### Features: + +* Curated suites of benchmarking datasets from [OpenML](https://www.openml.org) ([regression](https://www.openml.org/s/269), [classification](https://www.openml.org/s/271)).
+* Includes code to benchmark a number of [popular AutoML systems](https://openml.github.io/automlbenchmark/frameworks.html) on regression and classification tasks. +* [New AutoML systems can be added](https://openml.github.io/automlbenchmark/docs/extending/framework/) +* Experiments can be run in Docker or Singularity containers +* Execute experiments locally or on AWS diff --git a/docs/website/data.html b/docs/website/data.html new file mode 100644 index 000000000..985a87c2e --- /dev/null +++ b/docs/website/data.html @@ -0,0 +1,15 @@ + + + AMLB + + + + +

+ This is a redirect page to make sure we can always redirect you to our data, + even if we move it after publication! You should be taken to + https://test.openml.org/amlb/ + automatically. +

+ + diff --git a/docs/website/visualization.html b/docs/website/visualization.html new file mode 100644 index 000000000..f25baa8ef --- /dev/null +++ b/docs/website/visualization.html @@ -0,0 +1,19 @@ + + + AMLB + + + + +

+ This is a redirect page to make sure we can always redirect you to our best + source for visualizing results from the AutoML benchmark, even if we change + where that is after publication! + This page will take you to + + https://compstat-lmu.shinyapps.io/AutoML-Benchmark-Analysis/ + + . +

+ + diff --git a/docs/website/welcome.html b/docs/website/welcome.html new file mode 100644 index 000000000..bc3c5cd0d --- /dev/null +++ b/docs/website/welcome.html @@ -0,0 +1,21 @@ + + + AMLB + + + + +

+ This will be a redirect page to make sure we can always redirect you to + the best place to get started with contributions to the AutoML benchmark, + even if we change its location after publication! + For now, if you have questions, you can visit our + + Github discussions + . If you want to learn more about how to use the software, + please visit our + documentation + .

+ + diff --git a/examples/custom/extensions/Stacking/exec.py b/examples/custom/extensions/Stacking/exec.py index d8c80879d..47321feb6 100644 --- a/examples/custom/extensions/Stacking/exec.py +++ b/examples/custom/extensions/Stacking/exec.py @@ -30,18 +30,17 @@ def run(dataset, config): training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')} n_jobs = config.framework_params.get('_n_jobs', config.cores) # useful to disable multicore, regardless of the dataset config - estimators_params = {e: config.framework_params.get(f'_{e}_params', {}) for e in ['rf', 'gbm', 'linear', 'svc', 'final']} + estimators_params = {e: config.framework_params.get(f'_{e}_params', {}) for e in ['rf', 'gbm', 'sgdclassifier', 'sgdregressor', 'svc', 'final']} log.info("Running Sklearn Stacking Ensemble with a maximum time of {}s on {} cores.".format(config.max_runtime_seconds, n_jobs)) log.warning("We completely ignore the requirement to stay within the time limit.") log.warning("We completely ignore the advice to optimize towards metric: {}.".format(config.metric)) - if is_classification: estimator = StackingClassifier( estimators=[('rf', RandomForestClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['rf'])), ('gbm', GradientBoostingClassifier(random_state=config.seed, **estimators_params['gbm'])), - ('linear', SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['linear'])), + ('linear', SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['sgdclassifier'])), # ('svc', LinearSVC(random_state=config.seed, **estimators_params['svc'])) ], # final_estimator=SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['final']), @@ -54,11 +53,11 @@ def run(dataset, config): estimator = StackingRegressor( estimators=[('rf', RandomForestRegressor(n_jobs=n_jobs, random_state=config.seed, **estimators_params['rf'])), ('gbm', GradientBoostingRegressor(random_state=config.seed, **estimators_params['gbm'])), - ('linear', SGDRegressor(random_state=config.seed, **estimators_params['linear'])), + ('linear', SGDRegressor(random_state=config.seed, **estimators_params['sgdregressor'])), ('svc', LinearSVR(random_state=config.seed, **estimators_params['svc'])) ], # final_estimator=SGDRegressor(random_state=config.seed, **estimators_params['final']), - final_estimator=LinearRegression(n_jobs=n_jobs, random_state=config.seed, **estimators_params['final']), + final_estimator=LinearRegression(n_jobs=n_jobs), n_jobs=n_jobs, **training_params ) @@ -66,7 +65,8 @@ def run(dataset, config): with Timer() as training: estimator.fit(X_train, y_train) - predictions = estimator.predict(X_test) + with Timer() as predict: + predictions = estimator.predict(X_test) probabilities = estimator.predict_proba(X_test) if is_classification else None return result(output_file=config.output_predictions_file, @@ -75,7 +75,8 @@ def run(dataset, config): probabilities=probabilities, target_is_encoded=is_classification, models_count=len(estimator.estimators_) + 1, - training_duration=training.duration) + training_duration=training.duration, + predict_duration=predict.duration) if __name__ == '__main__': diff --git a/examples/custom/extensions/Stacking/requirements.txt b/examples/custom/extensions/Stacking/requirements.txt index d2afe9e80..73f0cadcd 100644 --- a/examples/custom/extensions/Stacking/requirements.txt +++ b/examples/custom/extensions/Stacking/requirements.txt @@ -1 +1 @@ -scikit-learn==0.22.1 +scikit-learn==1.3.1 diff --git 
a/examples/custom/extensions/Stacking/setup.sh b/examples/custom/extensions/Stacking/setup.sh index 352f776ba..0f489d07e 100755 --- a/examples/custom/extensions/Stacking/setup.sh +++ b/examples/custom/extensions/Stacking/setup.sh @@ -2,7 +2,7 @@ shopt -s expand_aliases HERE=$(dirname "$0") -. "$HERE/.setup_env" +. "$HERE/.setup/setup_env" . "$AMLB_ROOT/frameworks/shared/setup.sh" "$HERE" true PIP install -r "$HERE/requirements.txt" diff --git a/examples/custom/frameworks.yaml b/examples/custom/frameworks.yaml index a68884811..e711bcbbb 100644 --- a/examples/custom/frameworks.yaml +++ b/examples/custom/frameworks.yaml @@ -9,15 +9,15 @@ GradientBoosting: Stacking: module: extensions.Stacking - version: '0.22.1' + version: '1.3.1' project: https://scikit-learn.org/stable/modules/ensemble.html#stacking params: _rf_params: {n_estimators: 200} _gbm_params: {n_estimators: 200} - _linear_params: {penalty: elasticnet, loss: log} + _sgdclassifier_params: {penalty: elasticnet, loss: log_loss} + _sgdregressor_params: {penalty: elasticnet} # _svc_params: {tol: 1e-3, max_iter: 1e5} # _final_params: {penalty: elasticnet, loss: log} # sgd linear - _final_params: {max_iter: 1000} # logistic/linear H2OAutoML_nightly: module: frameworks.H2OAutoML diff --git a/frameworks/AutoGluon/README.md b/frameworks/AutoGluon/README.md index 51286533e..1b5c2dc65 100644 --- a/frameworks/AutoGluon/README.md +++ b/frameworks/AutoGluon/README.md @@ -1,16 +1,5 @@ # AutoGluon -To run v0.5.2: ```python3 ../automlbenchmark/runbenchmark.py autogluon ...``` +To run v0.8.2: ```python3 ../automlbenchmark/runbenchmark.py autogluon ...``` -To run mainline: ```python3 ../automlbenchmark/runbenchmark.py autogluonts:latest ...``` - - -# AutoGluonTS - -AutoGluonTS stands for autogluon.timeseries. This framework handles time series problems. 
- -## Run Steps - -To run v0.5.2: ```python3 ../automlbenchmark/runbenchmark.py autogluonts timeseries ...``` - -To run mainline: ```python3 ../automlbenchmark/runbenchmark.py autogluonts:latest timeseries ...``` +To run mainline: ```python3 ../automlbenchmark/runbenchmark.py autogluon:latest ...``` diff --git a/frameworks/AutoGluon/exec.py b/frameworks/AutoGluon/exec.py index 89c3372b4..4b670c4fd 100644 --- a/frameworks/AutoGluon/exec.py +++ b/frameworks/AutoGluon/exec.py @@ -48,9 +48,10 @@ def run(dataset, config): is_classification = config.type == 'classification' training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')} + time_limit = config.max_runtime_seconds presets = training_params.get("presets", []) presets = presets if isinstance(presets, list) else [presets] - if preset_with_refit_full := (set(presets) & {"good_quality", "high_quality"}): + if (preset_with_refit_full := (set(presets) & {"good_quality", "high_quality"})) and (time_limit is not None): preserve = 0.9 preset = next(iter(preset_with_refit_full)) msg = ( @@ -61,7 +62,7 @@ def run(dataset, config): "See https://auto.gluon.ai/stable/api/autogluon.tabular.TabularPredictor.refit_full.html" ) log.info(msg) - config.max_runtime_seconds = preserve * config.max_runtime_seconds + time_limit = preserve * config.max_runtime_seconds train_path, test_path = dataset.train.path, dataset.test.path label = dataset.target.name @@ -77,15 +78,17 @@ def run(dataset, config): problem_type=problem_type, ).fit( train_data=train_path, - time_limit=config.max_runtime_seconds, + time_limit=time_limit, **training_params ) log.info(f"Finished fit in {training.duration}s.") # Persist model in memory that is going to be predicting to get correct inference latency - # max_memory=0.4 will be future default: https://github.com/autogluon/autogluon/pull/3338 - predictor.persist_models('best', max_memory=0.4) + if hasattr(predictor, 'persist'): # autogluon>=1.0 + predictor.persist('best') + else: + predictor.persist_models('best') def inference_time_classification(data: Union[str, pd.DataFrame]): return None, predictor.predict_proba(data, as_multiclass=True) @@ -108,14 +111,17 @@ def inference_time_regression(data: Union[str, pd.DataFrame]): with Timer() as predict: predictions, probabilities = infer(test_data) if is_classification: - predictions = probabilities.idxmax(axis=1).to_numpy() + if hasattr(predictor, 'predict_from_proba'): # autogluon>=1.0 + predictions = predictor.predict_from_proba(probabilities).to_numpy() + else: + predictions = probabilities.idxmax(axis=1).to_numpy() prob_labels = probabilities.columns.values.astype(str).tolist() if probabilities is not None else None log.info(f"Finished predict in {predict.duration}s.") _leaderboard_extra_info = config.framework_params.get('_leaderboard_extra_info', False) # whether to get extra model info (very verbose) _leaderboard_test = config.framework_params.get('_leaderboard_test', False) # whether to compute test scores in leaderboard (expensive) - leaderboard_kwargs = dict(silent=True, extra_info=_leaderboard_extra_info) + leaderboard_kwargs = dict(extra_info=_leaderboard_extra_info) # Disabled leaderboard test data input by default to avoid long running computation, remove 7200s timeout limitation to re-enable if _leaderboard_test: leaderboard_kwargs['data'] = test_data diff --git a/frameworks/AutoGluon/exec_ts.py b/frameworks/AutoGluon/exec_ts.py index 32fd34072..864946d22 100644 --- a/frameworks/AutoGluon/exec_ts.py +++ b/frameworks/AutoGluon/exec_ts.py 
@@ -17,7 +17,7 @@ from joblib.externals.loky import get_reusable_executor from frameworks.shared.callee import call_run, result, output_subdir -from frameworks.shared.utils import Timer, zip_path +from frameworks.shared.utils import Timer, zip_path, load_timeseries_dataset log = logging.getLogger(__name__) @@ -25,9 +25,16 @@ def run(dataset, config): log.info(f"\n**** AutoGluon TimeSeries [v{__version__}] ****\n") prediction_length = dataset.forecast_horizon_in_steps + train_df, test_df = load_timeseries_dataset(dataset) - train_data = TimeSeriesDataFrame.from_path( - dataset.train_path, + train_data = TimeSeriesDataFrame.from_data_frame( + train_df, + id_column=dataset.id_column, + timestamp_column=dataset.timestamp_column, + ) + + test_data = TimeSeriesDataFrame.from_data_frame( + test_df, id_column=dataset.id_column, timestamp_column=dataset.timestamp_column, ) @@ -45,6 +52,7 @@ def run(dataset, config): predictor.fit( train_data=train_data, time_limit=config.max_runtime_seconds, + random_seed=config.seed, **{k: v for k, v in config.framework_params.items() if not k.startswith('_')}, ) @@ -52,7 +60,6 @@ def run(dataset, config): predictions = pd.DataFrame(predictor.predict(train_data)) # Add columns necessary for the metric computation + quantile forecast to `optional_columns` - test_data_future = pd.read_csv(dataset.test_path, parse_dates=[dataset.timestamp_column]) optional_columns = dict( repeated_item_id=np.load(dataset.repeated_item_id), repeated_abs_seasonal_error=np.load(dataset.repeated_abs_seasonal_error), @@ -61,13 +68,12 @@ def run(dataset, config): optional_columns[str(q)] = predictions[str(q)].values predictions_only = get_point_forecast(predictions, config.metric) - truth_only = test_data_future[dataset.target].values + truth_only = test_df[dataset.target].values # Sanity check - make sure predictions are ordered correctly - future_index = pd.MultiIndex.from_frame(test_data_future[[dataset.id_column, dataset.timestamp_column]]) - assert predictions.index.equals(future_index), "Predictions and test data index do not match" + assert predictions.index.equals(test_data.index), "Predictions and test data index do not match" - test_data_full = pd.concat([train_data, test_data_future.set_index([dataset.id_column, dataset.timestamp_column])]) + test_data_full = pd.concat([train_data, test_data]) leaderboard = predictor.leaderboard(test_data_full, silent=True) with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', 1000): diff --git a/frameworks/AutoGluon/setup.sh b/frameworks/AutoGluon/setup.sh index c6a61a8c6..6980b6353 100755 --- a/frameworks/AutoGluon/setup.sh +++ b/frameworks/AutoGluon/setup.sh @@ -1,8 +1,11 @@ #!/usr/bin/env bash +# exit when any command fails +set -e + HERE=$(dirname "$0") VERSION=${1:-"stable"} -REPO=${2:-"https://github.com/awslabs/autogluon.git"} +REPO=${2:-"https://github.com/autogluon/autogluon.git"} PKG=${3:-"autogluon"} if [[ "$VERSION" == "latest" ]]; then VERSION="master" diff --git a/frameworks/FEDOT/__init__.py b/frameworks/FEDOT/__init__.py new file mode 100644 index 000000000..86e68de98 --- /dev/null +++ b/frameworks/FEDOT/__init__.py @@ -0,0 +1,25 @@ +from amlb.benchmark import TaskConfig +from amlb.data import Dataset +from amlb.utils import call_script_in_same_dir + + +def setup(*args, **kwargs): + call_script_in_same_dir(__file__, "setup.sh", *args, **kwargs) + + +def run(dataset: Dataset, config: TaskConfig): + from frameworks.shared.caller import run_in_venv + + data = dict( + train=dict( + 
X=dataset.train.X, + y=dataset.train.y + ), + test=dict( + X=dataset.test.X, + y=dataset.test.y + ) + ) + + return run_in_venv(__file__, "exec.py", + input_data=data, dataset=dataset, config=config) diff --git a/frameworks/FEDOT/exec.py b/frameworks/FEDOT/exec.py new file mode 100644 index 000000000..b57448949 --- /dev/null +++ b/frameworks/FEDOT/exec.py @@ -0,0 +1,99 @@ +import logging +import os +from pathlib import Path + +from fedot.api.main import Fedot + +from frameworks.shared.callee import call_run, result, output_subdir +from frameworks.shared.utils import Timer + +log = logging.getLogger(__name__) + + +def run(dataset, config): + log.info("\n**** FEDOT ****\n") + + is_classification = config.type == 'classification' + # Mapping of benchmark metrics to FEDOT metrics + metrics_mapping = dict( + acc='acc', + auc='roc_auc', + f1='f1', + logloss='logloss', + mae='mae', + mse='mse', + msle='msle', + r2='r2', + rmse='rmse' + ) + scoring_metric = metrics_mapping.get(config.metric, None) + + if scoring_metric is None: + log.warning("Performance metric %s not supported.", config.metric) + + training_params = {"preset": "best_quality", "n_jobs": config.cores} + training_params |= {k: v for k, v in config.framework_params.items() if not k.startswith('_')} + n_jobs = training_params["n_jobs"] + + log.info('Running FEDOT with a maximum time of %ss on %s cores, optimizing %s.', + config.max_runtime_seconds, n_jobs, scoring_metric) + runtime_min = config.max_runtime_seconds / 60 + + fedot = Fedot(problem=config.type, timeout=runtime_min, metric=scoring_metric, seed=config.seed, + max_pipeline_fit_time=runtime_min / 10, **training_params) + + with Timer() as training: + fedot.fit(features=dataset.train.X, target=dataset.train.y) + + log.info('Predicting on the test set.') + with Timer() as predict: + predictions = fedot.predict(features=dataset.test.X) + probabilities = None + if is_classification: + probabilities = fedot.predict_proba(features=dataset.test.X, probs_for_all_classes=True) + + save_artifacts(fedot, config) + + return result(output_file=config.output_predictions_file, + predictions=predictions, + truth=dataset.test.y, + probabilities=probabilities, + target_is_encoded=False, + models_count=fedot.current_pipeline.length, + training_duration=training.duration, + predict_duration=predict.duration) + + +def save_artifacts(automl, config): + + artifacts = config.framework_params.get('_save_artifacts', []) + if 'models' in artifacts: + try: + models_dir = output_subdir('models', config) + models_file = os.path.join(models_dir, 'model.json') + automl.current_pipeline.save(models_file) + except Exception as e: + log.info(f"Error when saving 'models': {e}.", exc_info=True) + + if 'info' in artifacts: + try: + info_dir = output_subdir("info", config) + if automl.history: + automl.history.save(os.path.join(info_dir, 'history.json')) + else: + log.info(f"There is no optimization history info to save.") + except Exception as e: + log.info(f"Error when saving info about optimisation history: {e}.", exc_info=True) + + if 'leaderboard' in artifacts: + try: + leaderboard_dir = output_subdir("leaderboard", config) + if automl.history: + lb = automl.history.get_leaderboard() + Path(os.path.join(leaderboard_dir, "leaderboard.csv")).write_text(lb) + except Exception as e: + log.info(f"Error when saving 'leaderboard': {e}.", exc_info=True) + + +if __name__ == '__main__': + call_run(run) diff --git a/frameworks/FEDOT/setup.sh b/frameworks/FEDOT/setup.sh new file mode 100644 index 000000000..a89781583 --- 
/dev/null +++ b/frameworks/FEDOT/setup.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +HERE=$(dirname "$0") +VERSION=${1:-"stable"} +REPO=${2:-"https://github.com/aimclub/FEDOT.git"} +PKG=${3:-"fedot"} +if [[ "$VERSION" == "latest" ]]; then + VERSION="master" +fi + +# creating local venv +. ${HERE}/../shared/setup.sh ${HERE} true + +RAWREPO=$(echo ${REPO} | sed "s/github\.com/raw\.githubusercontent\.com/") +if [[ "$VERSION" == "stable" ]]; then + PIP install --no-cache-dir -U ${PKG} + echo GET_VERSION_STABLE + VERSION=$(PY -c "${GET_VERSION_STABLE}") +elif [[ "$VERSION" =~ ^[0-9] ]]; then + PIP install --no-cache-dir -U ${PKG}==${VERSION} +else + TARGET_DIR="${HERE}/lib/${PKG}" + rm -Rf ${TARGET_DIR} + + if [[ "$VERSION" =~ ^# ]]; then + COMMIT="${VERSION:1}" + else + # find the latest commit to the VERSION branch + COMMIT=$(git ls-remote "${REPO}" | grep "refs/heads/${VERSION}" | cut -f 1) + DEPTH="--depth 1 --branch ${VERSION}" + fi + + git clone --recurse-submodules --shallow-submodules ${DEPTH} ${REPO} ${TARGET_DIR} + cd ${TARGET_DIR} + git checkout "${COMMIT}" + git submodule update --init --recursive + cd ${HERE} + PIP install -U -e ${TARGET_DIR} +fi + +installed="${HERE}/.setup/installed" +PY -c "from fedot import __version__; print(__version__)" >> "$installed" +if [[ -n $COMMIT ]]; then + truncate -s-1 "$installed" + echo "#${COMMIT}" >> "$installed" +fi diff --git a/frameworks/shared/utils.py b/frameworks/shared/utils.py index abcff3717..26a39f96e 100644 --- a/frameworks/shared/utils.py +++ b/frameworks/shared/utils.py @@ -2,6 +2,7 @@ import importlib.util import logging import os +import pandas as pd import sys @@ -42,6 +43,13 @@ def load_amlb_module(mod, amlb_path=None): return import_module(mod) +def load_timeseries_dataset(dataset): + # Ensure that id_column is loaded as string to avoid incorrect sorting + train_data = pd.read_csv(dataset.train_path, dtype={dataset.id_column: str}, parse_dates=[dataset.timestamp_column]) + test_data = pd.read_csv(dataset.test_path, dtype={dataset.id_column: str}, parse_dates=[dataset.timestamp_column]) + return train_data, test_data + + utils = load_amlb_module("amlb.utils") # unorthodox for it's only now that we can safely import those functions from amlb.utils import * diff --git a/mkdocs.yml b/mkdocs.yml index 5f2dd0f6c..831f69efe 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -27,6 +27,7 @@ nav: - extending/constraint.md - Frameworks: extending/framework.md - FAQ: faq.md + - Contributing: CONTRIBUTING.md extra_css: - stylesheets/extra.css diff --git a/requirements-dev.txt b/requirements-dev.txt index 5939f4af5..600256292 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -7,3 +7,6 @@ types-xmltodict pandas-stubs boto3-stubs mypy + +# documentation +mkdocs-material \ No newline at end of file diff --git a/requirements.in b/requirements.in index 19adef7d2..02f64a594 100644 --- a/requirements.in +++ b/requirements.in @@ -11,3 +11,7 @@ scikit-learn>=1.0,<2.0 pyarrow>=11.0 # tables>=3.6 + +# Allow loading datasets from S3 +fsspec +s3fs diff --git a/requirements.txt b/requirements.txt index 52d6e87a6..2cee4f1c1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,6 +9,7 @@ boto3==1.26.98 botocore==1.29.98 # via # boto3 + # s3fs # s3transfer certifi==2022.12.7 # via @@ -18,6 +19,10 @@ charset-normalizer==3.1.0 # via requests filelock==3.12.0 # via -r requirements.in +fsspec==2023.6.0 + # via + # -r requirements.in + # s3fs idna==3.4 # via requests jmespath==1.0.1 @@ -65,6 +70,8 @@ ruamel-yaml==0.17.21 # via -r requirements.in 
ruamel-yaml-clib==0.2.7 # via ruamel-yaml +s3fs==0.4.2 # via -r requirements.in s3transfer==0.6.0 # via boto3 scikit-learn==1.2.2 diff --git a/resources/frameworks.yaml b/resources/frameworks.yaml index de3e4aaa6..da2881ce0 100644 --- a/resources/frameworks.yaml +++ b/resources/frameworks.yaml @@ -203,6 +203,16 @@ TPOT: # population_size: 25 # verbosity: 2 +FEDOT: + version: 'master' + description: | + FEDOT is an AutoML tool that optimizes composite machine learning pipelines using evolutionary optimization. + project: https://github.com/aimclub/FEDOT + refs: + - https://doi.org/10.1016/j.future.2021.08.022 +# params: +# _save_artifacts: ['leaderboard', 'models', 'info'] + ####################################### ### Non AutoML reference frameworks ### ####################################### diff --git a/resources/frameworks_2023Q2.yaml b/resources/frameworks_2023Q2.yaml index af4e46848..1ac098b6f 100644 --- a/resources/frameworks_2023Q2.yaml +++ b/resources/frameworks_2023Q2.yaml @@ -97,8 +97,7 @@ mlr3automl: project: https://github.com/a-hanf/mlr3automl NaiveAutoML: - repo: https://github.com/pgijsbers/naiveautoml - version: '#182f5148e9d360ad92254fe47c12fc35d9fabd62' + version: '0.0.27' TPOT: version: '0.12.0' diff --git a/resources/frameworks_latest.yaml b/resources/frameworks_latest.yaml index d56b14dac..44f924232 100644 --- a/resources/frameworks_latest.yaml +++ b/resources/frameworks_latest.yaml @@ -86,6 +86,9 @@ oboe: TPOT: version: 'latest' +FEDOT: + version: 'latest' + ####################################### ### Non AutoML reference frameworks ### ####################################### diff --git a/resources/frameworks_stable.yaml b/resources/frameworks_stable.yaml index 3de7da369..d6b5a1ce0 100644 --- a/resources/frameworks_stable.yaml +++ b/resources/frameworks_stable.yaml @@ -91,8 +91,8 @@ oboe: TPOT: version: 'stable' - - +FEDOT: + version: 'stable' ####################################### ### Non AutoML reference frameworks ### diff --git a/tests/unit/amlb/datasets/file/test_file_dataloader.py b/tests/unit/amlb/datasets/file/test_file_dataloader.py index 778cccdf7..b46379724 100644 --- a/tests/unit/amlb/datasets/file/test_file_dataloader.py +++ b/tests/unit/amlb/datasets/file/test_file_dataloader.py @@ -292,7 +292,7 @@ def test_load_timeseries_task_csv(file_loader): assert len(ds.repeated_abs_seasonal_error) == len(ds.test.data) assert len(ds.repeated_item_id) == len(ds.test.data) - assert pat.is_categorical_dtype(ds._dtypes[ds.id_column]) + assert pat.is_string_dtype(ds._dtypes[ds.id_column]) assert pat.is_datetime64_dtype(ds._dtypes[ds.timestamp_column]) assert pat.is_float_dtype(ds._dtypes[ds.target.name])
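Why the id-column dtype matters across these changes: the `astype(str)` cast in `FileDataset`, the `dtype={dataset.id_column: str}` argument in the new `load_timeseries_dataset` helper, and the switch from `is_categorical_dtype` to `is_string_dtype` in the test all guard against the same pitfall, namely that numeric-looking item ids sort (and therefore encode) differently depending on their dtype. A minimal standalone sketch of that effect, using plain pandas and hypothetical ids rather than any benchmark dataset:

```python
import pandas as pd

# Numeric ids sort numerically ...
ids_numeric = pd.Series([10, 2, 1])
print(ids_numeric.sort_values().tolist())                 # [1, 2, 10]

# ... while the same ids read back as strings sort lexicographically.
ids_string = ids_numeric.astype(str)
print(ids_string.sort_values().tolist())                  # ['1', '10', '2']

# The derived category codes differ as well, so an array like repeated_item_id
# would no longer line up with the test rows if the splits were written with
# one dtype and read back with the other.
print(ids_numeric.astype("category").cat.codes.tolist())  # [2, 1, 0]
print(ids_string.astype("category").cat.codes.tolist())   # [1, 2, 0]
```

Forcing the id column to `str` on both the dataset side and in the framework helpers keeps the two orderings consistent.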