From 460e03db50f41b2100688109d8caa73930419626 Mon Sep 17 00:00:00 2001
From: Lorenzo Stella
Date: Fri, 8 Jul 2022 10:49:51 +0200
Subject: [PATCH] Backports for v0.10.1 (#2145)

* Docs: Rework `installation` section. (#2130)

* Fix DatasetCollection (#2135)

* Fix `PandasDataset` for Python 3.9 (#2141)

* Docs: Fix running tutorials for publishing docs. (#2138)

* Docs: Fix running tutorials for publishing docs.

* Update requirements.

* Docs: Make notebook templates. (#2122)

* Use of check_github_event.

* Fix issues with hyperparameter tuning tutorial (#2143)

* Apply black to notebooks. (#2144)

Co-authored-by: Jasper
Co-authored-by: rsnirwan
---
 .github/workflows/docs.yml                    |   3 -
 Justfile                                      |  24 +--
 docs/getting_started/install.md               | 159 ++++++++++++++++++
 docs/getting_started/install.rst              |  78 ---------
 docs/md2ipynb.py                              | 113 +++++++++----
 ...ut => howto_pytorch_lightning.md.template} |   0
 ...nput => hp_tuning_with_optuna.md.template} |   2 -
 ...md.input => trainer_callbacks.md.template} |   0
 ....md.input => pandasdataframes.md.template} |   0
 ... => synthetic_data_generation.md.template} |   0
 ...md.input => extended_tutorial.md.template} |   0
 ...input => quick_start_tutorial.md.template} |   0
 requirements/requirements-docs.txt            |   3 +
 src/gluonts/dataset/__init__.py               |   2 +-
 src/gluonts/dataset/pandas.py                 |   4 +-
 15 files changed, 255 insertions(+), 133 deletions(-)
 create mode 100644 docs/getting_started/install.md
 delete mode 100644 docs/getting_started/install.rst
 rename docs/tutorials/advanced_topics/{howto_pytorch_lightning.md.input => howto_pytorch_lightning.md.template} (100%)
 rename docs/tutorials/advanced_topics/{hp_tuning_with_optuna.md.input => hp_tuning_with_optuna.md.template} (99%)
 rename docs/tutorials/advanced_topics/{trainer_callbacks.md.input => trainer_callbacks.md.template} (100%)
 rename docs/tutorials/data_manipulation/{pandasdataframes.md.input => pandasdataframes.md.template} (100%)
 rename docs/tutorials/data_manipulation/{synthetic_data_generation.md.input => synthetic_data_generation.md.template} (100%)
 rename docs/tutorials/forecasting/{extended_tutorial.md.input => extended_tutorial.md.template} (100%)
 rename docs/tutorials/forecasting/{quick_start_tutorial.md.input => quick_start_tutorial.md.template} (100%)

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 758e533620..956bf6f09d 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -6,9 +6,6 @@ defaults:
   run:
     shell: bash
 
-env:
-  SKIP_BUILD_NOTEBOOK: ${{!( github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'pr:docs-build-notebook'))}}
-
 jobs:
   docs-build:
     runs-on: ubuntu-latest
diff --git a/Justfile b/Justfile
index ff6c1bfed1..6d64f73e44 100644
--- a/Justfile
+++ b/Justfile
@@ -15,30 +15,18 @@
 # specific language governing permissions and limitations
 # under the License.
 
-ROOTDIR := invocation_directory()
+ROOTDIR := justfile_directory()
 
 MD2IPYNB := ROOTDIR + "/docs/md2ipynb.py"
 
-skip_build_notebook := env_var_or_default("SKIP_BUILD_NOTEBOOK", "false")
-
-
-docs: release
+docs: compile_notebooks
     make -C docs html # SPHINXOPTS=-W
 
 clean:
     git clean -ff -d -x --exclude="{{ROOTDIR}}/tests/externaldata/*" --exclude="{{ROOTDIR}}/tests/data/*" --exclude="{{ROOTDIR}}/conda/"
 
-compile_notebooks:
-    if [ {{skip_build_notebook}} = "true" ] ; \
-    then \
-        find docs/tutorials/**/*.md.input | sed 'p;s/\.input//' | xargs -n2 cp; \
-    else \
-        python -m ipykernel install --user --name docsbuild; \
-        python {{MD2IPYNB}} --kernel docsbuild "docs/tutorials/**/*.md.input"; \
-    fi;
-
-dist_notebooks: compile_notebooks
-    cd docs/tutorials && \
-    find * -type d -prune | grep -v 'tests\|__pycache__' | xargs -t -n 1 -I{} zip --no-dir-entries -r {}.zip {} -x "*.md" -x "__pycache__" -x "*.pyc" -x "*.txt" -x "*.log" -x "*.params" -x "*.npz" -x "*.json"
+compile_notebooks mode="release":
+    python -m ipykernel install --user --name docsbuild
+    python {{MD2IPYNB}} --kernel docsbuild docs/tutorials/**/*.md.template --mode {{mode}}
 
-release: dist_notebooks
+release:
     python setup.py sdist
diff --git a/docs/getting_started/install.md b/docs/getting_started/install.md
new file mode 100644
index 0000000000..ed37e4b2e1
--- /dev/null
+++ b/docs/getting_started/install.md
@@ -0,0 +1,159 @@
+
+# Installation
+
+GluonTS is available from PyPI via:
+
+```sh
+pip install gluonts
+```
+
+```{attention}
+**GluonTS uses a minimal dependency model.**
+
+This means that, to use most models and features, additional dependencies need
+to be installed. See the next section for more information.
+
+```
+
+## Optional and Extra Dependencies
+
+Python has the notion of [extras](https://peps.python.org/pep-0508/#extras)
+-- dependencies that can be optionally installed to unlock certain features of
+a package.
+
+When installing a package, they are passed via ``[...]`` after the package
+name:
+
+```sh
+pip install some-package[extra-1,extra-2]
+```
+
+We make extensive use of optional dependencies in GluonTS to keep the number of
+required dependencies minimal. To still allow users to opt in to certain
+features, we expose many extra dependencies.
+
+For example, we offer support for reading and writing Arrow and Parquet based
+datasets using [Apache Arrow](https://arrow.apache.org/). However, it is a
+hefty dependency to require, especially if one has no need for it. Thus, we
+offer the ``arrow``-extra, which installs the required packages and can be
+enabled simply by using:
+
+```sh
+pip install gluonts[arrow]
+```
+
+### Models
+
+
+#### PyTorch
+
+Models written using [PyTorch](https://pytorch.org/) are available via the
+``gluonts.torch`` subpackage.
+
+In addition to PyTorch, we require [PyTorch Lightning](https://www.pytorchlightning.ai/)
+to be installed as well.
+
+Both required dependencies are included in the ``torch``-extra:
+
+```sh
+pip install gluonts[torch]
+```
+
+
+#### MXNet
+
+MXNet-based models require a version of ``mxnet`` to be installed.
+
+```{note}
+
+MXNet provides different packages for CPU and GPU usage. Please refer to its
+[documentation](https://mxnet.apache.org/versions/1.9.1/get_started?) to
+select the version that fits your use case.
+
+```
+
+The ``mxnet``-extra will install a CPU-only version:
+
+```sh
+pip install gluonts[mxnet]
+
+```
+
+
+#### 3rd Party
+
+##### R-Forecast
+
+GluonTS includes a thin wrapper for calling the ``R`` `forecast` package.
+
+In order to use it, you need to install [``R``](https://www.r-project.org/) and
+the `forecast` package:
+
+```sh
+R -e 'install.packages(c("forecast", "nnfor"), repos="https://cloud.r-project.org")'
+```
+
+In addition, we require ``rpy2`` to be installed:
+
+```sh
+pip install 'rpy2>=2.9.*,<3.*'
+```
+
+##### Prophet
+
+The [Prophet](https://facebook.github.io/prophet/) forecasting library is
+available via `gluonts.model.prophet` and requires the ``prophet`` package to
+be installed.
+
+The ``prophet``-extra also installs it:
+
+```sh
+pip install gluonts[prophet]
+```
+
+
+### Datasets
+
+#### JSON
+
+Since Python's built-in ``json`` package is known to be relatively slow, we use
+faster implementations if available: ``orjson`` (recommended) and ``ujson``.
+
+You can install ``orjson`` via:
+
+```sh
+pip install orjson
+```
+
+```{hint}
+GluonTS will emit a warning if neither ``orjson`` nor ``ujson`` is installed.
+There is no functional difference between the implementations, but
+especially when working with larger datasets, performance can be notably
+impacted when relying on the default ``json`` package.
+```
+
+#### Arrow
+
+GluonTS supports [Parquet](https://en.wikipedia.org/wiki/Apache_Parquet) files
+using [``PyArrow``](https://arrow.apache.org/docs/python/index.html).
+
+Further, [Arrow's custom data formats](https://arrow.apache.org/docs/python/ipc.html)
+are also supported.
+
+To utilise these, either install the ``pyarrow`` package or use the
+``arrow``-extra:
+
+```sh
+pip install gluonts[arrow]
+```
+
+### Other
+
+#### Shell
+
+The ``shell`` module offers integration with Amazon SageMaker and is available
+through:
+
+```sh
+pip install gluonts[shell]
+```
diff --git a/docs/getting_started/install.rst b/docs/getting_started/install.rst
deleted file mode 100644
index bc119ebdc1..0000000000
--- a/docs/getting_started/install.rst
+++ /dev/null
@@ -1,78 +0,0 @@
-Installation
-============
-
-GluonTS relies on the recent version of MXNet. The easiest way to install MXNet
-is through `pip `_. The following
-command installs the latest version of MXNet.
-
-.. code-block:: console
-
-    pip install --upgrade mxnet~=1.7
-
-.. note::
-
-    There are other pre-build MXNet packages that enable GPU supports and
-    accelerate CPU performance, please refer to `this page
-    `_ for details. Some
-    training scripts are recommended to run on GPUs, if you don't have a GPU
-    machine at hand, you may consider `running on AWS
-    `_.
-
-
-After installing MXNet, you can install the GluonTS toolkit by
-
-.. code-block:: console
-
-    pip install gluonts
-
-
-Install from Dev Branch
------------------------
-
-If you are interested in trying out features on dev branch that hasn't been released yet, you have
-the option of installing from dev branch directly.
-
-
-Install from GitHub
--------------------
-
-Use the following command to automatically download and install the current code on dev branch:
-
-.. code-block:: console
-
-    pip install git+https://github.com/awslabs/gluon-ts.git
-
-
-Install from Source Code
-------------------------
-
-You can also first check out the code locally using Git:
-
-.. code-block:: console
-
-    git clone https://github.com/awslabs/gluon-ts
-    cd gluon-ts
-
-then use the provided `setup.py` to install into site-packages:
-
-.. code-block:: console
-
-    python setup.py install
-
-
-.. note::
-
-    You may need to use `sudo` in case you run into permission denied error.
-
-
-Alternatively, you can set up the package with development mode, so that local changes are
-immediately reflected in the installed python package
-
-.. code-block:: console
-
-    python setup.py develop
-
-.. note::
-
-    The dev branch may rely on MXNet nightly builds which are available on PyPI,
-    please refer to `this page `_ for installation guide.
diff --git a/docs/md2ipynb.py b/docs/md2ipynb.py
index 8ee425eee1..0186dd5745 100644
--- a/docs/md2ipynb.py
+++ b/docs/md2ipynb.py
@@ -1,60 +1,115 @@
-import argparse
+import json
+import re
 import sys
 import time
+import os
 from itertools import chain
 from pathlib import Path
 
+import black
+import click
+import jinja2
 import nbformat
 import notedown
 from nbclient import NotebookClient
+from jinja2 import Environment
 
 
-def convert(path, kernel_name=None, timeout=40 * 60):
-    with path.open() as in_file:
-        notebook = notedown.MarkdownReader().read(in_file)
+env = Environment()
 
-    print(f"=== {path.name} ", end="")
-    sys.stdout.flush()
-    start = time.time()
 
+def check_github_event(default):
+    if "GITHUB_EVENT_PATH" not in os.environ:
+        return default
+
+    with open(os.environ["GITHUB_EVENT_PATH"]) as infile:
+        event = json.load(infile)
+
+    if "pull_request" in event:
+        for label in event["pull_request"]["labels"]:
+            if label["name"] == "pr:docs-build-notebook":
+                return default
+
+        return "skip"
+
+    return default
+
+
+def run_notebook(text, kernel_name, timeout) -> str:
+    notebook = notedown.MarkdownReader().reads(text)
+
+    kwargs = {}
+    if kernel_name is not None:
+        kwargs["kernel_name"] = kernel_name
 
     client = NotebookClient(
         notebook,
-        timeout=600,
-        kernel_name=kernel_name,
+        timeout=timeout,
         resources={"metadata": {"path": "."}},
+        **kwargs,
     )
 
     client.execute()
-    print(f"finished evaluation in {time.time() - start} sec")
 
     # need to add language info to for syntax highlight
     notebook["metadata"].update(language_info={"name": "python"})
 
+    return nbformat.writes(notebook)
+
+
+def black_cells(text):
+    CODE_RE = r"```py(?:thon)?\s*\n(.*?)```"
+
+    text = re.sub(r"^%", r"#%#", text, flags=re.M)
+
+    def apply_black(match):
+        code = match.group(1)
+
+        formatted = black.format_str(code, mode=black.Mode())
+
+        return "\n".join(["```", formatted.rstrip(), "```"])
+
+    formatted = re.sub(CODE_RE, apply_black, text, flags=re.S)
+    return re.sub(r"^#%#", r"%", formatted, flags=re.M)
+
+
+def convert(path, mode, kernel_name=None, timeout=40 * 60):
+    print(f"=== {path.name} ", end="")
+    sys.stdout.flush()
+
+    with path.open() as in_file:
+        template = env.from_string(in_file.read())
+
+    markdown = template.render(mode=mode)
+    markdown = black_cells(markdown)
+
+    if mode != "skip":
+        suffix = ".ipynb"
+        start = time.time()
+        output = run_notebook(markdown, kernel_name, timeout)
+        print(f"finished evaluation in {time.time() - start} sec")
+    else:
+        suffix = ".md"
+        print(f"convert to {suffix}")
+        output = markdown
+
     # XXX.md.input -> XXX.ipynb
     # `with_suffix` only operates on last suffix, so we need some more involved
     # logic.
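     # e.g. "quick_start_tutorial.md.template" -> stem "quick_start_tutorial",
     # which then receives the ".ipynb" (or ".md") suffix chosen above.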
     stem = path.name.split(".", 1)[0]
-    nbformat.write(notebook, path.with_name(stem).with_suffix(".ipynb"))
+    with path.with_name(stem).with_suffix(suffix).open("w") as outfile:
+        outfile.write(output)
 
 
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "-k",
-        "--kernel",
-        dest="kernel_name",
-        default=None,
-        help="name of ipython kernel to use",
-    )
-    parser.add_argument(
-        "files", type=str, nargs="+", help="path to files to convert"
-    )
+@click.command()
+@click.argument("paths", type=click.Path(), nargs=-1)
+@click.option("--kernel", "-k", help="Name of iPython kernel to use.")
+@click.option("--mode", "-m", default="release")
+def cli(paths, kernel, mode):
+    mode = check_github_event(mode)
 
-    args = parser.parse_args()
+    for file in map(Path, paths):
+        convert(file, kernel_name=kernel, mode=mode)
 
-    here = Path(".")
-    files = list(chain.from_iterable(map(here.glob, args.files)))
 
-    for file in files:
-        convert(file, kernel_name=args.kernel_name)
+if __name__ == "__main__":
+    cli()
diff --git a/docs/tutorials/advanced_topics/howto_pytorch_lightning.md.input b/docs/tutorials/advanced_topics/howto_pytorch_lightning.md.template
similarity index 100%
rename from docs/tutorials/advanced_topics/howto_pytorch_lightning.md.input
rename to docs/tutorials/advanced_topics/howto_pytorch_lightning.md.template
diff --git a/docs/tutorials/advanced_topics/hp_tuning_with_optuna.md.input b/docs/tutorials/advanced_topics/hp_tuning_with_optuna.md.template
similarity index 99%
rename from docs/tutorials/advanced_topics/hp_tuning_with_optuna.md.input
rename to docs/tutorials/advanced_topics/hp_tuning_with_optuna.md.template
index 2b7c5d2b1e..8f1660b231 100644
--- a/docs/tutorials/advanced_topics/hp_tuning_with_optuna.md.input
+++ b/docs/tutorials/advanced_topics/hp_tuning_with_optuna.md.template
@@ -8,8 +8,6 @@ In this notebook we will see how to tune the hyperparameters of a GlutonTS model
 
 
 ```python
-import mxnet as mx
-from mxnet import gluon
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
diff --git a/docs/tutorials/advanced_topics/trainer_callbacks.md.input b/docs/tutorials/advanced_topics/trainer_callbacks.md.template
similarity index 100%
rename from docs/tutorials/advanced_topics/trainer_callbacks.md.input
rename to docs/tutorials/advanced_topics/trainer_callbacks.md.template
diff --git a/docs/tutorials/data_manipulation/pandasdataframes.md.input b/docs/tutorials/data_manipulation/pandasdataframes.md.template
similarity index 100%
rename from docs/tutorials/data_manipulation/pandasdataframes.md.input
rename to docs/tutorials/data_manipulation/pandasdataframes.md.template
diff --git a/docs/tutorials/data_manipulation/synthetic_data_generation.md.input b/docs/tutorials/data_manipulation/synthetic_data_generation.md.template
similarity index 100%
rename from docs/tutorials/data_manipulation/synthetic_data_generation.md.input
rename to docs/tutorials/data_manipulation/synthetic_data_generation.md.template
diff --git a/docs/tutorials/forecasting/extended_tutorial.md.input b/docs/tutorials/forecasting/extended_tutorial.md.template
similarity index 100%
rename from docs/tutorials/forecasting/extended_tutorial.md.input
rename to docs/tutorials/forecasting/extended_tutorial.md.template
diff --git a/docs/tutorials/forecasting/quick_start_tutorial.md.input b/docs/tutorials/forecasting/quick_start_tutorial.md.template
similarity index 100%
rename from docs/tutorials/forecasting/quick_start_tutorial.md.input
rename to docs/tutorials/forecasting/quick_start_tutorial.md.template
diff --git a/requirements/requirements-docs.txt b/requirements/requirements-docs.txt
index 8022ccc46b..4f3a32913d 100644
--- a/requirements/requirements-docs.txt
+++ b/requirements/requirements-docs.txt
@@ -11,3 +11,6 @@ optuna~=2.10
 furo==2022.6.4.1
 m2r2
 myst-parser
+click
+orjson
+black
diff --git a/src/gluonts/dataset/__init__.py b/src/gluonts/dataset/__init__.py
index 6be7faaa76..d514686512 100644
--- a/src/gluonts/dataset/__init__.py
+++ b/src/gluonts/dataset/__init__.py
@@ -34,7 +34,7 @@ def __len__(self) -> int:
 
 
 @dataclass
-class DatasetCollection(Dataset):
+class DatasetCollection:
     """Flattened access to a collection of datasets."""
 
     datasets: List[Dataset]
diff --git a/src/gluonts/dataset/pandas.py b/src/gluonts/dataset/pandas.py
index f13049932b..72ecbefc65 100644
--- a/src/gluonts/dataset/pandas.py
+++ b/src/gluonts/dataset/pandas.py
@@ -19,12 +19,12 @@
 from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
 from toolz import valmap
 
-from gluonts.dataset.common import Dataset, DataEntry, ProcessDataEntry
+from gluonts.dataset.common import DataEntry, ProcessDataEntry
 from gluonts.dataset.field_names import FieldName
 
 
 @dataclass
-class PandasDataset(Dataset):
+class PandasDataset:
     """
     A pandas.DataFrame-based dataset type.