From 1f9ad2f33867789d734c9be9bbacaabe1e348884 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 12 Nov 2024 16:20:29 -0600 Subject: [PATCH] enforce wheel size limits, README formatting in CI (#17284) Contributes to https://github.com/rapidsai/build-planning/issues/110 Proposes adding 2 types of validation on wheels in CI, to ensure we continue to produce wheels that are suitable for PyPI. * checks on wheel size (compressed), - *to be sure they're under PyPI limits* - *and to prompt discussion on PRs that significantly increase wheel sizes* * checks on README formatting - *to ensure they'll render properly as the PyPI project homepages* - *e.g. like how https://github.com/scikit-learn/scikit-learn/blob/main/README.rst becomes https://pypi.org/project/scikit-learn/* ## Notes for Reviewers ### How I tested this Initially set the size threshold for `libcudf` to a value that I knew it'd violate (75MB compressed, when the wheels are 400+ MB compressed). Saw CI fail as expected, and print a summary with the expected contents. ```text checking 'final_dist/libcudf_cu11-24.12.0a333-py3-none-manylinux_2_28_aarch64.whl' ----- package inspection summary ----- file size * compressed size: 0.4G * uncompressed size: 0.6G * compression space saving: 34.6% contents * directories: 164 * files: 1974 (2 compiled) size by extension * .so - 0.6G (97.0%) * .h - 6.7M (1.0%) * no-extension - 4.8M (0.7%) * .cuh - 3.8M (0.6%) * .hpp - 2.2M (0.3%) * .a - 1.1M (0.2%) * .inl - 0.8M (0.1%) * .cmake - 0.1M (0.0%) * .md - 8.3K (0.0%) * .py - 4.0K (0.0%) * .pc - 0.2K (0.0%) * .txt - 34.0B (0.0%) largest files * (0.6G) libcudf/lib64/libcudf.so * (3.3M) libcudf/bin/flatc * (1.0M) libcudf/lib64/libflatbuffers.a * (0.5M) libcudf/include/libcudf/rapids/libcudacxx/cuda/std/__atomic/functions/cuda_ptx_generated.h * (0.2M) libcudf_cu11-24.12.0a333.dist-info/RECORD ------------ check results ----------- 1. [distro-too-large-compressed] Compressed size 0.4G is larger than the allowed size (75.0M). errors found while checking: 1 ``` ([build link](https://github.com/rapidsai/cudf/actions/runs/11748370606/job/32732391718?pr=17284#step:13:3062)) Updated that threshold in `python/libcudf/pyproject.toml`, and saw the build succeed (but the summary still printed). # Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17284 --- ci/build_wheel_cudf.sh | 2 ++ ci/build_wheel_cudf_polars.sh | 1 + ci/build_wheel_dask_cudf.sh | 1 + ci/build_wheel_libcudf.sh | 2 ++ ci/build_wheel_pylibcudf.sh | 2 ++ ci/validate_wheel.sh | 21 +++++++++++++++++++++ python/cudf/pyproject.toml | 8 ++++++++ python/cudf_kafka/pyproject.toml | 8 ++++++++ python/cudf_polars/pyproject.toml | 8 ++++++++ python/custreamz/pyproject.toml | 8 ++++++++ python/dask_cudf/pyproject.toml | 8 ++++++++ python/libcudf/pyproject.toml | 8 ++++++++ python/pylibcudf/pyproject.toml | 8 ++++++++ 13 files changed, 85 insertions(+) create mode 100755 ci/validate_wheel.sh diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh index ae4eb0d5c66..32dd5a7fa62 100755 --- a/ci/build_wheel_cudf.sh +++ b/ci/build_wheel_cudf.sh @@ -27,4 +27,6 @@ python -m auditwheel repair \ -w ${package_dir}/final_dist \ ${package_dir}/dist/* +./ci/validate_wheel.sh ${package_dir} final_dist + RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist diff --git a/ci/build_wheel_cudf_polars.sh b/ci/build_wheel_cudf_polars.sh index 79853cdbdb2..38048125247 100755 --- a/ci/build_wheel_cudf_polars.sh +++ b/ci/build_wheel_cudf_polars.sh @@ -6,6 +6,7 @@ set -euo pipefail package_dir="python/cudf_polars" ./ci/build_wheel.sh cudf-polars ${package_dir} +./ci/validate_wheel.sh ${package_dir} dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist diff --git a/ci/build_wheel_dask_cudf.sh b/ci/build_wheel_dask_cudf.sh index 00c64afa2ef..b0ae2f23abc 100755 --- a/ci/build_wheel_dask_cudf.sh +++ b/ci/build_wheel_dask_cudf.sh @@ -6,6 +6,7 @@ set -euo pipefail package_dir="python/dask_cudf" ./ci/build_wheel.sh dask-cudf ${package_dir} +./ci/validate_wheel.sh ${package_dir} dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist diff --git a/ci/build_wheel_libcudf.sh b/ci/build_wheel_libcudf.sh index aabd3814a24..af49942c8cd 100755 --- a/ci/build_wheel_libcudf.sh +++ b/ci/build_wheel_libcudf.sh @@ -37,4 +37,6 @@ python -m auditwheel repair \ -w ${package_dir}/final_dist \ ${package_dir}/dist/* +./ci/validate_wheel.sh ${package_dir} final_dist + RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist" diff --git a/ci/build_wheel_pylibcudf.sh b/ci/build_wheel_pylibcudf.sh index c4a89f20f5f..5a8f3397714 100755 --- a/ci/build_wheel_pylibcudf.sh +++ b/ci/build_wheel_pylibcudf.sh @@ -25,4 +25,6 @@ python -m auditwheel repair \ -w ${package_dir}/final_dist \ ${package_dir}/dist/* +./ci/validate_wheel.sh ${package_dir} final_dist + RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh new file mode 100755 index 00000000000..5910a5c59fe --- /dev/null +++ b/ci/validate_wheel.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +package_dir=$1 +wheel_dir_relative_path=$2 + +cd "${package_dir}" + +rapids-logger "validate packages with 'pydistcheck'" + +pydistcheck \ + --inspect \ + "$(echo ${wheel_dir_relative_path}/*.whl)" + +rapids-logger "validate packages with 'twine'" + +twine check \ + --strict \ + "$(echo ${wheel_dir_relative_path}/*.whl)" diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index ca6dbddfecc..280dd52bb22 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -83,6 +83,14 @@ cudf-pandas-tests = [ Homepage = "https://github.com/rapidsai/cudf" Documentation = "https://docs.rapids.ai/api/cudf/stable/" +[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M' + [tool.pytest.ini_options] addopts = "--tb=native --strict-config --strict-markers" empty_parameter_set_mark = "fail_at_collect" diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index ec0bc0eb22b..b2ea3f06e48 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -47,6 +47,14 @@ rapids = ["rmm", "cudf", "dask_cudf"] [tool.ruff.lint.per-file-ignores] "__init__.py" = ["E402", "F401"] +[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M' + [tool.pytest.ini_options] addopts = "--tb=native --strict-config --strict-markers" empty_parameter_set_mark = "fail_at_collect" diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index 2e75dff5c9e..32ea142a96c 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -49,6 +49,14 @@ license-files = ["LICENSE"] [tool.setuptools.dynamic] version = {file = "cudf_polars/VERSION"} +[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M' + [tool.pytest.ini_options] addopts = "--tb=native --strict-config --strict-markers" empty_parameter_set_mark = "fail_at_collect" diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index d3baf3bf4d2..dd67a019c77 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -65,6 +65,14 @@ include = [ ] exclude = ["*tests*"] +[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M' + [tool.ruff] extend = "../../pyproject.toml" diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index c4bfc3054bc..07d9143db36 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -81,6 +81,14 @@ section-order = ["future", "standard-library", "third-party", "dask", "rapids", dask = ["dask", "distributed", "dask_cuda"] rapids = ["rmm", "cudf"] +[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M' + [tool.pytest.ini_options] addopts = "--tb=native --strict-config --strict-markers" empty_parameter_set_mark = "fail_at_collect" diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml index 62726bb0df4..8c650eb2144 100644 --- a/python/libcudf/pyproject.toml +++ b/python/libcudf/pyproject.toml @@ -48,6 +48,14 @@ Homepage = "https://github.com/rapidsai/cudf" [project.entry-points."cmake.prefix"] libcudf = "libcudf" +[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# PyPI limit is 600 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '525M' + [tool.scikit-build] build-dir = "build/{wheel_tag}" cmake.build-type = "Release" diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index 83ed95823da..e83db47830c 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -85,6 +85,14 @@ rapids = ["rmm"] [tool.ruff.lint.per-file-ignores] "__init__.py" = ["E402", "F401"] +[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M' + [tool.pytest.ini_options] # --import-mode=importlib because two test_json.py exists and tests directory is not a structured module addopts = "--tb=native --strict-config --strict-markers --import-mode=importlib"