Skip to content

Commit

Permalink
enforce wheel size limits, README formatting in CI (#17284)
Browse files Browse the repository at this point in the history
Contributes to rapidsai/build-planning#110

Proposes adding 2 types of validation on wheels in CI, to ensure we continue to produce wheels that are suitable for PyPI.

* checks on wheel size (compressed),
  - *to be sure they're under PyPI limits*
  - *and to prompt discussion on PRs that significantly increase wheel sizes*
* checks on README formatting
  - *to ensure they'll render properly as the PyPI project homepages*
  - *e.g. how https://github.com/scikit-learn/scikit-learn/blob/main/README.rst becomes https://pypi.org/project/scikit-learn/*

## Notes for Reviewers

### How I tested this

Initially set the size threshold for `libcudf` to a value that I knew it'd violate (75MB compressed, when the wheels are 400+ MB compressed).

Saw CI fail as expected, and print a summary with the expected contents.

```text
checking 'final_dist/libcudf_cu11-24.12.0a333-py3-none-manylinux_2_28_aarch64.whl'
----- package inspection summary -----
file size
  * compressed size: 0.4G
  * uncompressed size: 0.6G
  * compression space saving: 34.6%
contents
  * directories: 164
  * files: 1974 (2 compiled)
size by extension
  * .so - 0.6G (97.0%)
  * .h - 6.7M (1.0%)
  * no-extension - 4.8M (0.7%)
  * .cuh - 3.8M (0.6%)
  * .hpp - 2.2M (0.3%)
  * .a - 1.1M (0.2%)
  * .inl - 0.8M (0.1%)
  * .cmake - 0.1M (0.0%)
  * .md - 8.3K (0.0%)
  * .py - 4.0K (0.0%)
  * .pc - 0.2K (0.0%)
  * .txt - 34.0B (0.0%)
largest files
  * (0.6G) libcudf/lib64/libcudf.so
  * (3.3M) libcudf/bin/flatc
  * (1.0M) libcudf/lib64/libflatbuffers.a
  * (0.5M) libcudf/include/libcudf/rapids/libcudacxx/cuda/std/__atomic/functions/cuda_ptx_generated.h
  * (0.2M) libcudf_cu11-24.12.0a333.dist-info/RECORD
------------ check results -----------
1. [distro-too-large-compressed] Compressed size 0.4G is larger than the allowed size (75.0M).
errors found while checking: 1
```

([build link](https://github.com/rapidsai/cudf/actions/runs/11748370606/job/32732391718?pr=17284#step:13:3062))

Updated that threshold in `python/libcudf/pyproject.toml`, and saw the build succeed (with the inspection summary still printed, as expected).

#

Authors:
  - James Lamb (https://github.com/jameslamb)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: #17284
  • Loading branch information
jameslamb authored Nov 12, 2024
1 parent 796de4b commit 1f9ad2f
Show file tree
Hide file tree
Showing 13 changed files with 85 additions and 0 deletions.
2 changes: 2 additions & 0 deletions ci/build_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,6 @@ python -m auditwheel repair \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

./ci/validate_wheel.sh ${package_dir} final_dist

RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist
1 change: 1 addition & 0 deletions ci/build_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ set -euo pipefail
package_dir="python/cudf_polars"

./ci/build_wheel.sh cudf-polars ${package_dir}
./ci/validate_wheel.sh ${package_dir} dist

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist
1 change: 1 addition & 0 deletions ci/build_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ set -euo pipefail
package_dir="python/dask_cudf"

./ci/build_wheel.sh dask-cudf ${package_dir}
./ci/validate_wheel.sh ${package_dir} dist

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist
2 changes: 2 additions & 0 deletions ci/build_wheel_libcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,6 @@ python -m auditwheel repair \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

./ci/validate_wheel.sh ${package_dir} final_dist

RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist"
2 changes: 2 additions & 0 deletions ci/build_wheel_pylibcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,6 @@ python -m auditwheel repair \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

./ci/validate_wheel.sh ${package_dir} final_dist

RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist
21 changes: 21 additions & 0 deletions ci/validate_wheel.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Validate every wheel found in one package's output directory.
#
# Usage: ./ci/validate_wheel.sh <package_dir> <wheel_dir_relative_path>
#
#   package_dir              directory to cd into before running the checks
#   wheel_dir_relative_path  directory containing the *.whl files, relative
#                            to package_dir (e.g. "dist" or "final_dist")
#
# Runs 'pydistcheck --inspect' and 'twine check --strict' on the wheels.
# Exits non-zero if either tool fails or if no wheels are found.

set -euo pipefail

package_dir=${1:?usage: validate_wheel.sh <package_dir> <wheel_dir_relative_path>}
wheel_dir_relative_path=${2:?usage: validate_wheel.sh <package_dir> <wheel_dir_relative_path>}

# NOTE(review): presumably the checks run from inside the package directory so
# that pydistcheck picks up the [tool.pydistcheck] table from that package's
# pyproject.toml -- confirm against pydistcheck's config discovery rules.
cd "${package_dir}"

# Expand the glob into an array so that each wheel is passed as its own
# argument. Wrapping "$(echo .../*.whl)" in quotes would join multiple wheels
# into a single space-separated (and nonexistent) path, and would pass the
# literal pattern through when nothing matches.
shopt -s nullglob
wheels=("${wheel_dir_relative_path}"/*.whl)
shopt -u nullglob

if [[ ${#wheels[@]} -eq 0 ]]; then
    echo "error: no wheels found in '${package_dir}/${wheel_dir_relative_path}'" >&2
    exit 1
fi

rapids-logger "validate packages with 'pydistcheck'"

pydistcheck \
    --inspect \
    "${wheels[@]}"

rapids-logger "validate packages with 'twine'"

twine check \
    --strict \
    "${wheels[@]}"
8 changes: 8 additions & 0 deletions python/cudf/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@ cudf-pandas-tests = [
Homepage = "https://github.com/rapidsai/cudf"
Documentation = "https://docs.rapids.ai/api/cudf/stable/"

[tool.pydistcheck]
select = [
"distro-too-large-compressed",
]

# PyPI limit is 100 MiB, fail CI before we get too close to that
max_allowed_size_compressed = '75M'

[tool.pytest.ini_options]
addopts = "--tb=native --strict-config --strict-markers"
empty_parameter_set_mark = "fail_at_collect"
Expand Down
8 changes: 8 additions & 0 deletions python/cudf_kafka/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ rapids = ["rmm", "cudf", "dask_cudf"]
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["E402", "F401"]

[tool.pydistcheck]
select = [
"distro-too-large-compressed",
]

# PyPI limit is 100 MiB, fail CI before we get too close to that
max_allowed_size_compressed = '75M'

[tool.pytest.ini_options]
addopts = "--tb=native --strict-config --strict-markers"
empty_parameter_set_mark = "fail_at_collect"
Expand Down
8 changes: 8 additions & 0 deletions python/cudf_polars/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ license-files = ["LICENSE"]
[tool.setuptools.dynamic]
version = {file = "cudf_polars/VERSION"}

[tool.pydistcheck]
select = [
"distro-too-large-compressed",
]

# PyPI limit is 100 MiB, fail CI before we get too close to that
max_allowed_size_compressed = '75M'

[tool.pytest.ini_options]
addopts = "--tb=native --strict-config --strict-markers"
empty_parameter_set_mark = "fail_at_collect"
Expand Down
8 changes: 8 additions & 0 deletions python/custreamz/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,14 @@ include = [
]
exclude = ["*tests*"]

[tool.pydistcheck]
select = [
"distro-too-large-compressed",
]

# PyPI limit is 100 MiB, fail CI before we get too close to that
max_allowed_size_compressed = '75M'

[tool.ruff]
extend = "../../pyproject.toml"

Expand Down
8 changes: 8 additions & 0 deletions python/dask_cudf/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,14 @@ section-order = ["future", "standard-library", "third-party", "dask", "rapids",
dask = ["dask", "distributed", "dask_cuda"]
rapids = ["rmm", "cudf"]

[tool.pydistcheck]
select = [
"distro-too-large-compressed",
]

# PyPI limit is 100 MiB, fail CI before we get too close to that
max_allowed_size_compressed = '75M'

[tool.pytest.ini_options]
addopts = "--tb=native --strict-config --strict-markers"
empty_parameter_set_mark = "fail_at_collect"
Expand Down
8 changes: 8 additions & 0 deletions python/libcudf/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ Homepage = "https://github.com/rapidsai/cudf"
[project.entry-points."cmake.prefix"]
libcudf = "libcudf"

[tool.pydistcheck]
select = [
"distro-too-large-compressed",
]

# PyPI limit is 600 MiB, fail CI before we get too close to that
max_allowed_size_compressed = '525M'

[tool.scikit-build]
build-dir = "build/{wheel_tag}"
cmake.build-type = "Release"
Expand Down
8 changes: 8 additions & 0 deletions python/pylibcudf/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,14 @@ rapids = ["rmm"]
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["E402", "F401"]

[tool.pydistcheck]
select = [
"distro-too-large-compressed",
]

# PyPI limit is 100 MiB, fail CI before we get too close to that
max_allowed_size_compressed = '75M'

[tool.pytest.ini_options]
# --import-mode=importlib because two test_json.py exists and tests directory is not a structured module
addopts = "--tb=native --strict-config --strict-markers --import-mode=importlib"
Expand Down

0 comments on commit 1f9ad2f

Please sign in to comment.