WIP: [DO NOT MERGE] enforce wheel size limits, README formatting in CI #17284

Draft · wants to merge 9 commits into base: branch-24.12
185 changes: 0 additions & 185 deletions .github/workflows/pr.yaml
@@ -14,29 +14,11 @@ jobs:
needs:
- changed-files
- checks
- conda-cpp-build
- conda-cpp-checks
- conda-cpp-tests
- conda-python-build
- conda-python-cudf-tests
- conda-python-other-tests
- conda-java-tests
- static-configure
- conda-notebook-tests
- docs-build
- wheel-build-libcudf
- wheel-build-pylibcudf
- wheel-build-cudf
- wheel-tests-cudf
- wheel-build-cudf-polars
- wheel-tests-cudf-polars
- cudf-polars-polars-tests
- wheel-build-dask-cudf
- wheel-tests-dask-cudf
- devcontainer
- unit-tests-cudf-pandas
- pandas-tests
- pandas-tests-diff
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.12
if: always()
@@ -94,91 +76,6 @@ jobs:
uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12
with:
enable_check_generated_files: false
conda-cpp-build:
needs: checks
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12
with:
build_type: pull-request
conda-cpp-checks:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12
with:
build_type: pull-request
enable_check_symbols: true
conda-cpp-tests:
needs: [conda-cpp-build, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp
with:
build_type: pull-request
conda-python-build:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12
with:
build_type: pull-request
conda-python-cudf-tests:
needs: [conda-python-build, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
with:
build_type: pull-request
script: "ci/test_python_cudf.sh"
conda-python-other-tests:
# Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
needs: [conda-python-build, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
with:
build_type: pull-request
script: "ci/test_python_other.sh"
conda-java-tests:
needs: [conda-cpp-build, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_java
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
run_script: "ci/test_java.sh"
static-configure:
needs: checks
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12
with:
build_type: pull-request
# Use the wheel container so we can skip conda solves and since our
# primary static consumers (Spark) are not in conda anyway.
container_image: "rapidsai/ci-wheel:latest"
run_script: "ci/configure_cpp_static.sh"
conda-notebook-tests:
needs: [conda-python-build, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
run_script: "ci/test_notebooks.sh"
docs-build:
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
run_script: "ci/build_docs.sh"
wheel-build-libcudf:
needs: checks
secrets: inherit
@@ -202,14 +99,6 @@ jobs:
with:
build_type: pull-request
script: "ci/build_wheel_cudf.sh"
wheel-tests-cudf:
needs: [wheel-build-cudf, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
with:
build_type: pull-request
script: ci/test_wheel_cudf.sh
wheel-build-cudf-polars:
needs: wheel-build-pylibcudf
secrets: inherit
@@ -219,29 +108,6 @@ jobs:
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: "ci/build_wheel_cudf_polars.sh"
wheel-tests-cudf-polars:
needs: [wheel-build-cudf-polars, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
# This always runs, but only fails if this PR touches code in
# pylibcudf or cudf_polars
script: "ci/test_wheel_cudf_polars.sh"
cudf-polars-polars-tests:
needs: wheel-build-cudf-polars
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
# This always runs, but only fails if this PR touches code in
# pylibcudf or cudf_polars
script: "ci/test_cudf_polars_polars_tests.sh"
wheel-build-dask-cudf:
needs: wheel-build-cudf
secrets: inherit
@@ -251,54 +117,3 @@ jobs:
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: "ci/build_wheel_dask_cudf.sh"
wheel-tests-dask-cudf:
needs: [wheel-build-dask-cudf, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: ci/test_wheel_dask_cudf.sh
devcontainer:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.12
with:
arch: '["amd64"]'
cuda: '["12.5"]'
build_command: |
sccache -z;
build-all -DBUILD_BENCHMARKS=ON --verbose;
sccache -s;
unit-tests-cudf-pandas:
needs: [wheel-build-cudf, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python || fromJSON(needs.changed-files.outputs.changed_file_groups).test_cudf_pandas
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: ci/cudf_pandas_scripts/run_tests.sh
pandas-tests:
# run the Pandas unit tests using PR branch
needs: [wheel-build-cudf, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python || fromJSON(needs.changed-files.outputs.changed_file_groups).test_cudf_pandas
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
# Hide test failures because they exceed the GITHUB_STEP_SUMMARY output limit.
test_summary_show: "none"
pandas-tests-diff:
# diff the results of running the Pandas unit tests and publish a job summary
needs: pandas-tests
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12
with:
node_type: cpu4
build_type: pull-request
run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh"
2 changes: 2 additions & 0 deletions ci/build_wheel_cudf.sh
@@ -27,4 +27,6 @@ python -m auditwheel repair \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

./ci/validate_wheel.sh ${package_dir} final_dist

RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist
1 change: 1 addition & 0 deletions ci/build_wheel_cudf_polars.sh
@@ -6,6 +6,7 @@ set -euo pipefail
package_dir="python/cudf_polars"

./ci/build_wheel.sh cudf-polars ${package_dir}
./ci/validate_wheel.sh ${package_dir} dist

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist
1 change: 1 addition & 0 deletions ci/build_wheel_dask_cudf.sh
@@ -6,6 +6,7 @@ set -euo pipefail
package_dir="python/dask_cudf"

./ci/build_wheel.sh dask-cudf ${package_dir}
./ci/validate_wheel.sh ${package_dir} dist

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist
2 changes: 2 additions & 0 deletions ci/build_wheel_libcudf.sh
@@ -37,4 +37,6 @@ python -m auditwheel repair \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

./ci/validate_wheel.sh ${package_dir} final_dist

RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist"
2 changes: 2 additions & 0 deletions ci/build_wheel_pylibcudf.sh
@@ -25,4 +25,6 @@ python -m auditwheel repair \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

./ci/validate_wheel.sh ${package_dir} final_dist

RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist
30 changes: 30 additions & 0 deletions ci/validate_wheel.sh
@@ -0,0 +1,30 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.

set -euo pipefail

package_dir=$1
wheel_dir_relative_path=$2

# TODO: pre-install these in ci-imgs
rapids-logger "installing validation tools"
python -m pip install \
    -v \
    'pydistcheck==0.8.0' \
    'twine>=5.0.0'

pyenv rehash

cd "${package_dir}"

rapids-logger "validate packages with 'pydistcheck'"

pydistcheck \
    --inspect \
    "$(echo ${wheel_dir_relative_path}/*.whl)"

rapids-logger "validate packages with 'twine'"

twine check \
    --strict \
    "$(echo ${wheel_dir_relative_path}/*.whl)"
8 changes: 8 additions & 0 deletions python/cudf/pyproject.toml
@@ -83,6 +83,14 @@ cudf-pandas-tests = [
Homepage = "https://github.com/rapidsai/cudf"
Documentation = "https://docs.rapids.ai/api/cudf/stable/"

[tool.pydistcheck]
select = [
"distro-too-large-compressed",
]

# PyPI limit is 100 MiB; fail CI before we get too close to that
max_allowed_size_compressed = '75M'

[tool.pytest.ini_options]
addopts = "--tb=native --strict-config --strict-markers"
empty_parameter_set_mark = "fail_at_collect"
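The [tool.pydistcheck] tables added to each package are what the new ci/validate_wheel.sh step enforces; the limits live in pyproject.toml rather than being passed on the command line. To check how much headroom a locally built wheel has under the 75M ceiling, compare its on-disk (compressed) size, e.g. (path illustrative):

# Compare against max_allowed_size_compressed in [tool.pydistcheck].
du -h python/cudf/final_dist/*.whl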
8 changes: 8 additions & 0 deletions python/cudf_kafka/pyproject.toml
@@ -47,6 +47,14 @@ rapids = ["rmm", "cudf", "dask_cudf"]
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["E402", "F401"]

[tool.pydistcheck]
select = [
"distro-too-large-compressed",
]

# PyPI limit is 100 MiB; fail CI before we get too close to that
max_allowed_size_compressed = '75M'

[tool.pytest.ini_options]
addopts = "--tb=native --strict-config --strict-markers"
empty_parameter_set_mark = "fail_at_collect"
8 changes: 8 additions & 0 deletions python/cudf_polars/pyproject.toml
@@ -49,6 +49,14 @@ license-files = ["LICENSE"]
[tool.setuptools.dynamic]
version = {file = "cudf_polars/VERSION"}

[tool.pydistcheck]
select = [
"distro-too-large-compressed",
]

# PyPI limit is 100 MiB; fail CI before we get too close to that
max_allowed_size_compressed = '75M'

[tool.pytest.ini_options]
addopts = "--tb=native --strict-config --strict-markers"
empty_parameter_set_mark = "fail_at_collect"
8 changes: 8 additions & 0 deletions python/custreamz/pyproject.toml
@@ -65,6 +65,14 @@ include = [
]
exclude = ["*tests*"]

[tool.pydistcheck]
select = [
"distro-too-large-compressed",
]

# PyPI limit is 100 MiB; fail CI before we get too close to that
max_allowed_size_compressed = '75M'

[tool.ruff]
extend = "../../pyproject.toml"

8 changes: 8 additions & 0 deletions python/dask_cudf/pyproject.toml
@@ -81,6 +81,14 @@ section-order = ["future", "standard-library", "third-party", "dask", "rapids",
dask = ["dask", "distributed", "dask_cuda"]
rapids = ["rmm", "cudf"]

[tool.pydistcheck]
select = [
"distro-too-large-compressed",
]

# PyPI limit is 100 MiB; fail CI before we get too close to that
max_allowed_size_compressed = '75M'

[tool.pytest.ini_options]
addopts = "--tb=native --strict-config --strict-markers"
empty_parameter_set_mark = "fail_at_collect"
8 changes: 8 additions & 0 deletions python/libcudf/pyproject.toml
@@ -48,6 +48,14 @@ Homepage = "https://github.com/rapidsai/cudf"
[project.entry-points."cmake.prefix"]
libcudf = "libcudf"

[tool.pydistcheck]
select = [
"distro-too-large-compressed",
]

# PyPI limit is 600 MiB; fail CI before we get too close to that
max_allowed_size_compressed = '75M'

[tool.scikit-build]
build-dir = "build/{wheel_tag}"
cmake.build-type = "Release"
8 changes: 8 additions & 0 deletions python/pylibcudf/pyproject.toml
@@ -68,6 +68,14 @@ rapids = ["rmm"]
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["E402", "F401"]

[tool.pydistcheck]
select = [
"distro-too-large-compressed",
]

# PyPI limit is 100 MiB; fail CI before we get too close to that
max_allowed_size_compressed = '75M'

[tool.pytest.ini_options]
# --import-mode=importlib because two test_json.py files exist and the tests directory is not a structured module
addopts = "--tb=native --strict-config --strict-markers --import-mode=importlib"