TEST-#7441: Correctly skip sanity tests if we don't need them. #16264
Workflow file for this run
name: ci
on:
  pull_request:
    paths:
      # NOTE: keep these paths in sync with the paths that trigger the
      # fuzzydata GitHub Actions in .github/workflows/fuzzydata-test.yml
      - .github/workflows/**
      - .github/actions/**
      - '!.github/workflows/push-to-main.yml'
      - asv_bench/**
      - modin/**
      - requirements/**
      - scripts/**
      - environment-dev.yml
      - requirements-dev.txt
      - setup.cfg
      - setup.py
      - versioneer.py
  push:
  schedule:
    - cron: "30 2 * * WED"
    - cron: "30 2 * * THU"
concurrency:
  # Cancel other jobs in the same branch. We don't care whether CI passes
  # on old commits.
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
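# With this setup, a pull-request run lands in a group such as
# "ci-refs/pull/<PR number>/merge" (github.workflow is "ci"; github.ref for a
# pull_request event is the PR merge ref), so a new push to the same PR cancels
# the in-flight run. Pushes to branches like refs/heads/main evaluate
# cancel-in-progress to false and always run to completion. Illustrative
# values only; the exact ref depends on the trigger.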
env:
  MODIN_GITHUB_CI: true
jobs:
  python-filter:
    runs-on: ubuntu-latest
    outputs:
      python-version: ${{ steps.choose.outputs.python-version }}
    steps:
      - id: choose
        run: |
          if [[ "${{ github.event.schedule }}" = "30 2 * * WED" ]]
          then
            echo "python-version=3.10" >> "$GITHUB_OUTPUT"
          elif [[ "${{ github.event.schedule }}" = "30 2 * * THU" ]]
          then
            echo "python-version=3.11" >> "$GITHUB_OUTPUT"
          else
            echo "python-version=3.9" >> "$GITHUB_OUTPUT"
          fi
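      # The branch above maps each trigger to one Python version:
      #   cron "30 2 * * WED" -> 3.10, cron "30 2 * * THU" -> 3.11,
      #   everything else (pull_request, push) -> 3.9.
      # A quick local check of the same logic (illustrative shell only):
      #   SCHEDULE="30 2 * * WED"
      #   [[ "$SCHEDULE" = "30 2 * * WED" ]] && echo "python-version=3.10"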
  lint-mypy:
    needs: [python-filter]
    name: lint (mypy)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/python-only
        with:
          python-version: ${{ needs.python-filter.outputs.python-version }}
      - run: pip install -r requirements-dev.txt
      - run: mypy --config-file mypy.ini
  lint-flake8:
    needs: [python-filter]
    name: lint (flake8)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/python-only
        with:
          python-version: ${{ needs.python-filter.outputs.python-version }}
      # NOTE: If you are changing the set of packages installed here, make sure that
      # the dev requirements match them.
      - run: pip install flake8 flake8-print flake8-no-implicit-concat
      # NOTE: keep the flake8 command here in sync with the pre-commit hook in
      # /contributing/pre-commit
      - run: flake8 modin/ asv_bench/benchmarks scripts/doc_checker.py
  test-api-and-no-engine:
    needs: [python-filter]
    name: Test API, headers and no-engine mode
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: requirements/requirements-no-engine.yml
          python-version: ${{ needs.python-filter.outputs.python-version }}
      - run: python -m pytest modin/tests/pandas/test_api.py
      - run: python -m pytest modin/tests/test_executions_api.py
      - run: python -m pytest modin/tests/test_headers.py
      - run: python -m pytest modin/tests/core/test_dispatcher.py::test_add_option
      - uses: ./.github/actions/upload-coverage
  test-clean-install:
    needs: [lint-flake8, python-filter]
    strategy:
      matrix:
        os:
          - ubuntu
          - windows
    runs-on: ${{ matrix.os }}-latest
    defaults:
      run:
        shell: bash -l {0}
    name: test-clean-install-${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/python-only
        with:
          python-version: ${{ needs.python-filter.outputs.python-version }}
      - run: python -m pip install -e ".[all]"
      - name: Ensure Ray and Dask engines start up
        run: |
          MODIN_ENGINE=dask python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"
          MODIN_ENGINE=ray python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"
      - name: Ensure the MPI engine starts up
        # Install a working MPI implementation beforehand so mpi4py can link to it
        run: |
          sudo apt-get update
          sudo apt-get install software-properties-common
          sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu jammy main universe restricted multiverse"
          sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu jammy-updates main universe restricted multiverse"
          sudo add-apt-repository "deb http://security.ubuntu.com/ubuntu jammy-security main universe restricted multiverse"
          sudo apt-get update
          sudo apt-get install libmpich-dev=4.0-3 libmpich12=4.0-3 mpich=4.0-3
          python -m pip install -e ".[mpi]"
          MODIN_ENGINE=unidist UNIDIST_BACKEND=mpi mpiexec -n 1 python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"
        if: matrix.os == 'ubuntu'
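      # The jammy repositories above are presumably added so that the exact
      # mpich=4.0-3 packages can resolve on the runner image; mpi4py then links
      # against that known MPI build. This is our reading of the step, not a
      # documented requirement.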
  test-internals:
    needs: [lint-flake8, python-filter]
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    name: test-internals
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: environment-dev.yml
          python-version: ${{ needs.python-filter.outputs.python-version }}
      - name: Internals tests
        run: python -m pytest modin/tests/core/test_dispatcher.py
      - run: python -m pytest modin/tests/config
      - run: python -m pytest modin/tests/test_envvar_catcher.py
      - run: python -m pytest modin/tests/core/storage_formats/base/test_internals.py
      - run: python -m pytest modin/tests/core/storage_formats/pandas/test_internals.py
      - run: python -m pytest modin/tests/test_envvar_npartitions.py
      - run: python -m pytest modin/tests/test_utils.py
      - run: python -m pytest asv_bench/test/test_utils.py
      - run: python -m pytest modin/tests/interchange/dataframe_protocol/base
      - run: python -m pytest modin/tests/test_dataframe_api_standard.py
      - run: python -m pytest modin/tests/test_logging.py
      - uses: ./.github/actions/upload-coverage
  test-defaults:
    needs: [lint-flake8, python-filter]
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    strategy:
      matrix:
        execution: [BaseOnPython]
    env:
      MODIN_TEST_DATASET_SIZE: "small"
    name: Test ${{ matrix.execution }} execution, Python ${{ needs.python-filter.outputs.python-version }}
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: environment-dev.yml
          python-version: ${{ needs.python-filter.outputs.python-version }}
      - name: Install HDF5
        run: sudo apt update && sudo apt install -y libhdf5-dev
      - name: xgboost tests
        run: |
          # TODO(https://github.com/modin-project/modin/issues/5194): Uncap xgboost
          # when we use collective instead of rabit.
          # Per the thread https://github.com/conda-forge/miniforge/issues/513,
          # remove unused conda packages and caches to avoid `Found incorrect
          # download: joblib` error from mamba.
          mamba clean --all
          mamba install "xgboost>=1.7.1,<2.0.0" scikit-learn -c conda-forge
          python -m pytest modin/tests/experimental/xgboost/test_default.py --execution=${{ matrix.execution }}
      - run: python -m pytest -n 2 modin/tests/core/storage_formats/base/test_internals.py --execution=${{ matrix.execution }}
      - uses: ./.github/actions/run-core-tests
        with:
          runner: python -m pytest --execution=${{ matrix.execution }}
      - uses: ./.github/actions/upload-coverage
  test-asv-benchmarks:
    if: github.event_name == 'pull_request'
    needs: [lint-flake8]
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    env:
      MODIN_ENGINE: ray
      MODIN_MEMORY: 1000000000
      MODIN_TEST_DATASET_SIZE: small
    name: test-asv-benchmarks
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 1
      - uses: conda-incubator/setup-miniconda@v3
        with:
          auto-activate-base: true
          activate-environment: ""
          miniforge-variant: Miniforge3
          miniforge-version: latest
          use-mamba: true
      - name: Running benchmarks
        run: |
          git remote add upstream https://github.com/modin-project/modin.git
          git fetch upstream
          if git diff upstream/main --name-only | grep -q "^asv_bench/"; then
            cd asv_bench
            mamba env create -f ../environment-dev.yml
            conda activate modin
            pip install ..
            asv machine --yes
            # check Modin on Ray
            asv run --quick --dry-run --python=same --strict --show-stderr --launch-method=spawn \
              -b ^benchmarks -b ^io -b ^scalability | tee benchmarks.log
            # check pure pandas
            MODIN_ASV_USE_IMPL=pandas asv run --quick --dry-run --python=same --strict --show-stderr --launch-method=spawn \
              -b ^benchmarks -b ^io | tee benchmarks.log
          else
            echo "Benchmarks did not run, no changes detected"
          fi
        if: always()
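      # The guard above only runs asv when `git diff upstream/main --name-only`
      # reports a changed path under asv_bench/. Note that both asv invocations
      # tee to the same benchmarks.log, so the pure-pandas run overwrites the
      # Ray run's output; only the last invocation's log reaches the artifact
      # uploaded below (and only on failure).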
      - name: Publish benchmarks artifact
        uses: actions/upload-artifact@v4
        with:
          name: Benchmarks log
          path: asv_bench/benchmarks.log
          include-hidden-files: true
        if: failure()
  execution-filter:
    # Choose which executions we want to run all tests for on a pull request.
    # We always test the 'native' and 'python' executions completely because
    # they are fast, but we only test ray, dask, and unidist if we think this
    # pull request affects how we execute with those engines specifically.
    runs-on: ubuntu-latest
    outputs:
      ray: ${{ steps.filter.outputs.ray }}
      dask: ${{ steps.filter.outputs.dask }}
      unidist: ${{ steps.filter.outputs.unidist }}
      engines: ${{ steps.engines.outputs.engines }}
      # NOTE: fixed to reference the paths-filter step's output; there is no
      # step with id "experimental" in this job.
      experimental: ${{ steps.filter.outputs.experimental }}
    steps:
      - uses: actions/checkout@v4
      - uses: dorny/paths-filter@v3
        id: filter
        with:
          filters: |
            shared: &shared
              - 'modin/core/execution/dispatching/**'
            ray:
              - *shared
              - 'modin/core/execution/ray/**'
            dask:
              - *shared
              - 'modin/core/execution/dask/**'
            unidist:
              - *shared
              - 'modin/core/execution/unidist/**'
            experimental:
              - 'modin/experimental/**'
      - uses: actions/setup-python@v5
      - id: engines
        run: |
          python -c "import sys, json; print('engines=' + json.dumps(['python', 'native'] + (sys.argv[1] == 'true' and ['ray'] or []) + (sys.argv[2] == 'true' and ['dask'] or []) ))" \
            "${{ steps.filter.outputs.ray }}" "${{ steps.filter.outputs.dask }}" >> $GITHUB_OUTPUT
  test-all-unidist:
    needs: [lint-flake8, execution-filter, python-filter]
    if: github.event_name == 'push' || needs.execution-filter.outputs.unidist == 'true'
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    strategy:
      matrix:
        python-version: [ "${{ needs.python-filter.outputs.python-version }}" ]
        unidist-backend: ["mpi"]
    env:
      MODIN_ENGINE: "Unidist"
      UNIDIST_BACKEND: ${{matrix.unidist-backend}}
      # Only test reading from SQL server and postgres on ubuntu for now.
      # Eventually, we should test on Windows, too, but we will have to set up
      # the servers differently.
      MODIN_TEST_READ_FROM_SQL_SERVER: true
      MODIN_TEST_READ_FROM_POSTGRES: true
    name: test-ubuntu (engine unidist ${{matrix.unidist-backend}}, python ${{matrix.python-version}})
    services:
      moto:
        image: motoserver/moto:5.0.13
        ports:
          - 5000:5000
        env:
          AWS_ACCESS_KEY_ID: foobar_key
          AWS_SECRET_ACCESS_KEY: foobar_secret
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: requirements/env_unidist_linux.yml
          activate-environment: modin_on_unidist
          python-version: ${{matrix.python-version}}
      - name: Install HDF5
        run: sudo apt update && sudo apt install -y libhdf5-dev
      - name: Set up postgres
        # Locally, specifying port 2345:5432 works, but 2345:2345 and 5432:5432 do not. This solution is from
        # https://stackoverflow.com/questions/36415654/cant-connect-docker-postgresql-9-3
        run: |
          sudo docker pull postgres
          sudo docker run --name some-postgres -e POSTGRES_USER=sa -e POSTGRES_PASSWORD=Strong.Pwd-123 -e POSTGRES_DB=postgres -d -p 2345:5432 postgres
      - run: mpiexec -n 1 python -m pytest modin/tests/pandas/internals/test_benchmark_mode.py
      - run: mpiexec -n 1 python -m pytest modin/tests/test_partition_api.py
      - uses: ./.github/actions/run-core-tests
        with:
          runner: mpiexec -n 1 python -m pytest
          parallel: ""
      - run: mpiexec -n 1 python -m pytest modin/tests/numpy
      - run: chmod +x ./.github/workflows/sql_server/set_up_sql_server.sh
      - run: ./.github/workflows/sql_server/set_up_sql_server.sh
      # We need the extra "-genv" arguments to set environment variables for
      # mpiexec; the tests need these variables to write to the mock S3 filesystem.
      - uses: nick-fields/retry@v3
        # to avoid issues with non-stable `to_csv` tests for unidist on MPI backend.
        # for details see: https://github.com/modin-project/modin/pull/6776
        with:
          timeout_minutes: 15
          max_attempts: 3
          command: |
            conda run --no-capture-output -n modin_on_unidist mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key \
              -genv AWS_SECRET_ACCESS_KEY foobar_secret python -m pytest modin/tests/pandas/test_io.py --verbose
      - run: |
          mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key -genv AWS_SECRET_ACCESS_KEY foobar_secret \
            python -m pytest modin/tests/experimental/test_io_exp.py
      - run: mpiexec -n 1 python -m pytest modin/tests/interchange/dataframe_protocol/test_general.py
      - run: mpiexec -n 1 python -m pytest modin/tests/interchange/dataframe_protocol/pandas/test_protocol.py
      - run: |
          python -m pip install lazy_import
          mpiexec -n 1 python -m pytest modin/tests/pandas/integrations/
      - uses: ./.github/actions/upload-coverage
  test-all:
    needs: [lint-flake8, execution-filter, python-filter]
    strategy:
      matrix:
        os:
          - ubuntu
          - windows
        python-version: [ "${{ needs.python-filter.outputs.python-version }}" ]
        # On push, run the tests for all engines. Otherwise, for pull requests,
        # only run tests for engines that depend on files changed in this PR.
        engine: ${{ fromJSON( (github.event_name == 'push' && '["python", "ray", "dask", "native"]') || needs.execution-filter.outputs.engines ) }}
        test_task:
          - group_1
          - group_2
          - group_3
          - group_4
        exclude: # python and native engines only have one task group that contains all the tests
          - engine: "python"
            test_task: "group_2"
          - engine: "native"
            test_task: "group_2"
          - engine: "python"
            test_task: "group_3"
          - engine: "native"
            test_task: "group_3"
          - engine: "python"
            test_task: "group_4"
          - engine: "native"
            test_task: "group_4"
    runs-on: ${{ matrix.os }}-latest
    defaults:
      run:
        shell: bash -l {0}
    env:
      MODIN_ENGINE: ${{matrix.engine}}
      # Only test reading from SQL server and postgres on ubuntu for now.
      # Eventually, we should test on Windows, too, but we will have to set up
      # the servers differently.
      MODIN_TEST_READ_FROM_SQL_SERVER: ${{ matrix.os == 'ubuntu' }}
      MODIN_TEST_READ_FROM_POSTGRES: ${{ matrix.os == 'ubuntu' }}
    name: test-${{ matrix.os }} (engine ${{matrix.engine}}, python ${{matrix.python-version}}, ${{matrix.test_task}})
    services:
      # Using workaround https://github.com/actions/runner/issues/822#issuecomment-1524826092
      moto:
        # We only need the moto service on Ubuntu, and only for the group_4
        # task or for the native and python engines.
        image: ${{ (matrix.os == 'ubuntu' && (matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4')) && 'motoserver/moto:5.0.13' || '' }}
        ports:
          - 5000:5000
        env:
          AWS_ACCESS_KEY_ID: foobar_key
          AWS_SECRET_ACCESS_KEY: foobar_secret
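    # When the image expression above evaluates to '', the runner skips
    # creating the moto container entirely; that is the trick from the issue
    # linked above for making a service container conditional.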
    steps:
      - name: Set native storage format
        run: echo "MODIN_STORAGE_FORMAT=Native" >> $GITHUB_ENV
        if: matrix.engine == 'native'
      - name: Limit ray memory
        run: echo "MODIN_MEMORY=1000000000" >> $GITHUB_ENV
        if: matrix.os == 'ubuntu' && matrix.engine == 'ray'
      - name: Tell Modin to use existing ray cluster
        run: echo "MODIN_RAY_CLUSTER=True" >> $GITHUB_ENV
        if: matrix.os == 'windows' && matrix.engine == 'ray'
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: environment-dev.yml
          python-version: ${{matrix.python-version}}
      - name: Start local ray cluster
        # Try a few times to start ray to work around
        # https://github.com/modin-project/modin/issues/4562
        uses: nick-fields/retry@v3
        with:
          timeout_minutes: 5
          max_attempts: 5
          command: ray start --head --port=6379 --object-store-memory=1000000000
        if: matrix.os == 'windows' && matrix.engine == 'ray'
      - name: Install HDF5
        run: sudo apt update && sudo apt install -y libhdf5-dev
        if: matrix.os == 'ubuntu'
      - name: Set up postgres
        # Locally, specifying port 2345:5432 works, but 2345:2345 and 5432:5432 do not. This solution is from
        # https://stackoverflow.com/questions/36415654/cant-connect-docker-postgresql-9-3
        run: |
          sudo docker pull postgres
          sudo docker run --name some-postgres -e POSTGRES_USER=sa -e POSTGRES_PASSWORD=Strong.Pwd-123 -e POSTGRES_DB=postgres -d -p 2345:5432 postgres
        if: matrix.os == 'ubuntu'
      # BEGIN partitioned execution tests. We run these tests along with group 1,
      # or if we are on the "python" engine, which only has a single group. We
      # skip these tests on the "native" engine, which does not use partitions.
      - run: python -m pytest modin/tests/pandas/internals/test_benchmark_mode.py
        if: matrix.engine != 'native' && (matrix.engine == 'python' || matrix.test_task == 'group_1')
      - run: python -m pytest modin/tests/test_partition_api.py
        # Skip this test for python because we do not define unwrap_partitions()
        # for python execution.
        if: matrix.engine != 'native' && matrix.engine != 'python' && matrix.test_task == 'group_1'
      - name: xgboost tests
        run: |
          # TODO(https://github.com/modin-project/modin/issues/5194): Uncap xgboost
          # when we use collective instead of rabit.
          mamba install "xgboost>=1.7.1,<2.0.0" scikit-learn -c conda-forge
          python -m pytest -n 2 \
            modin/tests/experimental/xgboost/test_default.py \
            modin/tests/experimental/xgboost/test_xgboost.py \
            modin/tests/experimental/xgboost/test_dmatrix.py
        if: matrix.engine != 'native' && matrix.os != 'windows' && (matrix.engine == 'python' || matrix.test_task == 'group_1')
      - run: python -m pytest -n 2 modin/tests/experimental/test_pipeline.py
        if: matrix.engine != 'native' && (matrix.engine == 'python' || matrix.test_task == 'group_1')
      # END partitioned execution tests.
      # BEGIN test groups.
      # Run all the tests in the corresponding group for this instance of the
      # test matrix. For example, if we are in the matrix's 'group_4', run the
      # tests for 'group_4'. For each of 'native' and 'python' engines, we run
      # all tests in a single job, so we ignore the grouping.
      - uses: ./.github/actions/run-core-tests/group_1
        with:
          # When running with the Ray engine on Windows, tests fail in CI with
          # 2 pytest workers. See https://github.com/modin-project/modin/issues/7387.
          parallel: ${{ matrix.engine == 'ray' && matrix.os == 'windows' && '-n 1' || '-n 2' }}
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_1'
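      # The `cond && 'a' || 'b'` pattern above is the usual GitHub-expressions
      # stand-in for a ternary; it works here because '-n 1' is a truthy
      # string, so the `||` branch is only taken when the condition is false.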
      - uses: ./.github/actions/run-core-tests/group_2
        with:
          # When running with the Ray engine on Windows, tests fail in CI with
          # 2 pytest workers. See https://github.com/modin-project/modin/issues/7387.
          parallel: ${{ matrix.engine == 'ray' && matrix.os == 'windows' && '-n 1' || '-n 2' }}
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_2'
      - uses: ./.github/actions/run-core-tests/group_3
        with:
          # When running with the Ray engine on Windows, tests fail in CI with
          # 2 pytest workers. See https://github.com/modin-project/modin/issues/7387.
          parallel: ${{ matrix.engine == 'ray' && matrix.os == 'windows' && '-n 1' || '-n 2' }}
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_3'
      - uses: ./.github/actions/run-core-tests/group_4
        with:
          # When running with the Ray engine on Windows, tests fail in CI with
          # 2 pytest workers. See https://github.com/modin-project/modin/issues/7387.
          parallel: ${{ matrix.engine == 'ray' && matrix.os == 'windows' && '-n 1' || '-n 2' }}
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/tests/numpy
        # Native execution does not support the modin Numpy API.
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      # END test groups.
      # BEGIN some tests that we run along with group 4 for engines other than
      # 'native' and 'python'. 'native' and 'python' jobs will run these tests
      # along with all other tests in a single group.
      - run: chmod +x ./.github/workflows/sql_server/set_up_sql_server.sh
        if: matrix.os == 'ubuntu' && (matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4')
      - run: ./.github/workflows/sql_server/set_up_sql_server.sh
        if: matrix.os == 'ubuntu' && (matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4')
      # Do not add parallelism (`-n` argument) here - it will cause the mock S3 service to fail.
      - run: python -m pytest modin/tests/pandas/test_io.py --verbose
        timeout-minutes: 60
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'
      - run: python -m pytest modin/tests/experimental/test_io_exp.py
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'
      - run: python -m pytest modin/tests/interchange/dataframe_protocol/test_general.py
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'
      - run: python -m pytest modin/tests/interchange/dataframe_protocol/pandas/test_protocol.py
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'
      - run: python -m pytest modin/tests/polars/test_dataframe.py
      - run: |
          python -m pip install lazy_import
          python -m pytest modin/tests/pandas/integrations/
        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'
      # END tests that run on group 4, or in the single group for the 'native'
      # and 'python' engines.
      - uses: ./.github/actions/upload-coverage
      - name: Stop local ray cluster
        run: ray stop
        if: matrix.os == 'windows' && matrix.engine == 'ray'
  test-sanity:
    # The "sanity" tests run on each pull request to test that a subset of the
    # full tests work with the slower engines (ray, dask, and unidist-MPI).
    needs: [lint-flake8, execution-filter, python-filter]
    # If we don't need to run any sanity tests, the job matrix that we generate
    # here gives a single job with all the matrix fields empty (that is, os,
    # execution, etc. are not set, so we treat them as "").
    # So, if the matrix is going to be empty, we need to skip this job
    # completely. This bizarre behavior is not in the official documentation
    # of GitHub Actions matrices, but someone does mention it here:
    # https://stackoverflow.com/a/77118991
    if: |
      github.event_name == 'pull_request' &&
      (
        needs.execution-filter.outputs.ray != 'true' ||
        needs.execution-filter.outputs.dask != 'true' ||
        needs.execution-filter.outputs.unidist != 'true'
      )
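    # Example: if a PR touches all three engine directories, every filter
    # output above is 'true', the condition is false, and this whole job (and
    # the otherwise-empty matrix it would generate) is skipped - the skip
    # behavior this PR's title describes.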
    strategy:
      matrix:
        os:
          - ubuntu
          - windows
        python-version: [ "${{ needs.python-filter.outputs.python-version }}" ]
        running-all-ray-tests: [ "${{ needs.execution-filter.outputs.ray }}" ]
        running-all-dask-tests: [ "${{needs.execution-filter.outputs.dask}}" ]
        running-all-unidist-tests: [ "${{needs.execution-filter.outputs.unidist}}" ]
        execution: [ray, dask, unidist]
        # If we're going to run all ray tests because we've detected a
        # change to the ray engine, we don't need to run these sanity tests
        # on ray. Likewise for dask and unidist.
        exclude:
          - running-all-ray-tests: 'true'
            execution: ray
          - running-all-dask-tests: 'true'
            execution: dask
          - running-all-unidist-tests: 'true'
            execution: unidist
    runs-on: ${{ matrix.os }}-latest
    defaults:
      run:
        shell: bash -l {0}
    env:
      MODIN_ENGINE: ${{ matrix.execution }}
      UNIDIST_BACKEND: "mpi"
      PARALLEL: ${{ matrix.execution != 'unidist' && matrix.os != 'windows' && '-n 2' || '' }}
      PYTEST_COMMAND: >-
        ${{
          (
            (matrix.execution == 'ray' || matrix.execution == 'dask') &&
            'python -m pytest'
          ) ||
          (
            matrix.execution == 'unidist' &&
            'mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key -genv AWS_SECRET_ACCESS_KEY foobar_secret python -m pytest'
          ) ||
          'UNKNOWN_PYTEST_COMMAND'
        }}
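    # PYTEST_COMMAND resolves via the same chained &&/|| ternary idiom: for
    # unidist it becomes the mpiexec wrapper (carrying the fake AWS credentials
    # the moto service expects), for ray and dask a plain `python -m pytest`,
    # and 'UNKNOWN_PYTEST_COMMAND' is a sentinel that would fail loudly for any
    # unexpected engine. PARALLEL adds '-n 2' except on Windows and for unidist.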
    name: test-${{ matrix.os }}-sanity (engine ${{ matrix.execution }}, python ${{matrix.python-version}})
    services:
      moto:
        image: ${{ matrix.os != 'windows' && 'motoserver/moto:5.0.13' || '' }}
        ports:
          - 5000:5000
        env:
          AWS_ACCESS_KEY_ID: foobar_key
          AWS_SECRET_ACCESS_KEY: foobar_secret
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: ${{ matrix.os == 'ubuntu' && matrix.execution == 'unidist' && 'requirements/env_unidist_linux.yml' || matrix.os == 'windows' && matrix.execution == 'unidist' && 'requirements/env_unidist_win.yml' || 'environment-dev.yml' }}
          activate-environment: ${{ matrix.execution == 'unidist' && 'modin_on_unidist' || 'modin' }}
          python-version: ${{matrix.python-version}}
      - name: Install HDF5
        run: sudo apt update && sudo apt install -y libhdf5-dev
        if: matrix.os != 'windows'
      - name: Limit ray memory
        run: echo "MODIN_MEMORY=1000000000" >> $GITHUB_ENV
        if: matrix.os != 'windows' && matrix.execution == 'ray'
      - name: Tell Modin to use existing ray cluster
        run: echo "MODIN_RAY_CLUSTER=True" >> $GITHUB_ENV
        if: matrix.os == 'windows' && matrix.execution == 'ray'
      - name: Start local ray cluster
        # Try a few times to start ray to work around
        # https://github.com/modin-project/modin/issues/4562
        uses: nick-fields/retry@v3
        with:
          timeout_minutes: 5
          max_attempts: 5
          command: ray start --head --port=6379 --object-store-memory=1000000000
        if: matrix.os == 'windows' && matrix.execution == 'ray'
      - run: MODIN_BENCHMARK_MODE=True $PYTEST_COMMAND modin/tests/pandas/internals/test_benchmark_mode.py
      - run: $PYTEST_COMMAND $PARALLEL modin/tests/test_partition_api.py
      - run: $PYTEST_COMMAND modin/tests/pandas/extensions
      - name: xgboost tests
        run: |
          # TODO(https://github.com/modin-project/modin/issues/5194): Uncap xgboost
          # when we use collective instead of rabit.
          mamba install "xgboost>=1.7.1,<2.0.0" scikit-learn -c conda-forge
          $PYTEST_COMMAND $PARALLEL \
            modin/tests/experimental/xgboost/test_default.py \
            modin/tests/experimental/xgboost/test_xgboost.py \
            modin/tests/experimental/xgboost/test_dmatrix.py
        if: matrix.os != 'windows' && needs.execution-filter.outputs.experimental == 'true'
      - run: $PYTEST_COMMAND $PARALLEL modin/tests/experimental/test_pipeline.py
        if: matrix.os != 'windows' && matrix.execution != 'unidist' && needs.execution-filter.outputs.experimental == 'true'
      - name: "test DF: binary, default, iter"
        run: |
          $PYTEST_COMMAND $PARALLEL \
            modin/tests/pandas/dataframe/test_binary.py \
            modin/tests/pandas/dataframe/test_default.py \
            modin/tests/pandas/dataframe/test_iter.py
        if: matrix.os != 'windows'
      - name: "test DF: reduce, udf, window, pickle"
        run: |
          $PYTEST_COMMAND $PARALLEL \
            modin/tests/pandas/dataframe/test_reduce.py \
            modin/tests/pandas/dataframe/test_udf.py \
            modin/tests/pandas/dataframe/test_window.py \
            modin/tests/pandas/dataframe/test_pickle.py
        if: matrix.os != 'windows'
      - run: $PYTEST_COMMAND modin/tests/pandas/test_series.py
        if: matrix.execution == 'ray'
      - run: $PYTEST_COMMAND -m "not exclude_in_sanity" modin/tests/pandas/test_series.py
        if: matrix.execution != 'ray'
      - run: $PYTEST_COMMAND modin/tests/pandas/dataframe/test_map_metadata.py
        if: matrix.execution == 'ray'
      - run: $PYTEST_COMMAND -m "not exclude_in_sanity" modin/tests/pandas/dataframe/test_map_metadata.py
        if: matrix.execution != 'ray'
      - name: "test rolling, expanding, reshape, general, concat"
        run: |
          $PYTEST_COMMAND $PARALLEL \
            modin/tests/pandas/test_rolling.py \
            modin/tests/pandas/test_expanding.py \
            modin/tests/pandas/test_reshape.py \
            modin/tests/pandas/test_general.py \
            modin/tests/pandas/test_concat.py
        if: matrix.os != 'windows'
      - run: $PYTEST_COMMAND $PARALLEL modin/tests/numpy
      - run: $PYTEST_COMMAND -m "not exclude_in_sanity" modin/tests/pandas/test_io.py --verbose
        if: matrix.execution != 'unidist'
      - uses: nick-fields/retry@v3
        # to avoid issues with non-stable `to_csv` tests for unidist on MPI backend.
        # for details see: https://github.com/modin-project/modin/pull/6776
        with:
          timeout_minutes: 15
          max_attempts: 3
          command: conda run --no-capture-output -n modin_on_unidist $PYTEST_COMMAND -m "not exclude_in_sanity" modin/tests/pandas/test_io.py --verbose
        if: matrix.execution == 'unidist'
      - run: $PYTEST_COMMAND modin/tests/experimental/test_io_exp.py
      - run: $PYTEST_COMMAND $PARALLEL modin/tests/interchange/dataframe_protocol/test_general.py
      - run: $PYTEST_COMMAND $PARALLEL modin/tests/interchange/dataframe_protocol/pandas/test_protocol.py
      - name: Stop local ray cluster
        run: ray stop
        if: matrix.os == 'windows' && matrix.execution == 'ray'
      - uses: ./.github/actions/upload-coverage
  test-experimental:
    needs: [lint-flake8, python-filter]
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    env:
      MODIN_ENGINE: "python"
      MODIN_EXPERIMENTAL: "True"
    name: test experimental
    services:
      moto:
        image: motoserver/moto:5.0.13
        ports:
          - 5000:5000
        env:
          AWS_ACCESS_KEY_ID: foobar_key
          AWS_SECRET_ACCESS_KEY: foobar_secret
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: environment-dev.yml
          python-version: ${{ needs.python-filter.outputs.python-version }}
      - name: Install HDF5
        run: sudo apt update && sudo apt install -y libhdf5-dev
      - run: python -m pytest -n 2 modin/tests/pandas/dataframe/test_map_metadata.py
      - run: python -m pytest -n 2 modin/tests/pandas/test_series.py
      # Do not add parallelism (`-n` argument) here - it will cause the mock S3 service to fail.
      - run: python -m pytest modin/tests/pandas/test_io.py --verbose
      - uses: ./.github/actions/upload-coverage
  test-spreadsheet:
    needs: [lint-flake8, python-filter]
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    strategy:
      matrix:
        python-version: [ "${{ needs.python-filter.outputs.python-version }}" ]
        engine: ["ray", "dask"]
    env:
      MODIN_EXPERIMENTAL: "True"
      MODIN_ENGINE: ${{matrix.engine}}
    name: test-spreadsheet (engine ${{matrix.engine}}, python ${{matrix.python-version}})
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: environment-dev.yml
          python-version: ${{matrix.python-version}}
      - run: python -m pytest modin/tests/experimental/spreadsheet/test_general.py
  test-native-dataframe-interoperability:
    needs: [lint-flake8]
    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' }}
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    strategy:
      matrix:
        python-version: ["3.9"]
    env:
      # Test interoperability between PandasOnPython dataframes/series and
      # native dataframes/series.
      MODIN_ENGINE: "Python"
    name: test-native-dataframe-interoperability (python ${{matrix.python-version}})
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/mamba-env
        with:
          environment-file: environment-dev.yml
          python-version: ${{matrix.python-version}}
      - run: python -m pytest modin/tests/pandas/native_df_interoperability/ -n 2
      - uses: ./.github/actions/upload-coverage
  merge-coverage-artifacts:
    needs: [test-internals, test-api-and-no-engine, test-defaults, test-all-unidist, test-all, test-experimental, test-sanity, test-native-dataframe-interoperability]
    if: always() # run even if some of the needed jobs were skipped, as happens on PRs
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - name: Merge Artifacts
        uses: actions/upload-artifact/merge@v4
        with:
          name: coverage-data
          pattern: coverage-data-*
          include-hidden-files: true
          delete-merged: true
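  # Each test job uploads a coverage-data-* artifact via
  # ./.github/actions/upload-coverage; the merge step above folds them into a
  # single coverage-data artifact (deleting the per-job pieces) so the job
  # below can download one artifact and run `coverage combine` over it.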
  upload-coverage:
    needs: [merge-coverage-artifacts, python-filter]
    if: always() # run even if some of the needed jobs were skipped, as happens on PRs
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/python-only
        with:
          python-version: ${{ needs.python-filter.outputs.python-version }}
      - name: Download coverage data
        uses: actions/download-artifact@v4
        with:
          name: coverage-data
      - run: pip install coverage
      - name: Combine coverage
        run: python -m coverage combine
      - name: Generate coverage report in xml format
        run: python -m coverage xml
      - uses: codecov/codecov-action@v4
        with:
          fail_ci_if_error: ${{ github.event_name == 'push' }} # do not care about uploads in PR
          token: ${{ secrets.CODECOV_TOKEN }} # this token is available at https://app.codecov.io/account/github/modin-project/