# Workflow file for run of PR #16248 — "TEST-#9999: Fix engine filter."
name: ci
on:
  pull_request:
    paths:
      # NOTE: keep these paths in sync with the paths that trigger the
      # fuzzydata Github Actions in .github/workflows/fuzzydata-test.yml
      - '.github/workflows/**'
      - '.github/actions/**'
      - '!.github/workflows/push-to-main.yml'
      - 'asv_bench/**'
      - 'modin/**'
      - 'requirements/**'
      - 'scripts/**'
      - environment-dev.yml
      - requirements-dev.txt
      - setup.cfg
      - setup.py
      - versioneer.py
  push:
  schedule:
    # Weekly scheduled runs; python-filter maps each cron slot to a
    # different Python version.
    - cron: "30 2 * * WED"
    - cron: "30 2 * * THU"
concurrency:
  # Cancel other jobs in the same branch. We don't care whether CI passes
  # on old commits.
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
env:
  MODIN_GITHUB_CI: true
jobs:
python-filter:
runs-on: ubuntu-latest
outputs:
python-version: ${{ steps.choose.outputs.python-version }}
steps:
- id: choose
run: |
if [[ "${{ github.event.schedule }}" = "30 2 * * WED" ]]
then
echo "python-version=3.10" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event.schedule }}" = "30 2 * * THU" ]]
then
echo "python-version=3.11" >> "$GITHUB_OUTPUT"
else
echo "python-version=3.9" >> "$GITHUB_OUTPUT"
fi
# lint-flake8:
# needs: [python-filter]
# name: lint (flake8)
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v4
# - uses: ./.github/actions/python-only
# with:
# python-version: ${{ needs.python-filter.outputs.python-version }}
# # NOTE: If you are changing the set of packages installed here, make sure that
# # the dev requirements match them.
# - run: pip install flake8 flake8-print flake8-no-implicit-concat
# # NOTE: keep the flake8 command here in sync with the pre-commit hook in
# # /contributing/pre-commit
# - run: flake8 modin/ asv_bench/benchmarks scripts/doc_checker.py
execution-filter:
# Choose which executions we want to run all tests for on a pull request.
# We always test 'native' and 'python' executions completely because they
# are fast, but we only test ray, dask, and unidist, if we think this pull
# request is affecting how we execute with those engines specifically.
runs-on: ubuntu-latest
outputs:
ray: ${{ steps.filter.outputs.ray }}
dask: ${{ steps.filter.outputs.dask }}
unidist: ${{ steps.filter.outputs.unidist }}
engines: ${{ steps.engines.outputs.engines }}
experimental: ${{ steps.experimental.outputs.experimental }}
steps:
- uses: actions/checkout@v4
- uses: dorny/paths-filter@v3
id: filter
with:
filters: |
shared: &shared
- 'modin/core/execution/dispatching/**'
ray:
- *shared
- 'modin/core/execution/ray/**'
dask:
- *shared
- 'modin/core/execution/dask/**'
unidist:
- *shared
- 'modin/core/execution/unidist/**'
experimental:
- 'modin/experimental/**'
- uses: actions/setup-python@v5
- id: engines
run: |
python -c "import sys, json; print('engines=' + json.dumps(['python', 'native'] + (sys.argv[1] == 'true' and ['ray'] or []) + (sys.argv[2] == 'true' and ['dask'] or []) ))" \
"${{ steps.filter.outputs.ray }}" "${{ steps.filter.outputs.dask }}" >> $GITHUB_OUTPUT
- id: show-variables
run: echo ${{ steps.filter.outputs.ray }} , ${{ steps.filter.outputs.dask }} , ${{ steps.filter.outputs.unidist }} , ${{ steps.filter.outputs.experimental }}, ${{ steps.filter.outputs.test-native-dataframe-mode }}
test-sanity:
# The "sanity" tests run on each pull request to test that a subset of the
# full tests work with the slower engines (ray, dask, and unidist-MPI).
needs: [execution-filter, python-filter]
# If we don't need any of these, we get a single job with an empty matrix
# (that is, os, execution, etc. are not set and so we treat them as "").
# so, if the matrix is going to be empty, we need to skip this job completely:
# https://stackoverflow.com/a/77118991
if: |
github.event_name == 'pull_request' &&
(
needs.execution-filter.outputs.ray != 'true' ||
needs.execution-filter.outputs.dask != 'true' ||
needs.execution-filter.outputs.unidist != 'true'
)
strategy:
matrix:
os:
- ubuntu
- windows
python-version: [ "${{ needs.python-filter.outputs.python-version }}" ]
running-all-ray-tests: [ "${{ needs.execution-filter.outputs.ray }}" ]
running-all-dask-tests: [ "${{needs.execution-filter.outputs.dask}}" ]
running-all-unidist-tests: [ "${{needs.execution-filter.outputs.unidist}}" ]
execution: [ray, dask, unidist]
include:
- execution: ray
shell-ex: "python -m pytest"
- execution: dask
shell-ex: "python -m pytest"
- execution: unidist
shell-ex: "mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key -genv AWS_SECRET_ACCESS_KEY foobar_secret python -m pytest"
# NOTE: include re-adds excluded jobs, so we have to put the include
# before the exclude.
exclude:
- running-all-ray-tests: 'true'
execution: ray
- running-all-dask-tests: 'true'
execution: dask
- running-all-unidist-tests: 'true'
execution: unidist
# execution:
# - name: ray
# shell-ex: "python -m pytest"
# # If we're going to run all ray tests because we've detected a
# # change to the ray engine, we don't need to run these sanity tests
# # on ray.
# if: needs.execution-filter.outputs.ray != 'true'
# - name: dask
# shell-ex: "python -m pytest"
# # If we're going to run all dask tests because we've detected a
# # change to the dask engine, we don't need to run these sanity tests
# # on dask.
# if: needs.execution-filter.outputs.dask != 'true'
# - name: unidist
# shell-ex: "mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key -genv AWS_SECRET_ACCESS_KEY foobar_secret python -m pytest"
# # If we're going to run all unidist tests because we've detected a
# # change to the unidist engine, we don't need to run these sanity tests
# # on unidist.
# if: needs.execution-filter.outputs.unidist != 'true'
runs-on: ${{ matrix.os }}-latest
defaults:
run:
shell: bash -l {0}
env:
MODIN_ENGINE: ${{ matrix.execution }}
UNIDIST_BACKEND: "mpi"
PARALLEL: ${{ matrix.execution != 'unidist' && matrix.os != 'windows' && '-n 2' || '' }}
name: test-${{ matrix.os }}-sanity (engine ${{ matrix.execution }}, python ${{matrix.python-version}})
services:
moto:
image: ${{ matrix.os != 'windows' && 'motoserver/moto:5.0.13' || '' }}
ports:
- 5000:5000
env:
AWS_ACCESS_KEY_ID: foobar_key
AWS_SECRET_ACCESS_KEY: foobar_secret
steps:
- id: show-variables
run: echo ${{ needs.execution-filter.outputs.ray }} , ${{ needs.execution-filter.outputs.dask}}, ${{needs.execution-filter.outputs.unidist}}, ${{matrix.running-all-ray-tests}}, ${{matrix.running-all-dask-tests}}
- uses: actions/checkout@v4
- uses: ./.github/actions/mamba-env
with:
environment-file: ${{ matrix.os == 'ubuntu' && matrix.execution == 'unidist' && 'requirements/env_unidist_linux.yml' || matrix.os == 'windows' && matrix.execution == 'unidist' && 'requirements/env_unidist_win.yml' || 'environment-dev.yml' }}
activate-environment: ${{ matrix.execution == 'unidist' && 'modin_on_unidist' || 'modin' }}
python-version: ${{matrix.python-version}}
- name: Install HDF5
run: sudo apt update && sudo apt install -y libhdf5-dev
if: matrix.os != 'windows'
- name: Limit ray memory
run: echo "MODIN_MEMORY=1000000000" >> $GITHUB_ENV
if: matrix.os != 'windows' && matrix.execution == 'ray'
- name: Tell Modin to use existing ray cluster
run: echo "MODIN_RAY_CLUSTER=True" >> $GITHUB_ENV
if: matrix.os == 'windows' && matrix.execution == 'ray'
- name: Start local ray cluster
# Try a few times to start ray to work around
# https://github.com/modin-project/modin/issues/4562
uses: nick-fields/retry@v3
with:
timeout_minutes: 5
max_attempts: 5
command: ray start --head --port=6379 --object-store-memory=1000000000
if: matrix.os == 'windows' && matrix.execution == 'ray'
- run: MODIN_BENCHMARK_MODE=True ${{ matrix.shell-ex }} modin/tests/pandas/internals/test_benchmark_mode.py
- run: ${{ matrix.shell-ex }} $PARALLEL modin/tests/test_partition_api.py
- run: ${{ matrix.shell-ex }} modin/tests/pandas/extensions
- name: xgboost tests
run: |
# TODO(https://github.com/modin-project/modin/issues/5194): Uncap xgboost
# when we use collective instead of rabit.
mamba install "xgboost>=1.7.1,<2.0.0" scikit-learn -c conda-forge
${{ matrix.shell-ex }} $PARALLEL \
modin/tests/experimental/xgboost/test_default.py \
modin/tests/experimental/xgboost/test_xgboost.py \
modin/tests/experimental/xgboost/test_dmatrix.py
if: matrix.os != 'windows' && needs.execution-filter.outputs.experimental == 'true'
- run: ${{ matrix.shell-ex }} $PARALLEL modin/tests/experimental/test_pipeline.py
if: matrix.os != 'windows' && matrix.execution != 'unidist' && needs.execution-filter.outputs.experimental == 'true'
- name: "test DF: binary, default, iter"
run: |
${{ matrix.shell-ex }} $PARALLEL \
modin/tests/pandas/dataframe/test_binary.py \
modin/tests/pandas/dataframe/test_default.py \
modin/tests/pandas/dataframe/test_iter.py
if: matrix.os != 'windows'
- name: "test DF: reduce, udf, window, pickle"
run: |
${{ matrix.shell-ex }} $PARALLEL \
modin/tests/pandas/dataframe/test_reduce.py \
modin/tests/pandas/dataframe/test_udf.py \
modin/tests/pandas/dataframe/test_window.py \
modin/tests/pandas/dataframe/test_pickle.py
if: matrix.os != 'windows'
- run: ${{ matrix.shell-ex }} modin/tests/pandas/test_series.py
if: matrix.execution == 'ray'
- run: ${{ matrix.shell-ex }} -m "not exclude_in_sanity" modin/tests/pandas/test_series.py
if: matrix.execution != 'ray'
- run: ${{ matrix.shell-ex }} modin/tests/pandas/dataframe/test_map_metadata.py
if: matrix.execution == 'ray'
- run: ${{ matrix.shell-ex }} -m "not exclude_in_sanity" modin/tests/pandas/dataframe/test_map_metadata.py
if: matrix.execution != 'ray'
- name: "test rolling, expanding, reshape, general, concat"
run: |
${{ matrix.shell-ex }} $PARALLEL \
modin/tests/pandas/test_rolling.py \
modin/tests/pandas/test_expanding.py \
modin/tests/pandas/test_reshape.py \
modin/tests/pandas/test_general.py \
modin/tests/pandas/test_concat.py
if: matrix.os != 'windows'
- run: ${{ matrix.shell-ex }} $PARALLEL modin/tests/numpy
- run: ${{ matrix.shell-ex }} -m "not exclude_in_sanity" modin/tests/pandas/test_io.py --verbose
if: matrix.execution != 'unidist'
- uses: nick-fields/retry@v3
# to avoid issues with non-stable `to_csv` tests for unidist on MPI backend.
# for details see: https://github.com/modin-project/modin/pull/6776
with:
timeout_minutes: 15
max_attempts: 3
command: conda run --no-capture-output -n modin_on_unidist ${{ matrix.shell-ex }} -m "not exclude_in_sanity" modin/tests/pandas/test_io.py --verbose
if: matrix.execution == 'unidist'
- run: ${{ matrix.shell-ex }} modin/tests/experimental/test_io_exp.py
- run: ${{ matrix.shell-ex }} $PARALLEL modin/tests/interchange/dataframe_protocol/test_general.py
- run: ${{ matrix.shell-ex }} $PARALLEL modin/tests/interchange/dataframe_protocol/pandas/test_protocol.py
- name: Stop local ray cluster
run: ray stop
if: matrix.os == 'windows' && matrix.execution == 'ray'
- uses: ./.github/actions/upload-coverage