# pytest.yml
name: Unittests

on:
  workflow_dispatch:
  pull_request:
    branches:
      - master
    # Do not trigger tests for documentation or markdown files.
    paths-ignore:
      - 'docs/**'
      - '*.md'
  push:
    branches:
      - master
    # Do not trigger tests for documentation or markdown files.
    paths-ignore:
      - 'docs/**'
      - '*.md'
  schedule:
    # Trigger tests every day at 02:00 UTC to refresh the cache.
    - cron: '0 2 * * *'
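
# A quick summary of the triggers above: the suite runs on manual dispatch, on
# pull requests targeting master and pushes to master (skipping
# documentation-only changes), and on a daily schedule that refreshes the cache.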

# Cancel in-progress runs for the current workflow if not on the main branch
# (as cancelling a run on master would mark its unittests as failed).
# The concurrency conditionals are based on the solution proposed in:
# https://github.community/t/concurrency-cancel-in-progress-but-not-when-ref-is-master/194707
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref != 'refs/heads/master' || github.run_number }}
  # Cancel only intermediate builds of pull requests.
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
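# Explanatory note on the group expression: GitHub expressions return operand
# values from `||`, so on master the third component evaluates to
# `github.run_number` and every run gets its own group (nothing is cancelled),
# while on any other ref it evaluates to `true`, so runs of the same ref share
# one group and, for `refs/pull/*` refs, a newer run cancels the in-progress one.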

env:
  PYTEST_NUM_SHARDS: 4  # Controls test sharding enabled by `pytest-shard`
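# Explanatory note: PYTEST_NUM_SHARDS is consumed twice below; shards-job
# expands it into the pytest-job matrix, and the core test step passes it to
# pytest-shard through --num-shards.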

jobs:
  activate-tests:
    name: Check if tests should be run
    runs-on: ubuntu-latest
    steps:
      - name: Check
        id: check
        # For merged PRs, activate testing only on the master branch, based on:
        # https://github.community/t/trigger-workflow-only-on-pull-request-merge/17359
        run: |
          echo "status=${{ github.ref == 'refs/heads/master' || (
            github.event.action != 'closed'
            && github.event.pull_request.merged == false
          ) }}" >> $GITHUB_OUTPUT
    outputs:
      status: ${{ steps.check.outputs.status }}
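  # Explanatory note: the expression above yields the string 'true' when the
  # run is on the master ref (pushes, schedule, manual dispatch) or on a
  # pull-request event other than closing the PR, and 'false' otherwise; the
  # jobs below gate on this output.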

  shards-job:
    needs: activate-tests
    if: ${{ needs.activate-tests.outputs.status }}
    name: Generate shards
    runs-on: ubuntu-latest
    steps:
      - name: Create variables
        id: create-vars
        run: |
          echo "num-shards=$(jq -n -c '[${{ env.PYTEST_NUM_SHARDS }}]')" >> $GITHUB_OUTPUT
          echo "shard-ids=$(jq -n -c '[range(1;${{ env.PYTEST_NUM_SHARDS }}+1)]')" >> $GITHUB_OUTPUT
    outputs:
      num-shards: ${{ steps.create-vars.outputs.num-shards }}
      shard-ids: ${{ steps.create-vars.outputs.shard-ids }}
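  # Illustrative example: with PYTEST_NUM_SHARDS=4 the jq calls above emit
  # num-shards=[4] and shard-ids=[1,2,3,4]; fromJson() then turns these JSON
  # arrays into matrix dimensions for pytest-job.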

  pytest-job:
    needs: shards-job
    name: '[${{ matrix.os-version }}][${{ matrix.tf-version }}][Python ${{ matrix.python-version }}][${{ matrix.shard-id }}/${{ matrix.num-shards }}] Core TFDS tests'
    runs-on: ${{ matrix.os-version }}
    timeout-minutes: 30
    strategy:
      # Do not cancel in-progress jobs if any matrix job fails.
      fail-fast: false
      matrix:
        tf-version: ['tensorflow']
        # Env variables cannot be referenced in the matrix, so the shard lists
        # come from the shards-job outputs.
        num-shards: ${{ fromJson(needs.shards-job.outputs.num-shards) }}
        shard-id: ${{ fromJson(needs.shards-job.outputs.shard-ids) }}
        # TF supported versions: https://www.tensorflow.org/install/pip#software_requirements
        python-version: ['3.10', '3.11', '3.12']
        os-version: [ubuntu-latest]
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/actions/setup
        with:
          tf-version: ${{ matrix.tf-version }}
          python-version: ${{ matrix.python-version }}
# Ignores:
# * Nsynth is run in isolation due to dependency conflict (crepe).
# * Lsun tests is disabled because the tensorflow_io used in open-source
# is linked to static libraries compiled again specific TF version, which
# makes test fails with linking error (libtensorflow_io_golang.so).
# * imagenet2012_corrupted requires imagemagick binary.
# * import_without_tf_test.py, because the test relies on TensorFlow not being imported.
# * github_api is run separately to not overuse API quota.
# * wmt is run separately to avoid worker hanging.
# * Huggingface requires `datasets` library.
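      # Note on the shard arithmetic below: matrix shard-id values are 1-based
      # (1..PYTEST_NUM_SHARDS) while pytest-shard's --shard-id is 0-indexed,
      # which is why the command subtracts 1.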
      - name: Run core tests
        run: |
          pytest --durations=100 -vv -n auto --shard-id=$((${{ matrix.shard-id }} - 1)) --num-shards=${{ env.PYTEST_NUM_SHARDS }} \
            --ignore="tensorflow_datasets/datasets/nsynth/nsynth_dataset_builder_test.py" \
            --ignore="tensorflow_datasets/image/lsun_test.py" \
            --ignore="tensorflow_datasets/datasets/imagenet2012_corrupted/imagenet2012_corrupted_dataset_builder_test.py" \
            --ignore="tensorflow_datasets/scripts/documentation/build_api_docs_test.py" \
            --ignore="tensorflow_datasets/import_without_tf_test.py" \
            --ignore="tensorflow_datasets/core/github_api/github_path_test.py" \
            --ignore="tensorflow_datasets/translate/wmt19_test.py" \
            --ignore="tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder_test.py" \
            --ignore="tensorflow_datasets/core/utils/huggingface_utils_test.py"
      # Run tests without any pytest plugins. The tests should be triggered for a single shard only.
      - name: Run leftover tests
        if: ${{ matrix.shard-id == 1 }}
        uses: nick-fields/retry@v2
        with:
          timeout_minutes: 1
          max_attempts: 2
          retry_on: timeout
          command: |
            pytest -vv -o faulthandler_timeout=10 tensorflow_datasets/translate/wmt19_test.py
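      # Explanatory note: wmt19_test.py runs here because it can hang a pytest
      # worker; the retry action gives it a 1-minute timeout with one retry on
      # timeout, and `-o faulthandler_timeout=10` makes pytest dump stack
      # traces when a test stalls for more than 10 seconds.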

  huggingface-pytest-job:
    needs: activate-tests
    if: ${{ needs.activate-tests.outputs.status }}
    # HuggingFace tests need to run separately because they are disabled unless
    # the `datasets` library is installed.
    name: 'HuggingFace Python 3.10 tests'
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/actions/setup
        with:
          tf-version: tensorflow
          python-version: '3.10'
          extras: huggingface
      - name: Run HuggingFace tests
        run: |
          pytest -vv -n auto \
            tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder_test.py \
            tensorflow_datasets/core/utils/huggingface_utils_test.py

  githubapi-pytest-job:
    needs: activate-tests
    if: ${{ needs.activate-tests.outputs.status }}
    name: 'Github API tests'
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/actions/setup
        with:
          tf-version: tensorflow
      - name: Run Github API tests
        run: pytest --durations=100 -vv -n auto tensorflow_datasets/core/github_api/github_path_test.py
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
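      # Explanatory note: secrets.GITHUB_TOKEN is the token GitHub Actions
      # provides automatically for each run; it authenticates the GitHub API
      # calls made by this test, and the job runs separately from the core
      # shards to avoid overusing the API quota.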

  notebook-test-job:
    needs: activate-tests
    if: ${{ needs.activate-tests.outputs.status }}
    name: 'Notebook tests'
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/actions/setup
        with:
          tf-version: tensorflow
          use-cache: false
      # Test each notebook sequentially.
      - name: Run notebook
        run: |
          ipython kernel install --user --name tfds-notebook
          for notebook in docs/*ipynb
          do
            # These notebooks time out because they rely on loading huge datasets.
            if [[ "$notebook" != "docs/determinism.ipynb" ]] && \
               [[ "$notebook" != "docs/dataset_collections.ipynb" ]]
            then
              jupyter nbconvert \
                --ExecutePreprocessor.timeout=600 \
                --ExecutePreprocessor.kernel_name=tfds-notebook \
                --to notebook \
                --execute $notebook && \
              pip install tensorflow  # reinstall tensorflow if it was uninstalled
            fi
          done
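      # Explanatory note: because of the `&&` chain, `pip install tensorflow`
      # runs only after a notebook executes successfully, restoring TensorFlow
      # for the next iteration in case the notebook uninstalled or replaced it.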