forked from mlrun/mlrun
-
Notifications
You must be signed in to change notification settings - Fork 1
375 lines (331 loc) · 16.3 KB
/
system-tests-enterprise.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
# Copyright 2023 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: System Tests Enterprise

# Triggers:
#   * push              - to any branch whose name ends with "-system-tests"
#   * schedule          - once a day
#   * workflow_dispatch - manual run with optional overrides
on:
  push:
    branches:
      # GitHub branch filters are glob patterns, not regular expressions. The previous value
      # ('.+-system-tests') treated '.' literally, so it could never match a real branch name.
      - '**-system-tests'

  schedule:
    # * is a special character in YAML so you have to quote this string
    # Run the system tests once a day at 00:00 UTC.
    # When switching to several runs a day, make the hour step divide 24 equally, otherwise there will be
    # an overlap at the end of the day.
    - cron: '0 0 * * *'

  workflow_dispatch:
    inputs:
      docker_registry:
        description: 'Docker registry to pull images from (default: ghcr.io/, use registry.hub.docker.com/ for docker hub)'
        required: true
        default: 'ghcr.io/'
      clean_resources_in_teardown:
        description: 'Clean resources created by test (like project) in each test teardown (default: true - perform clean)'
        required: true
        default: 'true'
        type: choice
        options:
          - 'true'
          - 'false'
      override_iguazio_version:
        description: 'Override the configured target system iguazio version (leave empty to resolve automatically)'
        required: false

# every run joins the same concurrency group, so runs of this workflow do not execute in parallel
concurrency: one-at-a-time
jobs:
  # Restarts the docker-registry deployment on the data cluster and uploads the cleanup helper scripts to it.
  system-test-cleanup:
    name: System Test Cleanup
    runs-on: [ self-hosted, Linux ]
    container:
      image: ubuntu:latest
    timeout-minutes: 10

    # let's not run this on every fork, change to your fork when developing
    if: github.repository == 'mlrun/mlrun' || github.event_name == 'workflow_dispatch'

    steps:
      - uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          # openssh-client provides the ssh/scp binaries that sshpass wraps
          apt-get update -qqy && apt-get install -y sshpass openssh-client

      - name: cleanup docker images from registries
        # SSH to the data node and restart the docker-registry deployment (meant to drop the docker images created
        # by the system tests), then upload the cleanup helper scripts to the data node home directory.
        # NOTE(review): the uploaded scripts are not executed by this step - confirm whether that is intended.
        run: |
          sshpass \
            -p "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \
            ssh \
            -o StrictHostKeyChecking=no \
            -o ServerAliveInterval=180 \
            -o ServerAliveCountMax=3 \
            ${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}@${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }} \
            kubectl -n default-tenant rollout restart deployment docker-registry

          # same host-key handling as every other ssh/scp call in this workflow
          sshpass \
            -p "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \
            scp \
            -o StrictHostKeyChecking=no \
            automation/system_test/cleanup.py \
            ${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}@${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}:/home/iguazio/cleanup.py

          sshpass \
            -p "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \
            scp \
            -o StrictHostKeyChecking=no \
            automation/system_test/dev_utilities.py \
            ${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}@${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}:/home/iguazio/dev_utilities.py

  # Picks the next branch from the rotating list kept on the data cluster, computes the versions to test,
  # runs automation/system_test/prepare.py and publishes an encrypted env.yml artifact for the test job.
  prepare-system-tests-enterprise-ci:
    # When increasing the timeout make sure it's not larger than the schedule cron interval
    timeout-minutes: 55
    name: Prepare System Tests Enterprise
    runs-on: [ self-hosted, Linux ]
    container:
      image: python:3.9
    needs: [system-test-cleanup]

    # let's not run this on every fork, change to your fork when developing
    if: github.repository == 'mlrun/mlrun' || github.event_name == 'workflow_dispatch'

    steps:
      - uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          apt-get update -qqy && apt-get install -y sshpass jq curl gnupg nodejs

      - uses: actions/setup-node@v4
        with:
          node-version: 18

      - name: Copy state branch file from remote
        run: |
          sshpass \
            -p "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \
            scp \
            -o StrictHostKeyChecking=no \
            ${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}@${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}:/tmp/system-tests-branches-list.txt \
            system-tests-branches-list.txt

      - name: Resolve Branch To Run System Tests
        id: current-branch
        shell: bash
        # we store a file named /tmp/system-tests-branches-list.txt which contains a list of branches to run system
        # tests on. The branches are separated with commas, so each run we pop the first branch in the list and
        # append it to the end of the list.
        # This mechanism allows us to run on multiple branches without the need to modify the file or secrets each
        # time a new branch is added or removed
        run: |
          # Read branches from local file
          branches=$(cat system-tests-branches-list.txt)
          echo "branches found in system-tests-branches-list.txt: $branches"

          # Split branches into an array
          IFS=',' read -ra branches_array <<< "$branches"

          # Get the first branch in the list to work on
          first_branch="${branches_array[0]}"
          echo "working on $first_branch"

          # Remove the first branch from the list
          branches_array=("${branches_array[@]:1}")

          # Add the first branch at the end of the list
          branches_array+=("$first_branch")

          # Join branches back into a string
          branches=$(printf ",%s" "${branches_array[@]}")
          branches=${branches:1}

          # Output the new list of branches
          echo "$branches"

          # Write new branches order to a local file
          echo "$branches" > system-tests-branches-list.txt

          # Set output (the unquoted inner echo trims whitespace around the branch name)
          echo "name=$(echo $first_branch)" >> "$GITHUB_OUTPUT"

      - name: Override remote file from local resolved branch list
        run: |
          # Override the remote file with the new list of branches
          sshpass \
            -p "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \
            scp \
            -o StrictHostKeyChecking=no system-tests-branches-list.txt \
            ${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}@${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}:/tmp/

      # checking out to base branch and not the target(resolved) branch, to be able to run the changed preparation
      # code before merging the changes to upstream.
      - name: Checkout base branch
        uses: actions/checkout@v4

      - name: Install automation scripts dependencies
        run: |
          python -m pip install -r automation/requirements.txt

      - name: Extract git hashes from upstream and latest version
        id: git_upstream_info
        env:
          # passed through the environment instead of being spliced into the script text
          MLRUN_BRANCH: ${{ steps.current-branch.outputs.name }}
        run: |
          # Get the latest commit of mlrun/mlrun (that is older than 1 hour)
          echo "mlrun_hash=$( \
            cd /tmp && \
            git clone --single-branch --branch "$MLRUN_BRANCH" https://github.com/mlrun/mlrun.git mlrun-upstream 2> /dev/null && \
            cd mlrun-upstream && \
            git rev-list --until="1 hour ago" --max-count 1 --abbrev-commit HEAD && \
            cd .. && \
            rm -rf mlrun-upstream)" >> "$GITHUB_OUTPUT"

          # Get the latest commit of mlrun/ui (that is older than 1 hour)
          echo "ui_hash=$( \
            cd /tmp && \
            git clone --single-branch --branch "$MLRUN_BRANCH" https://github.com/mlrun/ui.git mlrun-ui 2> /dev/null && \
            cd mlrun-ui && \
            git rev-list --until="1 hour ago" --max-count 1 --abbrev-commit HEAD && \
            cd .. && \
            rm -rf mlrun-ui)" >> "$GITHUB_OUTPUT"

          # Get the tested mlrun version
          echo "unstable_version_prefix=$( \
            curl "https://raw.githubusercontent.com/mlrun/mlrun/${MLRUN_BRANCH}/automation/version/unstable_version_prefix" \
            )" >> "$GITHUB_OUTPUT"

      - name: Set computed versions params
        id: computed_params
        env:
          UPSTREAM_MLRUN_HASH: ${{ steps.git_upstream_info.outputs.mlrun_hash }}
          UPSTREAM_UI_HASH: ${{ steps.git_upstream_info.outputs.ui_hash }}
          UNSTABLE_VERSION_PREFIX: ${{ steps.git_upstream_info.outputs.unstable_version_prefix }}
          INPUT_DOCKER_REGISTRY: ${{ github.event.inputs.docker_registry }}
          INPUT_OVERRIDE_IGUAZIO_VERSION: ${{ github.event.inputs.override_iguazio_version }}
          INPUT_CLEAN_RESOURCES_IN_TEARDOWN: ${{ github.event.inputs.clean_resources_in_teardown }}
        run: |
          # The hashes come straight from the upstream clone step. The former fallbacks referenced steps
          # (git_action_info / git_action_ui_info) that do not exist in this workflow and were always empty.
          mlrun_hash="$UPSTREAM_MLRUN_HASH"
          ui_hash="$UPSTREAM_UI_HASH"

          echo "mlrun_hash=${mlrun_hash}" >> "$GITHUB_OUTPUT"
          echo "ui_hash=${ui_hash}" >> "$GITHUB_OUTPUT"
          echo "mlrun_version=${UNSTABLE_VERSION_PREFIX}+${mlrun_hash}" >> "$GITHUB_OUTPUT"
          echo "mlrun_docker_tag=${UNSTABLE_VERSION_PREFIX}-${mlrun_hash}" >> "$GITHUB_OUTPUT"
          echo "mlrun_ui_version=${UNSTABLE_VERSION_PREFIX}-${ui_hash}" >> "$GITHUB_OUTPUT"

          # inputs are empty on push/schedule events, so fall back to the documented defaults
          echo "mlrun_docker_registry=${INPUT_DOCKER_REGISTRY:-ghcr.io/}" >> "$GITHUB_OUTPUT"
          echo "mlrun_system_tests_clean_resources=${INPUT_CLEAN_RESOURCES_IN_TEARDOWN:-true}" >> "$GITHUB_OUTPUT"
          echo "override_iguazio_version=${INPUT_OVERRIDE_IGUAZIO_VERSION}" >> "$GITHUB_OUTPUT"

      - name: Prepare System Test Environment and Install MLRun
        env:
          IP_ADDR_PREFIX: ${{ secrets.IP_ADDR_PREFIX }}
        timeout-minutes: 50
        # --iguazio-version receives the override_iguazio_version output; computed_params has no
        # "iguazio_version" output, so the previous reference always expanded to an empty string.
        run: |
          python automation/system_test/prepare.py run \
            --data-cluster-ip "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}" \
            --data-cluster-ssh-username "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}" \
            --data-cluster-ssh-password "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \
            --provctl-download-url "${{ secrets.LATEST_SYSTEM_TEST_PROVCTL_DOWNLOAD_PATH }}" \
            --provctl-download-s3-access-key "${{ secrets.LATEST_SYSTEM_TEST_PROVCTL_DOWNLOAD_URL_S3_ACCESS_KEY }}" \
            --provctl-download-s3-key-id "${{ secrets.LATEST_SYSTEM_TEST_PROVCTL_DOWNLOAD_URL_S3_KEY_ID }}" \
            --username "${{ secrets.LATEST_SYSTEM_TEST_USERNAME }}" \
            --access-key "${{ secrets.LATEST_SYSTEM_TEST_ACCESS_KEY }}" \
            --iguazio-version "${{ steps.computed_params.outputs.override_iguazio_version }}" \
            --mlrun-version "${{ steps.computed_params.outputs.mlrun_version }}" \
            --mlrun-ui-version "${{ steps.computed_params.outputs.mlrun_ui_version }}" \
            --mlrun-commit "${{ steps.computed_params.outputs.mlrun_hash }}" \
            --override-image-registry "${{ steps.computed_params.outputs.mlrun_docker_registry }}"

      - name: Prepare System Test env.yml and MLRun installation from current branch
        timeout-minutes: 5
        # --branch uses the step output directly: the "needs" context of this job does not contain the job
        # itself, so needs.prepare-system-tests-enterprise-ci.outputs.mlrunBranch was always empty here.
        run: |
          python automation/system_test/prepare.py env \
            --data-cluster-ip "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}" \
            --data-cluster-ssh-username "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}" \
            --data-cluster-ssh-password "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \
            --username "${{ secrets.LATEST_SYSTEM_TEST_USERNAME }}" \
            --access-key "${{ secrets.LATEST_SYSTEM_TEST_ACCESS_KEY }}" \
            --slack-webhook-url "${{ secrets.LATEST_SYSTEM_TEST_SLACK_WEBHOOK_URL }}" \
            --branch "${{ steps.current-branch.outputs.name }}" \
            --github-access-token "${{ secrets.SYSTEM_TEST_GITHUB_ACCESS_TOKEN }}" \
            --save-to-path "$GITHUB_WORKSPACE/env.yml" \
            --kubeconfig-content "${{ secrets.LATEST_SYSTEM_TEST_KUBECONFIG }}"

      - name: Encrypt file
        id: encrypt_file
        env:
          GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
        run: |
          # read the passphrase from the environment so its characters are never parsed as shell syntax
          gpg \
            --batch \
            --passphrase "$GPG_PASSPHRASE" \
            --output "$GITHUB_WORKSPACE/env.yml.gpg" \
            --symmetric "$GITHUB_WORKSPACE/env.yml"
          echo "env_file_path=$GITHUB_WORKSPACE/env.yml.gpg" >> "$GITHUB_OUTPUT"

      - name: Upload env file
        uses: actions/upload-artifact@v4
        with:
          name: env
          path: "${{ steps.encrypt_file.outputs.env_file_path }}"
          if-no-files-found: error

    # values consumed by run-system-tests-enterprise-ci through the "needs" context
    outputs:
      mlrunVersion: ${{ steps.computed_params.outputs.mlrun_version }}
      mlrunBranch: ${{ steps.current-branch.outputs.name }}
      mlrunSystemTestsCleanResources: ${{ steps.computed_params.outputs.mlrun_system_tests_clean_resources }}

  # Runs "make test-system" for every component in the matrix, one component at a time, on the resolved branch.
  run-system-tests-enterprise-ci:
    # When increasing the timeout make sure it's not larger than the schedule cron interval
    timeout-minutes: 360
    name: Test ${{ matrix.test_component }} [${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunBranch }}]
    # requires prepare to finish before starting
    needs: [prepare-system-tests-enterprise-ci]
    runs-on: [ self-hosted, Linux ]
    container:
      image: python:3.9

    # let's not run this on every fork, change to your fork when developing
    if: github.repository == 'mlrun/mlrun' || github.event_name == 'workflow_dispatch'

    strategy:
      # keep running the remaining components when one of them fails
      fail-fast: false
      max-parallel: 1
      matrix:
        test_component:
          - api
          - runtimes
          - projects
          - model_monitoring
          - examples
          - backwards_compatibility
          - datastore
          - logs
          - feature_store
          - alerts

    steps:
      - name: Install Dependencies
        run: |
          # gnupg for decrypting env.yml.gpg
          # nodejs for installing github action dependencies
          # graphviz for generating graphs (feature store tests)
          # git-core for cloning repos and etc, required by many suites
          # gcc, make for installing python packages
          apt-get update -qqy && \
            apt-get install -y \
              gnupg \
              nodejs \
              graphviz \
              git-core \
              gcc \
              make

      - uses: actions/setup-node@v4
        with:
          node-version: 18

      - uses: actions/checkout@v4
        # checking out to the resolved branch to run system tests on, as now we run the actual tests, we don't want
        # to run the system tests of the branch that triggered the system tests as it might be in a different
        # version than the mlrun version we deployed on the previous job (can have features that the resolved
        # branch doesn't have)
        with:
          ref: ${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunBranch }}

      - uses: actions/download-artifact@v4
        with:
          name: env
          path: /tmp

      - name: Decrypt file
        env:
          GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
        run: |
          # read the passphrase from the environment, exactly as the encrypt step of the prepare job does
          gpg \
            --batch \
            --passphrase "$GPG_PASSPHRASE" \
            --output "$GITHUB_WORKSPACE/tests/system/env.yml" \
            --decrypt "/tmp/env.yml.gpg"

          # ensure file is created
          test -f "$GITHUB_WORKSPACE/tests/system/env.yml"

      - name: Pre Tests Requirements
        env:
          MLRUN_VERSION: ${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunVersion }}
        run: |
          # due to
          # https://github.com/git/git/commit/8959555cee7ec045958f9b6dd62e541affb7e7d9
          # ensure mlrun git is safe to use
          git config --global --add safe.directory "$GITHUB_WORKSPACE"

          make install-requirements install-complete-requirements update-version-file

      - name: Run System Tests
        env:
          MLRUN_SYSTEM_TESTS_GITHUB_RUN_URL: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
          MLRUN_SYSTEM_TESTS_CLEAN_RESOURCES: "${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunSystemTestsCleanResources }}"
          MLRUN_VERSION: "${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunVersion }}"
          MLRUN_SYSTEM_TESTS_COMPONENT: "${{ matrix.test_component }}"
          MLRUN_SYSTEM_TESTS_BRANCH: "${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunBranch }}"
        run: |
          make test-system