From eb03959e437e11891b8c3696b76f664a991a37a4 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Fri, 9 Sep 2022 17:56:50 +0200 Subject: [PATCH] Update CI documentation, renaming runs to "Canary" (#26151) For quite some time we did not have a name for the main builds. However, more and more the "main" builds are used to provide early warnings for some problems: * 3rd-party dependencies breaking our builds * our own dependencies breaking the constraints * building ARM images * building breeze images quickly * running complete matrix of tests * finding flaky tests So effectively, those main builds are really "Canary" builds - when those builds are failing, they give us a chance to react quickly, without affecting the regular PR builds. This PR clarifies the meaning and reasoning for those builds and introduces the "Canary" name for them. During related documentation review, it also turned out that a number of old environment variables are not used any more (after Breeze was converted to Python) and this PR also removes them from documentation and removes the variables from all the scripts (including removal of some unused scripts). The new documentation also mentions something that we've learned recently - that in case you use Breeze in non-airflow workflows in GitHub Actions, you need to override the variables through command line parameters rather than through environment variables, because GitHub Actions treats GITHUB_* variables as immutable for security. 
--- .github/workflows/build-images.yml | 2 - .github/workflows/ci.yml | 52 ++- CI.rst | 308 +++++++----------- CI_DIAGRAMS.md | 8 +- .../airflow_breeze/commands/ci_commands.py | 4 +- .../src/airflow_breeze/global_constants.py | 6 - .../utils/docker_command_utils.py | 1 - dev/breeze/tests/test_pr_info.py | 16 +- scripts/ci/docker-compose/_docker.env | 1 - scripts/ci/docker-compose/base.yml | 1 - scripts/ci/docker-compose/devcontainer.env | 1 - scripts/ci/libraries/_all_libs.sh | 2 - scripts/ci/libraries/_initialization.sh | 14 +- scripts/ci/libraries/_md5sum.sh | 154 --------- 14 files changed, 164 insertions(+), 406 deletions(-) delete mode 100644 scripts/ci/libraries/_md5sum.sh diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index 5c3be9270f20d..f8433ab92e35f 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -26,8 +26,6 @@ permissions: env: MOUNT_SELECTED_LOCAL_SOURCES: "false" ANSWER: "yes" - CHECK_IMAGE_FOR_REBUILD: "true" - SKIP_CHECK_REMOTE_IMAGE: "true" DB_RESET: "true" VERBOSE: "true" GITHUB_REPOSITORY: ${{ github.repository }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 51fd6ac9b0221..ece19a4c8f49a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,8 +31,6 @@ permissions: env: MOUNT_SELECTED_LOCAL_SOURCES: "false" ANSWER: "yes" - CHECK_IMAGE_FOR_REBUILD: "true" - SKIP_CHECK_REMOTE_IMAGE: "true" DB_RESET: "true" VERBOSE: "true" GITHUB_REPOSITORY: ${{ github.repository }} @@ -168,7 +166,7 @@ jobs: in-workflow-build: ${{ steps.source-run-info.outputs.in-workflow-build }} build-job-description: ${{ steps.source-run-info.outputs.build-job-description }} runs-on: ${{ steps.source-run-info.outputs.runs-on }} - merge-run: ${{ steps.source-run-info.outputs.merge-run }} + canary-run: ${{ steps.source-run-info.outputs.canary-run }} run-coverage: ${{ steps.source-run-info.outputs.run-coverage }} steps: - name: Cleanup repo @@ -261,27 
+259,27 @@ jobs: steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v3 with: persist-credentials: false - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Setup python" uses: actions/setup-python@v4 with: python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - run: ./scripts/ci/install_breeze.sh - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Free space" run: breeze ci free-space - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Start ARM instance" run: ./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh - if: matrix.platform == 'linux/arm64' && needs.build-info.outputs.merge-run == 'true' + if: matrix.platform == 'linux/arm64' && needs.build-info.outputs.canary-run == 'true' - name: "Push CI cache ${{ matrix.platform }}" run: > breeze ci-image build @@ -290,23 +288,23 @@ jobs: --run-in-parallel --force-build --platform ${{ matrix.platform }} - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Push CI latest image ${{ matrix.platform }}" run: > breeze ci-image build --tag-as-latest --push --run-in-parallel --platform ${{ matrix.platform }} # We only push "amd" image as it is really only needed for any kind of automated builds in CI # and currently there is not an easy way to make multi-platform image from two separate builds - if: matrix.platform == 'linux/amd64' && 
needs.build-info.outputs.merge-run == 'true' + if: matrix.platform == 'linux/amd64' && needs.build-info.outputs.canary-run == 'true' - name: "Stop ARM instance" run: ./scripts/ci/images/ci_stop_arm_instance.sh - if: always() && matrix.platform == 'linux/arm64' && needs.build-info.outputs.merge-run == 'true' + if: always() && matrix.platform == 'linux/arm64' && needs.build-info.outputs.canary-run == 'true' - name: "Clean docker cache for ${{ matrix.platform }}" run: docker system prune --all --force - if: matrix.platform == 'linux/amd64' && needs.build-info.outputs.merge-run == 'true' + if: matrix.platform == 'linux/amd64' && needs.build-info.outputs.canary-run == 'true' - name: "Fix ownership" run: breeze ci fix-ownership - if: always() && needs.build-info.outputs.merge-run == 'true' + if: always() && needs.build-info.outputs.canary-run == 'true' # Check that after earlier cache push, breeze command will build quickly chcek-that-image-builds-quicklly: @@ -323,30 +321,30 @@ jobs: steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v3 with: persist-credentials: false - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Setup python" uses: actions/setup-python@v4 with: python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - run: ./scripts/ci/install_breeze.sh - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Free space" run: breeze ci free-space - if: needs.build-info.outputs.merge-run == 'true' + if: 
needs.build-info.outputs.canary-run == 'true' - name: "Check that image builds quickly" run: breeze shell --max-time 120 - if: matrix.platform == 'linux/amd64' && needs.build-info.outputs.merge-run == 'true' + if: matrix.platform == 'linux/amd64' && needs.build-info.outputs.canary-run == 'true' - name: "Fix ownership" run: breeze ci fix-ownership - if: always() && needs.build-info.outputs.merge-run == 'true' + if: always() && needs.build-info.outputs.canary-run == 'true' build-ci-images: permissions: @@ -1636,20 +1634,20 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - name: "Set constraints branch name" id: constraints-branch run: ./scripts/ci/constraints/ci_branch_constraints.sh - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: Checkout ${{ steps.constraints-branch.outputs.branch }} uses: actions/checkout@v3 - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' with: path: "repo" ref: ${{ steps.constraints-branch.outputs.branch }} persist-credentials: false - name: "Commit changed constraint files for ${{needs.build-info.outputs.python-versions}}" run: ./scripts/ci/constraints/ci_commit_constraints.sh - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Push changes" uses: ./.github/actions/github-push-action - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' with: github_token: ${{ secrets.GITHUB_TOKEN }} branch: ${{ steps.constraints-branch.outputs.branch }} @@ -1672,7 +1670,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - build-info - constraints - docs - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' strategy: fail-fast: false matrix: @@ -1776,7 +1774,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" if: > needs.build-info.outputs.upgrade-to-newer-dependencies != 'false' && 
needs.build-info.outputs.in-workflow-build == 'true' && - needs.build-info.outputs.merge-run != 'true' + needs.build-info.outputs.canary-run != 'true' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" diff --git a/CI.rst b/CI.rst index 1b86a43b41e13..ccabd58463a96 100644 --- a/CI.rst +++ b/CI.rst @@ -21,7 +21,7 @@ CI Environment ============== Continuous Integration is important component of making Apache Airflow robust and stable. We are running -a lot of tests for every pull request, for main and v2-*-test branches and regularly as CRON jobs. +a lot of tests for every pull request, for main and v2-*-test branches and regularly as scheduled jobs. Our execution environment for CI is `GitHub Actions `_. GitHub Actions (GA) are very well integrated with GitHub code and Workflow and it has evolved fast in 2019/202 to become @@ -84,6 +84,13 @@ We use `GitHub Container Registry [Write Token] activate Airflow Repo Airflow Repo -->> Tests: Trigger 'push' @@ -199,6 +199,7 @@ sequenceDiagram and Note over Tests: Build CI Images
Use original constraints Tests ->> GitHub Registry: Push CI Image Early cache + latest + Note over Tests: Test 'breeze' image build quickly end Tests ->> GitHub Registry: Push CI Images
[COMMIT_SHA] GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] @@ -257,7 +258,7 @@ sequenceDiagram deactivate Tests ``` -## Scheduled build flow +## Scheduled run ```mermaid sequenceDiagram @@ -279,6 +280,7 @@ sequenceDiagram and Note over Tests: Build CI Images
Use original constraints Tests ->> GitHub Registry: Push CI Image Early cache + latest + Note over Tests: Test 'breeze' image build quickly end Tests ->> GitHub Registry: Push CI Images
[COMMIT_SHA] GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] diff --git a/dev/breeze/src/airflow_breeze/commands/ci_commands.py b/dev/breeze/src/airflow_breeze/commands/ci_commands.py index 9a6550cf02725..0c46c5fcf7941 100644 --- a/dev/breeze/src/airflow_breeze/commands/ci_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/ci_commands.py @@ -297,7 +297,7 @@ def print_ga_outputs(self): print(get_ga_output(name="runs-on", value=self.get_runs_on())) print(get_ga_output(name='in-workflow-build', value=self.in_workflow_build())) print(get_ga_output(name="build-job-description", value=self.get_build_job_description())) - print(get_ga_output(name="merge-run", value=self.is_merge_run())) + print(get_ga_output(name="canary-run", value=self.is_canary_run())) print(get_ga_output(name="run-coverage", value=self.run_coverage())) def get_runs_on(self) -> str: @@ -319,7 +319,7 @@ def get_build_job_description(self) -> str: return "Build" return "Skip Build (look in pull_request_target)" - def is_merge_run(self) -> str: + def is_canary_run(self) -> str: if ( self.event_name == 'push' and self.head_repo == "apache/airflow" diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index b32de8d7547cf..c5d93ced54ca5 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -29,13 +29,7 @@ RUNS_ON_PUBLIC_RUNNER = "ubuntu-20.04" RUNS_ON_SELF_HOSTED_RUNNER = "self-hosted" -# Commented this out as we are using buildkit and this vars became irrelevant -# FORCE_PULL_IMAGES = False -# CHECK_IF_BASE_PYTHON_IMAGE_UPDATED = False -FORCE_BUILD_IMAGES = False ANSWER = "" -SKIP_CHECK_REMOTE_IMAGE = False -# PUSH_PYTHON_BASE_IMAGE = False APACHE_AIRFLOW_GITHUB_REPOSITORY = "apache/airflow" diff --git a/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py b/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py index 8172a40685882..df2ce47ed0bb9 100644 --- 
a/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py @@ -580,7 +580,6 @@ def update_expected_environment_variables(env: Dict[str, str]) -> None: set_value_to_default_if_not_set(env, 'DEFAULT_BRANCH', AIRFLOW_BRANCH) set_value_to_default_if_not_set(env, 'ENABLED_SYSTEMS', "") set_value_to_default_if_not_set(env, 'ENABLE_TEST_COVERAGE', "false") - set_value_to_default_if_not_set(env, 'GITHUB_REGISTRY_PULL_IMAGE_TAG', "latest") set_value_to_default_if_not_set(env, 'HOST_GROUP_ID', get_host_group_id()) set_value_to_default_if_not_set(env, 'HOST_OS', get_host_os()) set_value_to_default_if_not_set(env, 'HOST_USER_ID', get_host_user_id()) diff --git a/dev/breeze/tests/test_pr_info.py b/dev/breeze/tests/test_pr_info.py index ca2ada5ecdf7a..58e75f09c802b 100644 --- a/dev/breeze/tests/test_pr_info.py +++ b/dev/breeze/tests/test_pr_info.py @@ -39,7 +39,7 @@ def test_pr_info(): assert wi.event_name == 'pull_request' assert wi.pr_number == 26004 assert wi.get_runs_on() == "ubuntu-20.04" - assert wi.is_merge_run() == "false" + assert wi.is_canary_run() == "false" assert wi.run_coverage() == "false" @@ -53,7 +53,7 @@ def test_push_info(): assert wi.event_name == 'push' assert wi.pr_number is None assert wi.get_runs_on() == "ubuntu-20.04" - assert wi.is_merge_run() == "true" + assert wi.is_canary_run() == "true" assert wi.run_coverage() == "true" @@ -67,7 +67,7 @@ def test_schedule(): assert wi.event_name == 'schedule' assert wi.pr_number is None assert wi.get_runs_on() == "ubuntu-20.04" - assert wi.is_merge_run() == "false" + assert wi.is_canary_run() == "false" assert wi.run_coverage() == "false" @@ -81,7 +81,7 @@ def test_runs_on_self_hosted(): assert wi.event_name == 'pull_request' assert wi.pr_number == 1234 assert wi.get_runs_on() == "self-hosted" - assert wi.is_merge_run() == "false" + assert wi.is_canary_run() == "false" assert wi.run_coverage() == "false" @@ -95,7 +95,7 @@ def 
test_runs_on_forced_public_runner(): assert wi.event_name == 'pull_request' assert wi.pr_number == 1234 assert wi.get_runs_on() == "ubuntu-20.04" - assert wi.is_merge_run() == "false" + assert wi.is_canary_run() == "false" assert wi.run_coverage() == "false" @@ -109,7 +109,7 @@ def test_runs_on_simple_pr_other_repo(): assert wi.event_name == 'pull_request' assert wi.pr_number == 1234 assert wi.get_runs_on() == "ubuntu-20.04" - assert wi.is_merge_run() == "false" + assert wi.is_canary_run() == "false" assert wi.run_coverage() == "false" @@ -123,7 +123,7 @@ def test_runs_on_push_other_branch(): assert wi.event_name == 'push' assert wi.pr_number is None assert wi.get_runs_on() == "self-hosted" - assert wi.is_merge_run() == "false" + assert wi.is_canary_run() == "false" assert wi.run_coverage() == "false" @@ -137,5 +137,5 @@ def test_runs_on_push_v_test_branch(): assert wi.event_name == 'push' assert wi.pr_number is None assert wi.get_runs_on() == "self-hosted" - assert wi.is_merge_run() == "true" + assert wi.is_canary_run() == "true" assert wi.run_coverage() == "false" diff --git a/scripts/ci/docker-compose/_docker.env b/scripts/ci/docker-compose/_docker.env index 8b90912eaa592..3dafb73f6867b 100644 --- a/scripts/ci/docker-compose/_docker.env +++ b/scripts/ci/docker-compose/_docker.env @@ -35,7 +35,6 @@ ENABLED_INTEGRATIONS ENABLED_SYSTEMS ENABLE_TEST_COVERAGE GITHUB_ACTIONS -GITHUB_REGISTRY_PULL_IMAGE_TAG HOST_USER_ID HOST_GROUP_ID HOST_OS diff --git a/scripts/ci/docker-compose/base.yml b/scripts/ci/docker-compose/base.yml index 7904bc74fb873..b5d0f895b59aa 100644 --- a/scripts/ci/docker-compose/base.yml +++ b/scripts/ci/docker-compose/base.yml @@ -48,7 +48,6 @@ services: - ENABLED_SYSTEMS=${ENABLED_SYSTEMS} - ENABLE_TEST_COVERAGE=${ENABLE_TEST_COVERAGE} - GITHUB_ACTIONS=${GITHUB_ACTIONS} - - GITHUB_REGISTRY_PULL_IMAGE_TAG=${GITHUB_REGISTRY_PULL_IMAGE_TAG} - HOST_USER_ID=${HOST_USER_ID} - HOST_GROUP_ID=${HOST_GROUP_ID} - HOST_OS=${HOST_OS} diff --git 
a/scripts/ci/docker-compose/devcontainer.env b/scripts/ci/docker-compose/devcontainer.env index 3f40808d986cc..4f10d5beb6c7d 100644 --- a/scripts/ci/docker-compose/devcontainer.env +++ b/scripts/ci/docker-compose/devcontainer.env @@ -35,7 +35,6 @@ ENABLED_INTEGRATIONS= ENABLED_SYSTEMS= ENABLE_TEST_COVERAGE="false" GITHUB_ACTIONS="false" -GITHUB_REGISTRY_PULL_IMAGE_TAG="" HOST_USER_ID= HOST_GROUP_ID= HOST_OS="linux" diff --git a/scripts/ci/libraries/_all_libs.sh b/scripts/ci/libraries/_all_libs.sh index 87de95bfa09a8..da893d5dcb613 100755 --- a/scripts/ci/libraries/_all_libs.sh +++ b/scripts/ci/libraries/_all_libs.sh @@ -36,8 +36,6 @@ readonly SCRIPTS_CI_DIR . "${LIBRARIES_DIR}"/_sanity_checks.sh # shellcheck source=scripts/ci/libraries/_local_mounts.sh . "${LIBRARIES_DIR}"/_local_mounts.sh -# shellcheck source=scripts/ci/libraries/_md5sum.sh -. "${LIBRARIES_DIR}"/_md5sum.sh # shellcheck source=scripts/ci/libraries/_start_end.sh . "${LIBRARIES_DIR}"/_start_end.sh # shellcheck source=scripts/ci/libraries/_testing.sh diff --git a/scripts/ci/libraries/_initialization.sh b/scripts/ci/libraries/_initialization.sh index 2d7757d55f386..42b6896319e68 100644 --- a/scripts/ci/libraries/_initialization.sh +++ b/scripts/ci/libraries/_initialization.sh @@ -274,16 +274,10 @@ function initialization::initialize_mount_variables() { # Determine values of force settings function initialization::initialize_force_variables() { - # Determines whether to force build without checking if it is needed - # Can be overridden by '--force-build-images' flag. - export FORCE_BUILD_IMAGES=${FORCE_BUILD_IMAGES:="false"} # Can be set to "yes/no/quit" in order to force specified answer to all questions asked to the user. 
export ANSWER=${ANSWER:=""} - # Can be set to true to skip if the image is newer in registry - export SKIP_CHECK_REMOTE_IMAGE=${SKIP_CHECK_REMOTE_IMAGE:="false"} - # integrations are disabled by default export ENABLED_INTEGRATIONS=${ENABLED_INTEGRATIONS:=""} @@ -461,8 +455,6 @@ function initialization::initialize_git_variables() { } function initialization::initialize_github_variables() { - export GITHUB_REGISTRY_PULL_IMAGE_TAG=${GITHUB_REGISTRY_PULL_IMAGE_TAG:="latest"} - export GITHUB_REGISTRY_PUSH_IMAGE_TAG=${GITHUB_REGISTRY_PUSH_IMAGE_TAG:="latest"} export GITHUB_REPOSITORY=${GITHUB_REPOSITORY:="apache/airflow"} # Allows to override the repository which is used as source of constraints during the build @@ -557,7 +549,7 @@ function initialization::get_docker_cache_image_names() { # Example: # ghcr.io/apache/airflow/main/ci/python3.8:latest # ghcr.io/apache/airflow/main/ci/python3.8: - export AIRFLOW_CI_IMAGE_WITH_TAG="${image_name}/${BRANCH_NAME}/ci/python${PYTHON_MAJOR_MINOR_VERSION}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}" + export AIRFLOW_CI_IMAGE_WITH_TAG="${image_name}/${BRANCH_NAME}/ci/python${PYTHON_MAJOR_MINOR_VERSION}:latest" # File that is touched when the CI image is built for the first time locally export BUILT_CI_IMAGE_FLAG_FILE="${BUILD_CACHE_DIR}/${BRANCH_NAME}/.built_${PYTHON_MAJOR_MINOR_VERSION}" @@ -585,9 +577,7 @@ Mount variables: Force variables: - FORCE_BUILD_IMAGES: ${FORCE_BUILD_IMAGES} ANSWER: ${ANSWER} - SKIP_CHECK_REMOTE_IMAGE: ${SKIP_CHECK_REMOTE_IMAGE} Host variables: @@ -643,7 +633,6 @@ Detected GitHub environment: GITHUB_REPOSITORY: '${GITHUB_REPOSITORY}' GITHUB_USERNAME: '${GITHUB_USERNAME}' GITHUB_REGISTRY_PULL_IMAGE_TAG: '${GITHUB_REGISTRY_PULL_IMAGE_TAG}' - GITHUB_REGISTRY_PUSH_IMAGE_TAG: '${GITHUB_REGISTRY_PUSH_IMAGE_TAG}' GITHUB_ACTIONS: '${GITHUB_ACTIONS=}' Initialization variables: @@ -772,7 +761,6 @@ function initialization::make_constants_read_only() { readonly ADDITIONAL_RUNTIME_APT_ENV readonly GITHUB_REGISTRY_PULL_IMAGE_TAG - 
readonly GITHUB_REGISTRY_PUSH_IMAGE_TAG readonly GITHUB_REPOSITORY readonly GITHUB_TOKEN diff --git a/scripts/ci/libraries/_md5sum.sh b/scripts/ci/libraries/_md5sum.sh deleted file mode 100644 index 1838936fc903a..0000000000000 --- a/scripts/ci/libraries/_md5sum.sh +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -declare -a MODIFIED_FILES -# -# Verifies if stored md5sum of the file changed since the last time it was checked -# The md5sum files are stored in .build directory - you can delete this directory -# If you want to rebuild everything from the scratch -# -function md5sum::calculate_file_md5sum { - local file="${1}" - local md5sum - local md5sum_cache_dir="${BUILD_CACHE_DIR}/${BRANCH_NAME}/${PYTHON_MAJOR_MINOR_VERSION}/${THE_IMAGE_TYPE}" - mkdir -pv "${md5sum_cache_dir}" - md5sum=$(md5sum "${file}") - local md5sum_file - md5sum_file="${md5sum_cache_dir}"/$(basename "$(dirname "${file}")")-$(basename "${file}").md5sum - local md5sum_file_new - md5sum_file_new=${CACHE_TMP_FILE_DIR}/$(basename "$(dirname "${file}")")-$(basename "${file}").md5sum.new - echo "${md5sum}" > "${md5sum_file_new}" - local ret_code=0 - if [[ ! 
-f "${md5sum_file}" ]]; then - verbosity::print_info "Missing md5sum for ${file#"${AIRFLOW_SOURCES}"} (${md5sum_file#"${AIRFLOW_SOURCES}"})" - ret_code=1 - else - diff "${md5sum_file_new}" "${md5sum_file}" >/dev/null - local res=$? - if [[ "${res}" != "0" ]]; then - verbosity::print_info "The md5sum changed for ${file}: was $(cat "${md5sum_file}") now it is $(cat "${md5sum_file_new}")" - if [[ ${CI} == "true" ]]; then - echo "${COLOR_RED}The file has changed: ${file}${COLOR_RESET}" - echo "${COLOR_BLUE}==============================${COLOR_RESET}" - cat "${file}" - echo "${COLOR_BLUE}==============================${COLOR_RESET}" - fi - ret_code=1 - fi - fi - return ${ret_code} -} - -# -# Moves md5sum file from it's temporary location in CACHE_TMP_FILE_DIR to -# BUILD_CACHE_DIR - thus updating stored MD5 sum for the file -# -function md5sum::move_file_md5sum { - local file="${1}" - local md5sum_file - local md5sum_cache_dir="${BUILD_CACHE_DIR}/${BRANCH_NAME}/${PYTHON_MAJOR_MINOR_VERSION}/${THE_IMAGE_TYPE}" - mkdir -pv "${md5sum_cache_dir}" - md5sum_file="${md5sum_cache_dir}"/$(basename "$(dirname "${file}")")-$(basename "${file}").md5sum - local md5sum_file_new - md5sum_file_new=${CACHE_TMP_FILE_DIR}/$(basename "$(dirname "${file}")")-$(basename "${file}").md5sum.new - if [[ -f "${md5sum_file_new}" ]]; then - mv "${md5sum_file_new}" "${md5sum_file}" - verbosity::print_info "Updated md5sum file ${md5sum_file} for ${file}: $(cat "${md5sum_file}")" - fi -} - -# -# Stores md5sum files for all important files and -# records that we built the images locally so that next time we use -# it from the local docker cache rather than pull (unless forced) -# -function md5sum::update_all_md5() { - verbosity::print_info - verbosity::print_info "Updating md5sum files" - verbosity::print_info - for file in "${FILES_FOR_REBUILD_CHECK[@]}" - do - md5sum::move_file_md5sum "${AIRFLOW_SOURCES}/${file}" - done - mkdir -pv "${BUILD_CACHE_DIR}/${BRANCH_NAME}" - touch 
"${BUILT_CI_IMAGE_FLAG_FILE}" -} - -function md5sum::update_all_md5_with_group() { - start_end::group_start "Update MD5 hashes for pulled images" - md5sum::update_all_md5 - start_end::group_end -} - -function md5sum::calculate_md5sum_for_all_files() { - FILES_MODIFIED="false" - set +e - for file in "${FILES_FOR_REBUILD_CHECK[@]}" - do - if ! md5sum::calculate_file_md5sum "${AIRFLOW_SOURCES}/${file}"; then - FILES_MODIFIED="true" - MODIFIED_FILES+=( "${file}" ) - fi - done - set -e -} - -# -# Checks md5sum of all important files in order to optimise speed of running various operations -# That mount sources of Airflow to container and require docker image built with latest dependencies. -# the Docker image will only be marked for rebuilding only in case any of the important files change: -# * setup.py -# * setup.cfg -# * Dockerfile.ci -# -# This is needed because we want to skip rebuilding of the image when only airflow sources change but -# Trigger rebuild in case we need to change dependencies (setup.py, setup.cfg, change version of Airflow -# or the Dockerfile.ci itself changes. -# -# Another reason to skip rebuilding Docker is thar currently it takes a bit longer time than simple Docker -# We need to fix group permissions of files in Docker because different linux build services have -# different default umask and Docker uses group permissions in checking for cache invalidation. -# -# As result of this check - most of the static checks will start pretty much immediately. -# -function md5sum::check_if_docker_build_is_needed() { - verbosity::print_info - verbosity::print_info "Checking if image build is needed for ${THE_IMAGE_TYPE} image." 
- verbosity::print_info - if [[ ${FORCE_BUILD_IMAGES:=""} == "true" ]]; then - verbosity::print_info - verbosity::print_info "${COLOR_YELLOW}Docker image build is forced for ${THE_IMAGE_TYPE} image${COLOR_RESET}" - verbosity::print_info - md5sum::calculate_md5sum_for_all_files - needs_docker_build="true" - else - md5sum::calculate_md5sum_for_all_files - if [[ ${FILES_MODIFIED} == "true" ]]; then - needs_docker_build="true" - fi - if [[ ${needs_docker_build} == "true" ]]; then - verbosity::print_info - verbosity::print_info "${COLOR_YELLOW}The files were modified and likely the ${THE_IMAGE_TYPE} image needs rebuild: ${MODIFIED_FILES[*]}${COLOR_RESET}" - verbosity::print_info - else - verbosity::print_info - verbosity::print_info "${COLOR_GREEN}Docker image build is not needed for ${THE_IMAGE_TYPE} image!${COLOR_RESET}" - verbosity::print_info - fi - fi -}