diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index 5c3be9270f20d..f8433ab92e35f 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -26,8 +26,6 @@ permissions: env: MOUNT_SELECTED_LOCAL_SOURCES: "false" ANSWER: "yes" - CHECK_IMAGE_FOR_REBUILD: "true" - SKIP_CHECK_REMOTE_IMAGE: "true" DB_RESET: "true" VERBOSE: "true" GITHUB_REPOSITORY: ${{ github.repository }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 51fd6ac9b0221..ece19a4c8f49a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,8 +31,6 @@ permissions: env: MOUNT_SELECTED_LOCAL_SOURCES: "false" ANSWER: "yes" - CHECK_IMAGE_FOR_REBUILD: "true" - SKIP_CHECK_REMOTE_IMAGE: "true" DB_RESET: "true" VERBOSE: "true" GITHUB_REPOSITORY: ${{ github.repository }} @@ -168,7 +166,7 @@ jobs: in-workflow-build: ${{ steps.source-run-info.outputs.in-workflow-build }} build-job-description: ${{ steps.source-run-info.outputs.build-job-description }} runs-on: ${{ steps.source-run-info.outputs.runs-on }} - merge-run: ${{ steps.source-run-info.outputs.merge-run }} + canary-run: ${{ steps.source-run-info.outputs.canary-run }} run-coverage: ${{ steps.source-run-info.outputs.run-coverage }} steps: - name: Cleanup repo @@ -261,27 +259,27 @@ jobs: steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v3 with: persist-credentials: false - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Setup python" uses: actions/setup-python@v4 with: python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - if: needs.build-info.outputs.merge-run == 'true' 
+ if: needs.build-info.outputs.canary-run == 'true' - run: ./scripts/ci/install_breeze.sh - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Free space" run: breeze ci free-space - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Start ARM instance" run: ./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh - if: matrix.platform == 'linux/arm64' && needs.build-info.outputs.merge-run == 'true' + if: matrix.platform == 'linux/arm64' && needs.build-info.outputs.canary-run == 'true' - name: "Push CI cache ${{ matrix.platform }}" run: > breeze ci-image build @@ -290,23 +288,23 @@ jobs: --run-in-parallel --force-build --platform ${{ matrix.platform }} - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Push CI latest image ${{ matrix.platform }}" run: > breeze ci-image build --tag-as-latest --push --run-in-parallel --platform ${{ matrix.platform }} # We only push "amd" image as it is really only needed for any kind of automated builds in CI # and currently there is not an easy way to make multi-platform image from two separate builds - if: matrix.platform == 'linux/amd64' && needs.build-info.outputs.merge-run == 'true' + if: matrix.platform == 'linux/amd64' && needs.build-info.outputs.canary-run == 'true' - name: "Stop ARM instance" run: ./scripts/ci/images/ci_stop_arm_instance.sh - if: always() && matrix.platform == 'linux/arm64' && needs.build-info.outputs.merge-run == 'true' + if: always() && matrix.platform == 'linux/arm64' && needs.build-info.outputs.canary-run == 'true' - name: "Clean docker cache for ${{ matrix.platform }}" run: docker system prune --all --force - if: matrix.platform == 'linux/amd64' && needs.build-info.outputs.merge-run == 'true' + if: matrix.platform == 'linux/amd64' && needs.build-info.outputs.canary-run == 'true' - name: "Fix ownership" run: breeze ci 
fix-ownership - if: always() && needs.build-info.outputs.merge-run == 'true' + if: always() && needs.build-info.outputs.canary-run == 'true' # Check that after earlier cache push, breeze command will build quickly chcek-that-image-builds-quicklly: @@ -323,30 +321,30 @@ jobs: steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v3 with: persist-credentials: false - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Setup python" uses: actions/setup-python@v4 with: python-version: ${{ needs.build-info.outputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/setup* - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - run: ./scripts/ci/install_breeze.sh - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Free space" run: breeze ci free-space - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Check that image builds quickly" run: breeze shell --max-time 120 - if: matrix.platform == 'linux/amd64' && needs.build-info.outputs.merge-run == 'true' + if: matrix.platform == 'linux/amd64' && needs.build-info.outputs.canary-run == 'true' - name: "Fix ownership" run: breeze ci fix-ownership - if: always() && needs.build-info.outputs.merge-run == 'true' + if: always() && needs.build-info.outputs.canary-run == 'true' build-ci-images: permissions: @@ -1636,20 +1634,20 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - name: "Set constraints branch name" id: constraints-branch run: ./scripts/ci/constraints/ci_branch_constraints.sh - if: needs.build-info.outputs.merge-run == 'true' + if: 
needs.build-info.outputs.canary-run == 'true' - name: Checkout ${{ steps.constraints-branch.outputs.branch }} uses: actions/checkout@v3 - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' with: path: "repo" ref: ${{ steps.constraints-branch.outputs.branch }} persist-credentials: false - name: "Commit changed constraint files for ${{needs.build-info.outputs.python-versions}}" run: ./scripts/ci/constraints/ci_commit_constraints.sh - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' - name: "Push changes" uses: ./.github/actions/github-push-action - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' with: github_token: ${{ secrets.GITHUB_TOKEN }} branch: ${{ steps.constraints-branch.outputs.branch }} @@ -1672,7 +1670,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - build-info - constraints - docs - if: needs.build-info.outputs.merge-run == 'true' + if: needs.build-info.outputs.canary-run == 'true' strategy: fail-fast: false matrix: @@ -1776,7 +1774,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" if: > needs.build-info.outputs.upgrade-to-newer-dependencies != 'false' && needs.build-info.outputs.in-workflow-build == 'true' && - needs.build-info.outputs.merge-run != 'true' + needs.build-info.outputs.canary-run != 'true' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" diff --git a/CI.rst b/CI.rst index 1b86a43b41e13..ccabd58463a96 100644 --- a/CI.rst +++ b/CI.rst @@ -21,7 +21,7 @@ CI Environment ============== Continuous Integration is important component of making Apache Airflow robust and stable. We are running -a lot of tests for every pull request, for main and v2-*-test branches and regularly as CRON jobs. +a lot of tests for every pull request, for main and v2-*-test branches and regularly as scheduled jobs. 
Our execution environment for CI is `GitHub Actions `_. GitHub Actions (GA) are very well integrated with GitHub code and Workflow and it has evolved fast in 2019/202 to become @@ -84,6 +84,13 @@ We use `GitHub Container Registry [Write Token] activate Airflow Repo Airflow Repo -->> Tests: Trigger 'push' @@ -199,6 +199,7 @@ sequenceDiagram and Note over Tests: Build CI Images
Use original constraints Tests ->> GitHub Registry: Push CI Image Early cache + latest + Note over Tests: Test that 'breeze' image builds quickly end Tests ->> GitHub Registry: Push CI Images
[COMMIT_SHA] GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] @@ -257,7 +258,7 @@ sequenceDiagram deactivate Tests ``` -## Scheduled build flow +## Scheduled run ```mermaid sequenceDiagram @@ -279,6 +280,7 @@ sequenceDiagram and Note over Tests: Build CI Images
Use original constraints Tests ->> GitHub Registry: Push CI Image Early cache + latest + Note over Tests: Test that 'breeze' image builds quickly end Tests ->> GitHub Registry: Push CI Images
[COMMIT_SHA] GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] diff --git a/dev/breeze/src/airflow_breeze/commands/ci_commands.py b/dev/breeze/src/airflow_breeze/commands/ci_commands.py index 9a6550cf02725..0c46c5fcf7941 100644 --- a/dev/breeze/src/airflow_breeze/commands/ci_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/ci_commands.py @@ -297,7 +297,7 @@ def print_ga_outputs(self): print(get_ga_output(name="runs-on", value=self.get_runs_on())) print(get_ga_output(name='in-workflow-build', value=self.in_workflow_build())) print(get_ga_output(name="build-job-description", value=self.get_build_job_description())) - print(get_ga_output(name="merge-run", value=self.is_merge_run())) + print(get_ga_output(name="canary-run", value=self.is_canary_run())) print(get_ga_output(name="run-coverage", value=self.run_coverage())) def get_runs_on(self) -> str: @@ -319,7 +319,7 @@ def get_build_job_description(self) -> str: return "Build" return "Skip Build (look in pull_request_target)" - def is_merge_run(self) -> str: + def is_canary_run(self) -> str: if ( self.event_name == 'push' and self.head_repo == "apache/airflow" diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index b32de8d7547cf..c5d93ced54ca5 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -29,13 +29,7 @@ RUNS_ON_PUBLIC_RUNNER = "ubuntu-20.04" RUNS_ON_SELF_HOSTED_RUNNER = "self-hosted" -# Commented this out as we are using buildkit and this vars became irrelevant -# FORCE_PULL_IMAGES = False -# CHECK_IF_BASE_PYTHON_IMAGE_UPDATED = False -FORCE_BUILD_IMAGES = False ANSWER = "" -SKIP_CHECK_REMOTE_IMAGE = False -# PUSH_PYTHON_BASE_IMAGE = False APACHE_AIRFLOW_GITHUB_REPOSITORY = "apache/airflow" diff --git a/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py b/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py index 8172a40685882..df2ce47ed0bb9 100644 --- 
a/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py @@ -580,7 +580,6 @@ def update_expected_environment_variables(env: Dict[str, str]) -> None: set_value_to_default_if_not_set(env, 'DEFAULT_BRANCH', AIRFLOW_BRANCH) set_value_to_default_if_not_set(env, 'ENABLED_SYSTEMS', "") set_value_to_default_if_not_set(env, 'ENABLE_TEST_COVERAGE', "false") - set_value_to_default_if_not_set(env, 'GITHUB_REGISTRY_PULL_IMAGE_TAG', "latest") set_value_to_default_if_not_set(env, 'HOST_GROUP_ID', get_host_group_id()) set_value_to_default_if_not_set(env, 'HOST_OS', get_host_os()) set_value_to_default_if_not_set(env, 'HOST_USER_ID', get_host_user_id()) diff --git a/dev/breeze/tests/test_pr_info.py b/dev/breeze/tests/test_pr_info.py index ca2ada5ecdf7a..58e75f09c802b 100644 --- a/dev/breeze/tests/test_pr_info.py +++ b/dev/breeze/tests/test_pr_info.py @@ -39,7 +39,7 @@ def test_pr_info(): assert wi.event_name == 'pull_request' assert wi.pr_number == 26004 assert wi.get_runs_on() == "ubuntu-20.04" - assert wi.is_merge_run() == "false" + assert wi.is_canary_run() == "false" assert wi.run_coverage() == "false" @@ -53,7 +53,7 @@ def test_push_info(): assert wi.event_name == 'push' assert wi.pr_number is None assert wi.get_runs_on() == "ubuntu-20.04" - assert wi.is_merge_run() == "true" + assert wi.is_canary_run() == "true" assert wi.run_coverage() == "true" @@ -67,7 +67,7 @@ def test_schedule(): assert wi.event_name == 'schedule' assert wi.pr_number is None assert wi.get_runs_on() == "ubuntu-20.04" - assert wi.is_merge_run() == "false" + assert wi.is_canary_run() == "false" assert wi.run_coverage() == "false" @@ -81,7 +81,7 @@ def test_runs_on_self_hosted(): assert wi.event_name == 'pull_request' assert wi.pr_number == 1234 assert wi.get_runs_on() == "self-hosted" - assert wi.is_merge_run() == "false" + assert wi.is_canary_run() == "false" assert wi.run_coverage() == "false" @@ -95,7 +95,7 @@ def 
test_runs_on_forced_public_runner(): assert wi.event_name == 'pull_request' assert wi.pr_number == 1234 assert wi.get_runs_on() == "ubuntu-20.04" - assert wi.is_merge_run() == "false" + assert wi.is_canary_run() == "false" assert wi.run_coverage() == "false" @@ -109,7 +109,7 @@ def test_runs_on_simple_pr_other_repo(): assert wi.event_name == 'pull_request' assert wi.pr_number == 1234 assert wi.get_runs_on() == "ubuntu-20.04" - assert wi.is_merge_run() == "false" + assert wi.is_canary_run() == "false" assert wi.run_coverage() == "false" @@ -123,7 +123,7 @@ def test_runs_on_push_other_branch(): assert wi.event_name == 'push' assert wi.pr_number is None assert wi.get_runs_on() == "self-hosted" - assert wi.is_merge_run() == "false" + assert wi.is_canary_run() == "false" assert wi.run_coverage() == "false" @@ -137,5 +137,5 @@ def test_runs_on_push_v_test_branch(): assert wi.event_name == 'push' assert wi.pr_number is None assert wi.get_runs_on() == "self-hosted" - assert wi.is_merge_run() == "true" + assert wi.is_canary_run() == "true" assert wi.run_coverage() == "false" diff --git a/scripts/ci/docker-compose/_docker.env b/scripts/ci/docker-compose/_docker.env index 8b90912eaa592..3dafb73f6867b 100644 --- a/scripts/ci/docker-compose/_docker.env +++ b/scripts/ci/docker-compose/_docker.env @@ -35,7 +35,6 @@ ENABLED_INTEGRATIONS ENABLED_SYSTEMS ENABLE_TEST_COVERAGE GITHUB_ACTIONS -GITHUB_REGISTRY_PULL_IMAGE_TAG HOST_USER_ID HOST_GROUP_ID HOST_OS diff --git a/scripts/ci/docker-compose/base.yml b/scripts/ci/docker-compose/base.yml index 7904bc74fb873..b5d0f895b59aa 100644 --- a/scripts/ci/docker-compose/base.yml +++ b/scripts/ci/docker-compose/base.yml @@ -48,7 +48,6 @@ services: - ENABLED_SYSTEMS=${ENABLED_SYSTEMS} - ENABLE_TEST_COVERAGE=${ENABLE_TEST_COVERAGE} - GITHUB_ACTIONS=${GITHUB_ACTIONS} - - GITHUB_REGISTRY_PULL_IMAGE_TAG=${GITHUB_REGISTRY_PULL_IMAGE_TAG} - HOST_USER_ID=${HOST_USER_ID} - HOST_GROUP_ID=${HOST_GROUP_ID} - HOST_OS=${HOST_OS} diff --git 
a/scripts/ci/docker-compose/devcontainer.env b/scripts/ci/docker-compose/devcontainer.env index 3f40808d986cc..4f10d5beb6c7d 100644 --- a/scripts/ci/docker-compose/devcontainer.env +++ b/scripts/ci/docker-compose/devcontainer.env @@ -35,7 +35,6 @@ ENABLED_INTEGRATIONS= ENABLED_SYSTEMS= ENABLE_TEST_COVERAGE="false" GITHUB_ACTIONS="false" -GITHUB_REGISTRY_PULL_IMAGE_TAG="" HOST_USER_ID= HOST_GROUP_ID= HOST_OS="linux" diff --git a/scripts/ci/libraries/_all_libs.sh b/scripts/ci/libraries/_all_libs.sh index 87de95bfa09a8..da893d5dcb613 100755 --- a/scripts/ci/libraries/_all_libs.sh +++ b/scripts/ci/libraries/_all_libs.sh @@ -36,8 +36,6 @@ readonly SCRIPTS_CI_DIR . "${LIBRARIES_DIR}"/_sanity_checks.sh # shellcheck source=scripts/ci/libraries/_local_mounts.sh . "${LIBRARIES_DIR}"/_local_mounts.sh -# shellcheck source=scripts/ci/libraries/_md5sum.sh -. "${LIBRARIES_DIR}"/_md5sum.sh # shellcheck source=scripts/ci/libraries/_start_end.sh . "${LIBRARIES_DIR}"/_start_end.sh # shellcheck source=scripts/ci/libraries/_testing.sh diff --git a/scripts/ci/libraries/_initialization.sh b/scripts/ci/libraries/_initialization.sh index 2d7757d55f386..42b6896319e68 100644 --- a/scripts/ci/libraries/_initialization.sh +++ b/scripts/ci/libraries/_initialization.sh @@ -274,16 +274,10 @@ function initialization::initialize_mount_variables() { # Determine values of force settings function initialization::initialize_force_variables() { - # Determines whether to force build without checking if it is needed - # Can be overridden by '--force-build-images' flag. - export FORCE_BUILD_IMAGES=${FORCE_BUILD_IMAGES:="false"} # Can be set to "yes/no/quit" in order to force specified answer to all questions asked to the user. 
export ANSWER=${ANSWER:=""} - # Can be set to true to skip if the image is newer in registry - export SKIP_CHECK_REMOTE_IMAGE=${SKIP_CHECK_REMOTE_IMAGE:="false"} - # integrations are disabled by default export ENABLED_INTEGRATIONS=${ENABLED_INTEGRATIONS:=""} @@ -461,8 +455,6 @@ function initialization::initialize_git_variables() { } function initialization::initialize_github_variables() { - export GITHUB_REGISTRY_PULL_IMAGE_TAG=${GITHUB_REGISTRY_PULL_IMAGE_TAG:="latest"} - export GITHUB_REGISTRY_PUSH_IMAGE_TAG=${GITHUB_REGISTRY_PUSH_IMAGE_TAG:="latest"} export GITHUB_REPOSITORY=${GITHUB_REPOSITORY:="apache/airflow"} # Allows to override the repository which is used as source of constraints during the build @@ -557,7 +549,7 @@ function initialization::get_docker_cache_image_names() { # Example: # ghcr.io/apache/airflow/main/ci/python3.8:latest # ghcr.io/apache/airflow/main/ci/python3.8: - export AIRFLOW_CI_IMAGE_WITH_TAG="${image_name}/${BRANCH_NAME}/ci/python${PYTHON_MAJOR_MINOR_VERSION}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}" + export AIRFLOW_CI_IMAGE_WITH_TAG="${image_name}/${BRANCH_NAME}/ci/python${PYTHON_MAJOR_MINOR_VERSION}:latest" # File that is touched when the CI image is built for the first time locally export BUILT_CI_IMAGE_FLAG_FILE="${BUILD_CACHE_DIR}/${BRANCH_NAME}/.built_${PYTHON_MAJOR_MINOR_VERSION}" @@ -585,9 +577,7 @@ Mount variables: Force variables: - FORCE_BUILD_IMAGES: ${FORCE_BUILD_IMAGES} ANSWER: ${ANSWER} - SKIP_CHECK_REMOTE_IMAGE: ${SKIP_CHECK_REMOTE_IMAGE} Host variables: @@ -643,7 +633,6 @@ Detected GitHub environment: GITHUB_REPOSITORY: '${GITHUB_REPOSITORY}' GITHUB_USERNAME: '${GITHUB_USERNAME}' GITHUB_REGISTRY_PULL_IMAGE_TAG: '${GITHUB_REGISTRY_PULL_IMAGE_TAG}' - GITHUB_REGISTRY_PUSH_IMAGE_TAG: '${GITHUB_REGISTRY_PUSH_IMAGE_TAG}' GITHUB_ACTIONS: '${GITHUB_ACTIONS=}' Initialization variables: @@ -772,7 +761,6 @@ function initialization::make_constants_read_only() { readonly ADDITIONAL_RUNTIME_APT_ENV readonly GITHUB_REGISTRY_PULL_IMAGE_TAG - 
readonly GITHUB_REGISTRY_PUSH_IMAGE_TAG readonly GITHUB_REPOSITORY readonly GITHUB_TOKEN diff --git a/scripts/ci/libraries/_md5sum.sh b/scripts/ci/libraries/_md5sum.sh deleted file mode 100644 index 1838936fc903a..0000000000000 --- a/scripts/ci/libraries/_md5sum.sh +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -declare -a MODIFIED_FILES -# -# Verifies if stored md5sum of the file changed since the last time it was checked -# The md5sum files are stored in .build directory - you can delete this directory -# If you want to rebuild everything from the scratch -# -function md5sum::calculate_file_md5sum { - local file="${1}" - local md5sum - local md5sum_cache_dir="${BUILD_CACHE_DIR}/${BRANCH_NAME}/${PYTHON_MAJOR_MINOR_VERSION}/${THE_IMAGE_TYPE}" - mkdir -pv "${md5sum_cache_dir}" - md5sum=$(md5sum "${file}") - local md5sum_file - md5sum_file="${md5sum_cache_dir}"/$(basename "$(dirname "${file}")")-$(basename "${file}").md5sum - local md5sum_file_new - md5sum_file_new=${CACHE_TMP_FILE_DIR}/$(basename "$(dirname "${file}")")-$(basename "${file}").md5sum.new - echo "${md5sum}" > "${md5sum_file_new}" - local ret_code=0 - if [[ ! 
-f "${md5sum_file}" ]]; then - verbosity::print_info "Missing md5sum for ${file#"${AIRFLOW_SOURCES}"} (${md5sum_file#"${AIRFLOW_SOURCES}"})" - ret_code=1 - else - diff "${md5sum_file_new}" "${md5sum_file}" >/dev/null - local res=$? - if [[ "${res}" != "0" ]]; then - verbosity::print_info "The md5sum changed for ${file}: was $(cat "${md5sum_file}") now it is $(cat "${md5sum_file_new}")" - if [[ ${CI} == "true" ]]; then - echo "${COLOR_RED}The file has changed: ${file}${COLOR_RESET}" - echo "${COLOR_BLUE}==============================${COLOR_RESET}" - cat "${file}" - echo "${COLOR_BLUE}==============================${COLOR_RESET}" - fi - ret_code=1 - fi - fi - return ${ret_code} -} - -# -# Moves md5sum file from it's temporary location in CACHE_TMP_FILE_DIR to -# BUILD_CACHE_DIR - thus updating stored MD5 sum for the file -# -function md5sum::move_file_md5sum { - local file="${1}" - local md5sum_file - local md5sum_cache_dir="${BUILD_CACHE_DIR}/${BRANCH_NAME}/${PYTHON_MAJOR_MINOR_VERSION}/${THE_IMAGE_TYPE}" - mkdir -pv "${md5sum_cache_dir}" - md5sum_file="${md5sum_cache_dir}"/$(basename "$(dirname "${file}")")-$(basename "${file}").md5sum - local md5sum_file_new - md5sum_file_new=${CACHE_TMP_FILE_DIR}/$(basename "$(dirname "${file}")")-$(basename "${file}").md5sum.new - if [[ -f "${md5sum_file_new}" ]]; then - mv "${md5sum_file_new}" "${md5sum_file}" - verbosity::print_info "Updated md5sum file ${md5sum_file} for ${file}: $(cat "${md5sum_file}")" - fi -} - -# -# Stores md5sum files for all important files and -# records that we built the images locally so that next time we use -# it from the local docker cache rather than pull (unless forced) -# -function md5sum::update_all_md5() { - verbosity::print_info - verbosity::print_info "Updating md5sum files" - verbosity::print_info - for file in "${FILES_FOR_REBUILD_CHECK[@]}" - do - md5sum::move_file_md5sum "${AIRFLOW_SOURCES}/${file}" - done - mkdir -pv "${BUILD_CACHE_DIR}/${BRANCH_NAME}" - touch 
"${BUILT_CI_IMAGE_FLAG_FILE}" -} - -function md5sum::update_all_md5_with_group() { - start_end::group_start "Update MD5 hashes for pulled images" - md5sum::update_all_md5 - start_end::group_end -} - -function md5sum::calculate_md5sum_for_all_files() { - FILES_MODIFIED="false" - set +e - for file in "${FILES_FOR_REBUILD_CHECK[@]}" - do - if ! md5sum::calculate_file_md5sum "${AIRFLOW_SOURCES}/${file}"; then - FILES_MODIFIED="true" - MODIFIED_FILES+=( "${file}" ) - fi - done - set -e -} - -# -# Checks md5sum of all important files in order to optimise speed of running various operations -# That mount sources of Airflow to container and require docker image built with latest dependencies. -# the Docker image will only be marked for rebuilding only in case any of the important files change: -# * setup.py -# * setup.cfg -# * Dockerfile.ci -# -# This is needed because we want to skip rebuilding of the image when only airflow sources change but -# Trigger rebuild in case we need to change dependencies (setup.py, setup.cfg, change version of Airflow -# or the Dockerfile.ci itself changes. -# -# Another reason to skip rebuilding Docker is thar currently it takes a bit longer time than simple Docker -# We need to fix group permissions of files in Docker because different linux build services have -# different default umask and Docker uses group permissions in checking for cache invalidation. -# -# As result of this check - most of the static checks will start pretty much immediately. -# -function md5sum::check_if_docker_build_is_needed() { - verbosity::print_info - verbosity::print_info "Checking if image build is needed for ${THE_IMAGE_TYPE} image." 
- verbosity::print_info - if [[ ${FORCE_BUILD_IMAGES:=""} == "true" ]]; then - verbosity::print_info - verbosity::print_info "${COLOR_YELLOW}Docker image build is forced for ${THE_IMAGE_TYPE} image${COLOR_RESET}" - verbosity::print_info - md5sum::calculate_md5sum_for_all_files - needs_docker_build="true" - else - md5sum::calculate_md5sum_for_all_files - if [[ ${FILES_MODIFIED} == "true" ]]; then - needs_docker_build="true" - fi - if [[ ${needs_docker_build} == "true" ]]; then - verbosity::print_info - verbosity::print_info "${COLOR_YELLOW}The files were modified and likely the ${THE_IMAGE_TYPE} image needs rebuild: ${MODIFIED_FILES[*]}${COLOR_RESET}" - verbosity::print_info - else - verbosity::print_info - verbosity::print_info "${COLOR_GREEN}Docker image build is not needed for ${THE_IMAGE_TYPE} image!${COLOR_RESET}" - verbosity::print_info - fi - fi -}