From 617196c68eb64951274a24fd978785e3caa786b5 Mon Sep 17 00:00:00 2001 From: David Arthur Date: Wed, 29 Jan 2025 18:42:39 -0500 Subject: [PATCH] KAFKA-18636 Fix how we handle Gradle exits in CI (#18681) This patch removes the explicit failure of test tasks in Gradle when there is a flaky test. This also fixes a fall-through case in junit.py where we did not recognize an error prior to running the tests (such as the javadoc task). Additionally, this patch removes usages of ignoreFailures in our CI and changes the XML copy task to a finalizer task instead of doLast closure. Reviewers: Jun Rao , Chia-Ping Tsai --- .github/scripts/junit.py | 10 ++-- build.gradle | 120 ++++++++++++++++++--------------------- 2 files changed, 60 insertions(+), 70 deletions(-) diff --git a/.github/scripts/junit.py b/.github/scripts/junit.py index 5a2088f8ea196..d1a28739c7578 100644 --- a/.github/scripts/junit.py +++ b/.github/scripts/junit.py @@ -426,7 +426,7 @@ def split_report_path(base_path: str, report_path: str) -> Tuple[str, str]: else: logger.debug(f"Failing this step because the tests timed out. Thread dumps were not archived, check logs in JUnit step.") exit(1) - elif test_exit_code in (0, 1): + elif test_exit_code == 1 or quarantined_test_exit_code == 1: logger.debug(summary) if total_failures > 0: logger.debug(f"Failing this step due to {total_failures} test failures") @@ -435,7 +435,9 @@ def split_report_path(base_path: str, report_path: str) -> Tuple[str, str]: logger.debug(f"Failing this step due to {total_errors} test errors") exit(1) else: - exit(0) + logger.debug("There was an error during the test or quarantinedTest task. Please check the logs") + exit(1) else: - logger.debug(f"Gradle had unexpected exit code {test_exit_code}. Failing this step") - exit(1) + # Normal exit + logger.debug(summary) + exit(0) diff --git a/build.gradle b/build.gradle index eb96a0912405f..9a6c4f2714d12 100644 --- a/build.gradle +++ b/build.gradle @@ -105,6 +105,9 @@ ext { throw new GradleException("Unexpected value for keepAliveMode property. Expected one of $keepAliveValues, but received: $userKeepAliveModeString") } + // Used by :test and :quarantinedTest tasks + isGithubActions = System.getenv('GITHUB_ACTIONS') != null + // See README.md for details on this option and the reasoning for the default userScalaOptimizerMode = project.hasProperty("scalaOptimizerMode") ? scalaOptimizerMode : "inline-kafka" def scalaOptimizerValues = ["none", "method", "inline-kafka", "inline-scala"] @@ -160,6 +163,7 @@ ext { libs.log4j2Api, libs.log4j2Core ] + } allprojects { @@ -492,14 +496,55 @@ subprojects { // Gradle will run each test class, so we exclude the suites to avoid redundantly running the tests twice. def testsToExclude = ['**/*Suite.class'] - test { - ext { - isGithubActions = System.getenv('GITHUB_ACTIONS') != null - hadFailure = false // Used to track if any tests failed, see afterSuite below + + // These two tasks will copy JUnit XML files out of the sub-project's build directory and into + // a top-level build/junit-xml directory. This is necessary to avoid reporting on tests which + // were not run, but instead were restored via FROM-CACHE. See KAFKA-17479 for more details. + def copyTestXml = tasks.register('copyTestXml') { + onlyIf("Environment GITHUB_ACTIONS is set") { isGithubActions } + onlyIf("Project '${project.name}:test' has sources") { ! test.state.noSource } + onlyIf("Task '${project.name}:test' did work") { test.state.didWork } + + // Never cache this task + outputs.cacheIf { false } + outputs.upToDateWhen { false } + + doLast { + def moduleDirPath = projectToJUnitXmlPath(project) + def dest = rootProject.layout.buildDirectory.dir("junit-xml/${moduleDirPath}/test").get().asFile + println "Copy JUnit XML for ${project.name} to $dest" + ant.copy(todir: "$dest") { + ant.fileset(dir: "${test.reports.junitXml.entryPoint}") { + ant.include(name: "**/*.xml") + } + } } + } + + def copyQuarantinedTestXml = tasks.register('copyQuarantinedTestXml') { + onlyIf("Environment GITHUB_ACTIONS is set") { isGithubActions } + onlyIf("Project '${project.name}:quarantinedTest' has sources") { ! quarantinedTest.state.noSource } + onlyIf("Task '${project.name}:quarantinedTest' did work") { quarantinedTest.state.didWork } + // Never cache this task + outputs.cacheIf { false } + outputs.upToDateWhen { false } + + doLast { + def moduleDirPath = projectToJUnitXmlPath(project) + def dest = rootProject.layout.buildDirectory.dir("junit-xml/${moduleDirPath}/quarantinedTest").get().asFile + println "Copy JUnit XML for ${project.name} to $dest" + ant.copy(todir: "$dest") { + ant.fileset(dir: "${quarantinedTest.reports.junitXml.entryPoint}") { + ant.include(name: "**/*.xml") + } + } + } + } + + test { maxParallelForks = maxTestForks - ignoreFailures = userIgnoreFailures || ext.isGithubActions + ignoreFailures = userIgnoreFailures maxHeapSize = defaultMaxHeapSize jvmArgs = defaultJvmArgs @@ -531,48 +576,17 @@ subprojects { } } - // As we process results, check if there were any test failures. - afterSuite { desc, result -> - if (result.resultType == TestResult.ResultType.FAILURE) { - ext.hadFailure = true - } - } - - // This closure will copy JUnit XML files out of the sub-project's build directory and into - // a top-level build/junit-xml directory. This is necessary to avoid reporting on tests which - // were not run, but instead were restored via FROM-CACHE. See KAFKA-17479 for more details. - doLast { - if (ext.isGithubActions) { - def moduleDirPath = projectToJUnitXmlPath(project) - def dest = rootProject.layout.buildDirectory.dir("junit-xml/${moduleDirPath}/test").get().asFile - println "Copy JUnit XML for ${project.name} to $dest" - ant.copy(todir: "$dest") { - ant.fileset(dir: "${test.reports.junitXml.entryPoint}") - } - - // If there were any test failures, we want to fail the task to prevent the failures - // from being cached. - if (ext.hadFailure) { - throw new GradleException("Failing this task since '${project.name}:${name}' had test failures.") - } - } - } + finalizedBy("copyTestXml") } task quarantinedTest(type: Test, dependsOn: compileJava) { - ext { - isGithubActions = System.getenv('GITHUB_ACTIONS') != null - hadFailure = false // Used to track if any tests failed, see afterSuite below - } - // Disable caching and up-to-date for this task. We always want quarantined tests - // to run and never want to cache their results. Since we do this, we can avoid - // explicitly failing the build like we do in "test" with ext.hadFailure. + // to run and never want to cache their results. outputs.upToDateWhen { false } outputs.cacheIf { false } maxParallelForks = maxTestForks - ignoreFailures = userIgnoreFailures || ext.isGithubActions + ignoreFailures = userIgnoreFailures maxHeapSize = defaultMaxHeapSize jvmArgs = defaultJvmArgs @@ -601,33 +615,7 @@ subprojects { } } - // As we process results, check if there were any test failures. - afterSuite { desc, result -> - if (result.resultType == TestResult.ResultType.FAILURE) { - ext.hadFailure = true - } - } - - // This closure will copy JUnit XML files out of the sub-project's build directory and into - // a top-level build/junit-xml directory. This is necessary to avoid reporting on tests which - // were not run, but instead were restored via FROM-CACHE. See KAFKA-17479 for more details. - doLast { - if (ext.isGithubActions) { - def moduleDirPath = projectToJUnitXmlPath(project) - def dest = rootProject.layout.buildDirectory.dir("junit-xml/${moduleDirPath}/quarantinedTest").get().asFile - println "Copy JUnit XML for ${project.name} to $dest" - ant.copy(todir: "$dest", failonerror: "false") { - ant.fileset(dir: "${quarantinedTest.reports.junitXml.entryPoint}") { - ant.include(name: "**/*.xml") - } - } - // If there were any test failures, we want to fail the task to prevent the failures - // from being cached. - if (ext.hadFailure) { - throw new GradleException("Failing this task since '${project.name}:${name}' had test failures.") - } - } - } + finalizedBy("copyQuarantinedTestXml") } task integrationTest(type: Test, dependsOn: compileJava) {