From 71b03765779af43fec84d45a8e79a5a6fd3bb1d5 Mon Sep 17 00:00:00 2001 From: sychen Date: Mon, 20 Nov 2023 14:34:09 +0800 Subject: [PATCH] [KYUUBI #5617][TEST] Make flaky test `spnego batch rest client` reliable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # :mag: Description ## Issue References 🔗 This pull request fixes #5617 ## Describe Your Solution 🔧 The sleep argument of the `batch_long_time` job is increased from 10 seconds to 120 seconds, so that the Spark application does not finish and exit before the batch is deleted. When pulling the Spark running log, the test now retries until the Spark driver is already running. ## Types of changes :bookmark: - [x] Bugfix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) ## Test Plan 🧪 #### Behavior Without This Pull Request :coffin: ``` - basic batch rest client *** FAILED *** "{"success":false,"msg":"APPLICATION_NOT_FOUND"} Warning: Batch 1f3af615-721b-45dd-9f0f-cb976bfef988 is already in terminal state ERROR." 
did not contain ""success":true" (TestPrematureExit.scala:72) ``` ``` - spnego batch rest client *** FAILED *** "{"success":false,"msg":"APPLICATION_NOT_FOUND"} Error: Failed to delete batch f20c041c-3246-4fca-9814-c4892cd83220, its current state is RUNNING" did not contain ""success":true" (TestPrematureExit.scala:72) ``` #### Behavior With This Pull Request :tada: GA pass #### Related Unit Tests BatchCliSuite.scala "basic batch rest client" "spnego batch rest client" --- # Checklists ## 📝 Author Self Checklist - [x] My code follows the [style guidelines](https://kyuubi.readthedocs.io/en/master/contributing/code/style.html) of this project - [x] I have performed a self-review - [x] I have commented my code, particularly in hard-to-understand areas - [x] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [x] I have added tests that prove my fix is effective or that my feature works - [x] New and existing unit tests pass locally with my changes - [x] This patch was not authored or co-authored using [Generative Tooling](https://www.apache.org/legal/generative-tooling.html) ## 📝 Committer Pre-Merge Checklist - [x] Pull request title is okay. - [x] No license issues. - [x] Milestone correctly set? - [x] Test coverage is ok - [x] Assignees are selected. - [x] Minimum number of approvals - [x] No changes are requested **Be nice. Be informative.** Closes #5722 from cxzl25/KYUUBI-5617. 
Closes #5617 ba2ca7b38 [sychen] invalidCount fb0586cb9 [sychen] retry 2d074572c [sychen] sleep 1 min Authored-by: sychen Signed-off-by: Shaoyun Chen --- .../server/rest/client/BatchCliSuite.scala | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchCliSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchCliSuite.scala index bcf8c450eb8..599daa51377 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchCliSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchCliSuite.scala @@ -83,7 +83,7 @@ class BatchCliSuite extends RestClientTestHelper with TestPrematureExit with Bat | resource: ${sparkBatchTestResource.get} | className: org.apache.spark.examples.DriverSubmissionTest | args: - | - 10 + | - 120 | configs: | spark.master: local | wait.completion: true @@ -147,14 +147,20 @@ class BatchCliSuite extends RestClientTestHelper with TestPrematureExit with Bat "batch", batchId, "--size", - "2", + "100", "--username", ldapUser, "--password", ldapUserPasswd) - result = testPrematureExitForControlCli(logArgs, "") - val rows = result.split("\n") - assert(rows.length == 2) + eventually(timeout(60.seconds), interval(100.milliseconds)) { + invalidCount += 1 + result = testPrematureExitForControlCli(logArgs, "") + val rows = result.split("\n") + assert(rows.length >= 2) + // org.apache.spark.examples.DriverSubmissionTest output + assert(result.contains("Alive for")) + invalidCount -= 1 + } val deleteArgs = Array( "delete", @@ -168,7 +174,7 @@ class BatchCliSuite extends RestClientTestHelper with TestPrematureExit with Bat eventually(timeout(3.seconds), interval(200.milliseconds)) { assert(MetricsSystem.counterValue( - MetricsConstants.REST_CONN_TOTAL).getOrElse(0L) - totalConnections - invalidCount === 5) + MetricsConstants.REST_CONN_TOTAL).getOrElse(0L) - totalConnections - 
invalidCount >= 5) assert(MetricsSystem.counterValue(MetricsConstants.REST_CONN_OPEN).getOrElse(0L) === 0) } } @@ -206,12 +212,16 @@ class BatchCliSuite extends RestClientTestHelper with TestPrematureExit with Bat "batch", batchId, "--size", - "2", + "100", "--authSchema", "spnego") - result = testPrematureExitForControlCli(logArgs, "") - val rows = result.split("\n") - assert(rows.length == 2) + eventually(timeout(60.seconds), interval(100.milliseconds)) { + result = testPrematureExitForControlCli(logArgs, "") + val rows = result.split("\n") + assert(rows.length >= 2) + // org.apache.spark.examples.DriverSubmissionTest output + assert(result.contains("Alive for")) + } val deleteArgs = Array( "delete",