diff --git a/.github/actions/setup-e2e/action.yaml b/.github/actions/setup-e2e/action.yaml
index 52fa1f7330..e52a0301de 100644
--- a/.github/actions/setup-e2e/action.yaml
+++ b/.github/actions/setup-e2e/action.yaml
@@ -32,6 +32,10 @@ inputs:
     description: "If k3d is not required, set this to false"
     required: false
     default: "true"
+  require_k3d_storage:
+    description: "If local-path-storage is required on the k3d cluster, set this to true"
+    required: false
+    default: "false"
   require_minikube:
     description: "If minikube is not required, set this to true and set require_k3d to false"
     required: false
@@ -85,6 +89,7 @@ runs:
       with:
         agents: 3
         ingress_port: ${{ inputs.ingress_port }}
+        storage: ${{ inputs.require_k3d_storage }}
     - name: Setup Minikube environment
       if: ${{ inputs.require_minikube == 'true' }}
       shell: bash
diff --git a/.github/actions/setup-k3d/action.yaml b/.github/actions/setup-k3d/action.yaml
index e957ac90bc..30eba3142a 100644
--- a/.github/actions/setup-k3d/action.yaml
+++ b/.github/actions/setup-k3d/action.yaml
@@ -36,6 +36,10 @@ inputs:
     description: "Number of agents"
     required: false
     default: "3"
+  storage:
+    description: "If true, the local-path-storage will be deployed"
+    required: false
+    default: "false"
   options:
     description: "Options for k3d cluster create command"
     required: false
@@ -124,6 +128,11 @@ runs:
         echo $KUBECONFIG
         cat $KUBECONFIG
         cat /etc/hosts
+    - name: Set local path storage
+      if: ${{ inputs.storage == 'true' }}
+      shell: bash
+      run: |
+        make k3d/storage
     - name: Show Kubernetes Cluster Info
       shell: bash
       run: |
diff --git a/.github/helm/values/values-rollout-agent.yaml b/.github/helm/values/values-rollout-agent.yaml
new file mode 100644
index 0000000000..65801736a6
--- /dev/null
+++ b/.github/helm/values/values-rollout-agent.yaml
@@ -0,0 +1,84 @@
+#
+# Copyright (C) 2019-2024 vdaas.org vald team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+defaults:
+  logging:
+    level: debug
+  networkPolicy:
+    enabled: true
+gateway:
+  lb:
+    enabled: true
+    minReplicas: 1
+    hpa:
+      enabled: false
+    resources:
+      requests:
+        cpu: 100m
+        memory: 50Mi
+    gateway_config:
+      index_replica: 2
+agent:
+  minReplicas: 3
+  maxReplicas: 3
+  podManagementPolicy: Parallel
+  hpa:
+    enabled: false
+  resources:
+    requests:
+      cpu: 100m
+      memory: 50Mi
+  # We recommend setting this value long enough for the PV backup to finish, since the index is backed up when the pod terminates.
+  terminationGracePeriodSeconds: 600
+  # This is the persistent volume settings.
+  # Please change it according to your environment.
+  persistentVolume:
+    enabled: true
+    accessMode: ReadWriteOncePod
+    storageClass: local-path
+    size: 500Mi
+  ngt:
+    auto_index_duration_limit: 2m
+    auto_index_check_duration: 30s
+    auto_index_length: 500
+    dimension: 784
+    enable_in_memory_mode: false
+    index_path: "/var/ngt/index"
+discoverer:
+  minReplicas: 1
+  hpa:
+    enabled: false
+  resources:
+    requests:
+      cpu: 100m
+      memory: 50Mi
+manager:
+  index:
+    replicas: 1
+    resources:
+      requests:
+        cpu: 100m
+        memory: 30Mi
+    indexer:
+      auto_index_duration_limit: 2m
+      auto_index_check_duration: 30s
+      auto_index_length: 1000
+    corrector:
+      enabled: true
+      # suspend because you do not want corrector to start automatically in CI
+      # instead run it manually
+      suspend: true
+      schedule: "1 2 3 4 5"
diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
index 774161c298..4e05e86036 100644
--- a/.github/workflows/e2e.yaml
+++ b/.github/workflows/e2e.yaml
@@ -348,6 +348,49 @@ jobs:
             e2e/readreplica
         env:
           POD_NAME: ${{ steps.deploy_vald_readreplica.outputs.POD_NAME }}
+  e2e-stream-crud-with-rollout-restart-agent:
+    name: "E2E test (Stream CRUD) with rollout restart agent"
+    needs: [detect-ci-container]
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    container:
+      image: ghcr.io/vdaas/vald/vald-ci-container:${{ needs.detect-ci-container.outputs.TAG }}
+      options: "--add-host host.docker.internal:host-gateway"
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set Git config
+        run: |
+          git config --global --add safe.directory ${GITHUB_WORKSPACE}
+      - name: Setup E2E environment
+        id: setup_e2e
+        uses: ./.github/actions/setup-e2e
+        with:
+          require_k3d_storage: true
+      - name: Deploy Vald
+        id: deploy_vald
+        uses: ./.github/actions/e2e-deploy-vald
+        with:
+          helm_extra_options: ${{ steps.setup_e2e.outputs.HELM_EXTRA_OPTIONS }}
+          values: .github/helm/values/values-rollout-agent.yaml
+          wait_for_selector: app=vald-agent
+      - name: Run E2E CRUD with rollout restart agent
+        run: |
+          make hack/benchmark/assets/dataset/${{ env.DATASET }}
+          make E2E_BIND_PORT=8081 \
+            E2E_DATASET_NAME=${{ env.DATASET }} \
+            E2E_INSERT_COUNT=1000 \
+            E2E_SEARCH_COUNT=1000 \
+            E2E_SEARCH_BY_ID_COUNT=1000 \
+            E2E_GET_OBJECT_COUNT=100 \
+            E2E_UPDATE_COUNT=100 \
+            E2E_UPSERT_COUNT=100 \
+            E2E_REMOVE_COUNT=100 \
+            E2E_WAIT_FOR_CREATE_INDEX_DURATION=3m \
+            E2E_TARGET_POD_NAME=${POD_NAME} \
+            E2E_TARGET_NAMESPACE=default \
+            e2e/rollout/restart/agent
+        env:
+          POD_NAME: ${{ steps.deploy_vald.outputs.POD_NAME }}
   e2e-stream-crud-with-mirror:
     name: "E2E test (Stream CRUD) with mirror"
     needs: [detect-ci-container]
@@ -417,6 +460,7 @@ jobs:
       - e2e-stream-crud-under-index-management-jobs
       - e2e-stream-crud-with-mirror
       - e2e-stream-crud-with-readreplica
+      - e2e-stream-crud-with-rollout-restart-agent
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
diff --git a/Makefile b/Makefile
index 47acef0862..1be4750b06 100644
--- a/Makefile
+++ b/Makefile
@@ -367,6 +367,7 @@ E2E_UPDATE_COUNT ?= 10
 E2E_UPSERT_COUNT ?= 10
 E2E_WAIT_FOR_CREATE_INDEX_DURATION ?= 8m
 E2E_WAIT_FOR_START_TIMEOUT ?= 10m
+E2E_WAIT_FOR_RESOURCE_READY ?= 3m
 E2E_SEARCH_FROM ?= 0
 E2E_SEARCH_BY_ID_FROM ?= 0
 E2E_INSERT_FROM ?= 0
diff --git a/Makefile.d/e2e.mk b/Makefile.d/e2e.mk
index ea31aa5dcf..8e5a53ac6d 100644
--- a/Makefile.d/e2e.mk
+++ b/Makefile.d/e2e.mk
@@ -91,7 +91,7 @@ e2e/readreplica:
 
-.PHONY: e2e/rollaout/restart/agent
+.PHONY: e2e/rollout/restart/agent
 ## run rollout-restart agent e2e
-e2e/rolloout/restart/agent:
+e2e/rollout/restart/agent:
 	$(call run-e2e-crud-test,-run TestE2EAgentRolloutRestart)
 
 .PHONY: e2e/maxdim
diff --git a/Makefile.d/functions.mk b/Makefile.d/functions.mk
index 1802d7d7b6..51ae3a31f0 100644
--- a/Makefile.d/functions.mk
+++ b/Makefile.d/functions.mk
@@ -163,6 +163,7 @@ define run-e2e-crud-test
 	    -search-by-id-from=$(E2E_SEARCH_BY_ID_FROM) \
 	    -get-object-from=$(E2E_GET_OBJECT_FROM) \
 	    -wait-after-insert=$(E2E_WAIT_FOR_CREATE_INDEX_DURATION) \
+	    -wait-resource-ready=$(E2E_WAIT_FOR_RESOURCE_READY) \
 	    -portforward=$(E2E_PORTFORWARD_ENABLED) \
-portforward-pod-name=$(E2E_TARGET_POD_NAME) \ -portforward-pod-port=$(E2E_TARGET_PORT) \ diff --git a/tests/e2e/crud/crud_test.go b/tests/e2e/crud/crud_test.go index d5e3f89068..982fcda586 100644 --- a/tests/e2e/crud/crud_test.go +++ b/tests/e2e/crud/crud_test.go @@ -63,7 +63,8 @@ var ( upsertFrom int removeFrom int - waitAfterInsertDuration time.Duration + waitAfterInsertDuration time.Duration + waitResourceReadyDuration time.Duration kubeClient client.Client namespace string @@ -97,6 +98,7 @@ func init() { datasetName := flag.String("dataset", "fashion-mnist-784-euclidean.hdf5", "dataset") waitAfterInsert := flag.String("wait-after-insert", "3m", "wait duration after inserting vectors") + waitResourceReady := flag.String("wait-resource-ready", "3m", "wait duration for resource ready") pf := flag.Bool("portforward", false, "enable port forwarding") pfPodName := flag.String("portforward-pod-name", "vald-gateway-0", "pod name (only for port forward)") @@ -132,6 +134,11 @@ func init() { if err != nil { panic(err) } + + waitResourceReadyDuration, err = time.ParseDuration(*waitResourceReady) + if err != nil { + panic(err) + } } func teardown() { @@ -1018,7 +1025,6 @@ func TestE2EAgentRolloutRestart(t *testing.T) { sleep(t, waitAfterInsertDuration) - // TODO Dipatch Search Inf-Loop searchFunc := func() { _ = op.Search(t, ctx, operation.Dataset{ Test: ds.Test[searchFrom : searchFrom+searchNum], @@ -1037,31 +1043,17 @@ func TestE2EAgentRolloutRestart(t *testing.T) { return default: searchFunc() - time.Sleep(1 * time.Second) + time.Sleep(10 * time.Second) } } }() - kubectl.RolloutRestart(ctx, t, "statefulset", "vald-agent") // Wait for StatefulSet to be ready - time.Sleep(10 * time.Second) - t.Log("waiting for agent pods ready...") - swg := sync.WaitGroup{} - swg.Add(1) - go func() { - defer swg.Done() - for { - ok, err := kubeClient.WaitForStatefulSetReady(ctx, namespace, "vald-agent", 10*time.Minute) - if err != nil { - t.Fatalf("an error occurred: %s", err) - } - if ok 
{ - t.Log("statefulset is ok", ok) - break - } - } - }() - swg.Wait() + t.Log("rollout restart agent and waiting for agent pods ready...") + err = kubectl.RolloutResourceName(ctx, t, "statefulset", "vald-agent", waitResourceReadyDuration.String()) + if err != nil { + t.Fatalf("an error occurred: %s", err) + } cnt, err := op.IndexInfo(t, ctx) if err != nil { diff --git a/tests/e2e/kubernetes/kubectl/kubectl.go b/tests/e2e/kubernetes/kubectl/kubectl.go index b403fc8c0f..04156dbfc4 100644 --- a/tests/e2e/kubernetes/kubectl/kubectl.go +++ b/tests/e2e/kubernetes/kubectl/kubectl.go @@ -19,9 +19,11 @@ package kubectl import ( + "bufio" "context" "fmt" "os/exec" + "strings" "testing" "github.com/vdaas/vald/internal/errors" @@ -40,14 +42,47 @@ func RolloutResource(ctx context.Context, t *testing.T, resource string) error { return runCmd(t, cmd) } -func RolloutRestart(ctx context.Context, t *testing.T, resource string, name string) error { +func RolloutResourceName( + ctx context.Context, t *testing.T, resource string, name string, timeout string, +) error { t.Helper() - cmd := exec.CommandContext(ctx, "kubectl", "rollout", "restart", resource, name) if err := runCmd(t, cmd); err != nil { return err } - return runCmd(t, cmd) + + r := strings.Join([]string{resource, name}, "/") + to := strings.Join([]string{"--timeout", timeout}, "=") + cmd = exec.CommandContext(ctx, "kubectl", "rollout", "status", r, "--watch", to) + stdout, err := cmd.StdoutPipe() + if err != nil { + return err + } + defer stdout.Close() + + stderr, err := cmd.StderrPipe() + if err != nil { + return err + } + defer stderr.Close() + + if err := cmd.Start(); err != nil { + return err + } + go func() { + scanner := bufio.NewScanner(stdout) + for scanner.Scan() { + fmt.Println(scanner.Text()) + } + }() + go func() { + scanner := bufio.NewScanner(stderr) + for scanner.Scan() { + fmt.Println("Error:", scanner.Text()) + } + }() + + return cmd.Wait() } // WaitResources waits for multiple resources to be ready.