From ea795a01cea556ec63de6f7a6ce973861b5a75a8 Mon Sep 17 00:00:00 2001 From: vankichi Date: Sat, 11 Jan 2025 00:41:18 +0900 Subject: [PATCH 01/13] :white_check_mark: Add rollout restart agent e2e test Signed-off-by: vankichi --- Makefile.d/e2e.mk | 5 ++ tests/e2e/crud/crud_test.go | 110 ++++++++++++++++++++++++ tests/e2e/kubernetes/client/client.go | 30 +++++++ tests/e2e/kubernetes/kubectl/kubectl.go | 10 +++ 4 files changed, 155 insertions(+) diff --git a/Makefile.d/e2e.mk b/Makefile.d/e2e.mk index 6ebdc3ad35..ce8bd5ee11 100644 --- a/Makefile.d/e2e.mk +++ b/Makefile.d/e2e.mk @@ -89,6 +89,11 @@ e2e/index/job/correction: e2e/readreplica: $(call run-e2e-crud-test,-run TestE2EReadReplica) +.PHONY: e2e/rollaout/restart/agent +## run rollout-restart agent e2e +e2e/rolloout/restart/agent: + $(call run-e2e-crud-test,-run TestE2EAgentRolloutRestart) + .PHONY: e2e/maxdim ## run e2e/maxdim e2e/maxdim: diff --git a/tests/e2e/crud/crud_test.go b/tests/e2e/crud/crud_test.go index bfda87cdd9..ef76bbae22 100644 --- a/tests/e2e/crud/crud_test.go +++ b/tests/e2e/crud/crud_test.go @@ -26,6 +26,7 @@ import ( "os" "os/exec" "strings" + "sync" "testing" "time" @@ -987,3 +988,112 @@ func TestE2EReadReplica(t *testing.T) { t.Fatalf("an error occurred: %s", err) } } + +// TestE2EReadReplica tests that search requests succeed with read replica resources. +func TestE2EAgentRolloutRestart(t *testing.T) { + t.Cleanup(teardown) + + if kubeClient == nil { + var err error + kubeClient, err = client.New(kubeConfig) + if err != nil { + t.Skipf("TestE2EReadReplica needs kubernetes client but failed to create one: %s", err) + } + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + op, err := operation.New(host, port) + if err != nil { + t.Fatalf("an error occurred: %s", err) + } + + err = op.Insert(t, ctx, operation.Dataset{ + Train: ds.Train[insertFrom : insertFrom+insertNum], + }) + if err != nil { + t.Fatalf("an error occurred: %s", err) + } + + sleep(t, waitAfterInsertDuration) + + // TODO Dipatch Search Inf-Loop + searchFunc := func() { + _ = op.Search(t, ctx, operation.Dataset{ + Test: ds.Test[searchFrom : searchFrom+searchNum], + Neighbors: ds.Neighbors[searchFrom : searchFrom+searchNum], + }) + } + + wg := sync.WaitGroup{} + wg.Add(1) + done := make(chan struct{}) + go func() { + defer wg.Done() + for { + searchFunc() + time.Sleep(1 * time.Second) + select { + case <-done: + return + default: + } + } + }() + kubectl.RolloutRestart(ctx, t, "statefulset", "vald-agent") + + // Wait for StatefulSet to be ready + t.Log("waiting for agent pods ready...") + swg := sync.WaitGroup{} + swg.Add(1) + go func() { + defer swg.Done() + for { + ok, err := kubeClient.WaitForStatefulSetReady(ctx, namespace, "vald-agent", 10*time.Minute) + if err != nil { + t.Fatalf("an error occurred: %s", err) + } + if ok { + return + } + continue + } + }() + swg.Wait() + + cnt, err := op.IndexInfo(t, ctx) + if err != nil { + t.Fatalf("an error occurred: count = %d, err = %s", cnt.Stored, err) + } + + // err = op.Exists(t, ctx, "0") + // if err != nil { + // t.Fatalf("an error occurred: %s", err) + // } + // + // err = op.GetObject(t, ctx, operation.Dataset{ + // Train: ds.Train[getObjectFrom : getObjectFrom+getObjectNum], + // }) + // if err != nil { + // t.Fatalf("an error occurred: %s", err) + // } + // + // err = op.Remove(t, ctx, operation.Dataset{ + // Train: ds.Train[removeFrom : removeFrom+removeNum], + // }) + // if err != nil { + // t.Fatalf("an error occurred: %s", err) + // } + // + // // Remove all vector data after the current - 1 hour. + // err = op.RemoveByTimestamp(t, ctx, time.Now().Add(-time.Hour).UnixNano()) + // if err != nil { + // t.Fatalf("an error occurred: %s", err) + // } + time.AfterFunc(5*time.Second, func() { + close(done) + fmt.Println("canceling all goroutines") + }) + wg.Wait() +} diff --git a/tests/e2e/kubernetes/client/client.go b/tests/e2e/kubernetes/client/client.go index cf41f5f515..71809efa16 100644 --- a/tests/e2e/kubernetes/client/client.go +++ b/tests/e2e/kubernetes/client/client.go @@ -68,6 +68,11 @@ type Client interface { name, namespace string, cronJob *v1.CronJob, ) error + WaitForStatefulSetReady( + ctx context.Context, + namespace, name string, + timeout time.Duration, + ) (ok bool, err error) } type client struct { @@ -201,3 +206,28 @@ func (cli *client) CreateJobFromCronJob( _, err := cli.clientset.BatchV1().Jobs(namespace).Create(ctx, job, metav1.CreateOptions{}) return err } + +func (cli *client) WaitForStatefulSetReady( + ctx context.Context, namespace, name string, timeout time.Duration, +) (ok bool, err error) { + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + tick := time.NewTicker(time.Second) + defer tick.Stop() + + for { + ss, err := cli.clientset.AppsV1().StatefulSets(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return false, err + } + if ss.Status.UpdatedReplicas == ss.Status.Replicas && ss.Status.ReadyReplicas == ss.Status.Replicas { + return true, nil + } + select { + case <-ctx.Done(): + return false, ctx.Err() + case <-tick.C: + } + } +} diff --git a/tests/e2e/kubernetes/kubectl/kubectl.go b/tests/e2e/kubernetes/kubectl/kubectl.go index 9e13d37482..5533eb1cbc 100644 --- a/tests/e2e/kubernetes/kubectl/kubectl.go +++ b/tests/e2e/kubernetes/kubectl/kubectl.go @@ -40,6 +40,16 @@ func RolloutResource(ctx context.Context, t *testing.T, resource string) error { return runCmd(t, cmd) } +func RolloutRestart(ctx context.Context, t *testing.T, resource string, name string) error { + t.Helper() + + cmd := exec.CommandContext(ctx, "kubectl", "rollout", "restart", resource, name) + if err := runCmd(t, cmd); err != nil { + return err + } + return runCmd(t, cmd) +} + // WaitResources waits for multiple resources to be ready. func WaitResources( ctx context.Context, t *testing.T, resource, labelSelector, condition, timeout string, From 2f6623b1e1b5ce4acadae4334cdc94753eee3cf5 Mon Sep 17 00:00:00 2001 From: vankichi Date: Sat, 11 Jan 2025 01:25:27 +0900 Subject: [PATCH 02/13] :recycle: Fix logic Signed-off-by: vankichi --- tests/e2e/crud/crud_test.go | 60 ++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/tests/e2e/crud/crud_test.go b/tests/e2e/crud/crud_test.go index ef76bbae22..96203a80cb 100644 --- a/tests/e2e/crud/crud_test.go +++ b/tests/e2e/crud/crud_test.go @@ -1032,18 +1032,19 @@ func TestE2EAgentRolloutRestart(t *testing.T) { go func() { defer wg.Done() for { - searchFunc() - time.Sleep(1 * time.Second) select { case <-done: return default: + searchFunc() + time.Sleep(1 * time.Second) } } }() kubectl.RolloutRestart(ctx, t, "statefulset", "vald-agent") // Wait for StatefulSet to be ready + time.Sleep(10 * time.Second) t.Log("waiting for agent pods ready...") swg := sync.WaitGroup{} swg.Add(1) @@ -1055,9 +1056,9 @@ func TestE2EAgentRolloutRestart(t *testing.T) { t.Fatalf("an error occurred: %s", err) } if ok { - return + t.Log("statefulset is ok", ok) + break } - continue } }() swg.Wait() @@ -1067,33 +1068,30 @@ func TestE2EAgentRolloutRestart(t *testing.T) { t.Fatalf("an error occurred: count = %d, err = %s", cnt.Stored, err) } - // err = op.Exists(t, ctx, "0") - // if err != nil { - // t.Fatalf("an error occurred: %s", err) - // } - // - // err = op.GetObject(t, ctx, operation.Dataset{ - // Train: ds.Train[getObjectFrom : getObjectFrom+getObjectNum], - // }) - // if err != nil { - // t.Fatalf("an error occurred: %s", err) - // } - // - // err = op.Remove(t, ctx, operation.Dataset{ - // Train: ds.Train[removeFrom : removeFrom+removeNum], - // }) - // if err != nil { - // t.Fatalf("an error occurred: %s", err) - // } - // - // // Remove all vector data after the current - 1 hour. - // err = op.RemoveByTimestamp(t, ctx, time.Now().Add(-time.Hour).UnixNano()) - // if err != nil { - // t.Fatalf("an error occurred: %s", err) - // } - time.AfterFunc(5*time.Second, func() { - close(done) - fmt.Println("canceling all goroutines") + err = op.Exists(t, ctx, "0") + if err != nil { + t.Fatalf("an error occurred: %s", err) + } + + err = op.GetObject(t, ctx, operation.Dataset{ + Train: ds.Train[getObjectFrom : getObjectFrom+getObjectNum], + }) + if err != nil { + t.Fatalf("an error occurred: %s", err) + } + + err = op.Remove(t, ctx, operation.Dataset{ + Train: ds.Train[removeFrom : removeFrom+removeNum], }) + if err != nil { + t.Fatalf("an error occurred: %s", err) + } + + // Remove all vector data after the current - 1 hour. + err = op.RemoveByTimestamp(t, ctx, time.Now().Add(-time.Hour).UnixNano()) + if err != nil { + t.Fatalf("an error occurred: %s", err) + } + close(done) wg.Wait() } From 4fb082b3dac9e2254d4f75aefa835c6bdbfe7bcd Mon Sep 17 00:00:00 2001 From: vankichi Date: Tue, 14 Jan 2025 18:35:13 +0900 Subject: [PATCH 03/13] :white_check_mark: Add e2e Signed-off-by: vankichi --- .github/actions/setup-e2e/action.yaml | 5 ++ .github/actions/setup-k3d/action.yaml | 9 ++ .github/helm/values/values-rollout-agent.yaml | 84 +++++++++++++++++++ .github/workflows/e2e.yaml | 44 ++++++++++ Makefile | 1 + Makefile.d/e2e.mk | 2 +- Makefile.d/functions.mk | 1 + tests/e2e/crud/crud_test.go | 36 ++++---- tests/e2e/kubernetes/kubectl/kubectl.go | 41 ++++++++- 9 files changed, 197 insertions(+), 26 deletions(-) create mode 100644 .github/helm/values/values-rollout-agent.yaml diff --git a/.github/actions/setup-e2e/action.yaml b/.github/actions/setup-e2e/action.yaml index 343b091867..659bc5ff35 100644 --- a/.github/actions/setup-e2e/action.yaml +++ b/.github/actions/setup-e2e/action.yaml @@ -32,6 +32,10 @@ inputs: description: "If k3d is not required, set this to false" required: false default: "true" + require_k3d_storage: + description: "If local-path-storage is not required, set this to false" + required: false + default: "false" require_minikube: description: "If minikube is not required, set this to true and set require_k3d to false" required: false @@ -85,6 +89,7 @@ runs: with: agents: 3 ingress_port: ${{ inputs.ingress_port }} + storage: ${{ inputs.require_k3d_storage }} - name: Setup Minikube environment if: ${{ inputs.require_minikube == 'true' }} shell: bash diff --git a/.github/actions/setup-k3d/action.yaml b/.github/actions/setup-k3d/action.yaml index 19f0d0cf87..777bf95f11 100644 --- a/.github/actions/setup-k3d/action.yaml +++ b/.github/actions/setup-k3d/action.yaml @@ -36,6 +36,10 @@ inputs: description: "Number of agents" required: false default: "3" + storage: + description: "If true, the local-path-storage will be deployed" + required: false + default: "false" options: description: "Options for k3d cluster create command" required: false @@ -124,6 +128,11 @@ runs: echo $KUBECONFIG cat $KUBECONFIG cat /etc/hosts + - name: Set local path storage + if: ${{ inputs.storage == 'true' }} + shell: bash + run: | + make k3d/storage - name: Show Kubernetes Cluster Info shell: bash run: | diff --git a/.github/helm/values/values-rollout-agent.yaml b/.github/helm/values/values-rollout-agent.yaml new file mode 100644 index 0000000000..65801736a6 --- /dev/null +++ b/.github/helm/values/values-rollout-agent.yaml @@ -0,0 +1,84 @@ +# +# Copyright (C) 2019-2024 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +defaults: + logging: + level: debug + networkPolicy: + enabled: true +gateway: + lb: + enabled: true + minReplicas: 1 + hpa: + enabled: false + resources: + requests: + cpu: 100m + memory: 50Mi + gateway_config: + index_replica: 2 +agent: + minReplicas: 3 + maxReplicas: 3 + podManagementPolicy: Parallel + hpa: + enabled: false + resources: + requests: + cpu: 100m + memory: 50Mi + # We recommend to set this value long enough to ensure the backup speed of PV, since the Index is backed up at the end of the pod. + terminationGracePeriodSeconds: 600 + # This is the persistent volume settings. + # Please change it according to your environment. + persistentVolume: + enabled: true + accessMode: ReadWriteOncePod + storageClass: local-path + size: 500Mi + ngt: + auto_index_duration_limit: 2m + auto_index_check_duration: 30s + auto_index_length: 500 + dimension: 784 + enable_in_memory_mode: false + index_path: "/var/ngt/index" +discoverer: + minReplicas: 1 + hpa: + enabled: false + resources: + requests: + cpu: 100m + memory: 50Mi +manager: + index: + replicas: 1 + resources: + requests: + cpu: 100m + memory: 30Mi + indexer: + auto_index_duration_limit: 2m + auto_index_check_duration: 30s + auto_index_length: 1000 + corrector: + enabled: true + # suspend because you do not want corrector to start automatically in CI + # instead run it manually + suspend: true + schedule: "1 2 3 4 5" diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index cd0b01df30..eb8da0c206 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -348,6 +348,49 @@ jobs: e2e/readreplica env: POD_NAME: ${{ steps.deploy_vald_readreplica.outputs.POD_NAME }} + e2e-stream-crud-with-rollout-restart-agent: + name: "E2E test (Stream CRUD) with rollout restart agent" + needs: [detect-ci-container] + runs-on: ubuntu-latest + timeout-minutes: 60 + container: + image: ghcr.io/vdaas/vald/vald-ci-container:${{ needs.detect-ci-container.outputs.TAG }} + options: "--add-host host.docker.internal:host-gateway" + steps: + - uses: actions/checkout@v4 + - name: Set Git config + run: | + git config --global --add safe.directory ${GITHUB_WORKSPACE} + - name: Setup E2E environment + id: setup_e2e + uses: ./.github/actions/setup-e2e + with: + require_k3d_storage: true + - name: Deploy Vald + id: deploy_vald + uses: ./.github/actions/e2e-deploy-vald + with: + helm_extra_options: ${{ steps.setup_e2e.outputs.HELM_EXTRA_OPTIONS }} + values: .github/helm/values/values-rollout-agent.yaml + wait_for_selector: app=vald-agent + - name: Run E2E CRUD with read replica rotation + run: | + make hack/benchmark/assets/dataset/${{ env.DATASET }} + make E2E_BIND_PORT=8081 \ + E2E_DATASET_NAME=${{ env.DATASET }} \ + E2E_INSERT_COUNT=1000 \ + E2E_SEARCH_COUNT=1000 \ + E2E_SEARCH_BY_ID_COUNT=1000 \ + E2E_GET_OBJECT_COUNT=100 \ + E2E_UPDATE_COUNT=100 \ + E2E_UPSERT_COUNT=100 \ + E2E_REMOVE_COUNT=100 \ + E2E_WAIT_FOR_CREATE_INDEX_DURATION=3m \ + E2E_TARGET_POD_NAME=${POD_NAME} \ + E2E_TARGET_NAMESPACE=default \ + e2e/rollout/restart/agent + env: + POD_NAME: ${{ steps.deploy_vald_readreplica.outputs.POD_NAME }} e2e-stream-crud-with-mirror: name: "E2E test (Stream CRUD) with mirror" needs: [detect-ci-container] @@ -417,6 +460,7 @@ jobs: - e2e-stream-crud-under-index-management-jobs - e2e-stream-crud-with-mirror - e2e-stream-crud-with-readreplica + - e2e-stream-crud-with-rollout-restart-agent runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 diff --git a/Makefile b/Makefile index 73645e3120..a364ce707a 100644 --- a/Makefile +++ b/Makefile @@ -367,6 +367,7 @@ E2E_UPDATE_COUNT ?= 10 E2E_UPSERT_COUNT ?= 10 E2E_WAIT_FOR_CREATE_INDEX_DURATION ?= 8m E2E_WAIT_FOR_START_TIMEOUT ?= 10m +E2E_WAIT_FOR_RESOURCE_READY ?= 3m E2E_SEARCH_FROM ?= 0 E2E_SEARCH_BY_ID_FROM ?= 0 E2E_INSERT_FROM ?= 0 diff --git a/Makefile.d/e2e.mk b/Makefile.d/e2e.mk index ce8bd5ee11..6701742265 100644 --- a/Makefile.d/e2e.mk +++ b/Makefile.d/e2e.mk @@ -91,7 +91,7 @@ e2e/readreplica: .PHONY: e2e/rollaout/restart/agent ## run rollout-restart agent e2e -e2e/rolloout/restart/agent: +e2e/rollout/restart/agent: $(call run-e2e-crud-test,-run TestE2EAgentRolloutRestart) .PHONY: e2e/maxdim diff --git a/Makefile.d/functions.mk b/Makefile.d/functions.mk index fb4feb167f..dc440a66cf 100644 --- a/Makefile.d/functions.mk +++ b/Makefile.d/functions.mk @@ -163,6 +163,7 @@ define run-e2e-crud-test -search-by-id-from=$(E2E_SEARCH_BY_ID_FROM) \ -get-object-from=$(E2E_GET_OBJECT_FROM) \ -wait-after-insert=$(E2E_WAIT_FOR_CREATE_INDEX_DURATION) \ + -wait-resource-ready=$(E2E_WAIT_FOR_RESOURCE_READY) \ -portforward=$(E2E_PORTFORWARD_ENABLED) \ -portforward-pod-name=$(E2E_TARGET_POD_NAME) \ -portforward-pod-port=$(E2E_TARGET_PORT) \ diff --git a/tests/e2e/crud/crud_test.go b/tests/e2e/crud/crud_test.go index 96203a80cb..a286f2d586 100644 --- a/tests/e2e/crud/crud_test.go +++ b/tests/e2e/crud/crud_test.go @@ -63,7 +63,8 @@ var ( upsertFrom int removeFrom int - waitAfterInsertDuration time.Duration + waitAfterInsertDuration time.Duration + waitResourceReadyDuration time.Duration kubeClient client.Client namespace string @@ -97,6 +98,7 @@ func init() { datasetName := flag.String("dataset", "fashion-mnist-784-euclidean.hdf5", "dataset") waitAfterInsert := flag.String("wait-after-insert", "3m", "wait duration after inserting vectors") + waitResourceReady := flag.String("wait-resource-ready", "3m", "wait duration for resource ready") pf := flag.Bool("portforward", false, "enable port forwarding") pfPodName := flag.String("portforward-pod-name", "vald-gateway-0", "pod name (only for port forward)") @@ -132,6 +134,11 @@ func init() { if err != nil { panic(err) } + + waitResourceReadyDuration, err = time.ParseDuration(*waitResourceReady) + if err != nil { + panic(err) + } } func teardown() { @@ -1018,7 +1025,6 @@ func TestE2EAgentRolloutRestart(t *testing.T) { sleep(t, waitAfterInsertDuration) - // TODO Dipatch Search Inf-Loop searchFunc := func() { _ = op.Search(t, ctx, operation.Dataset{ Test: ds.Test[searchFrom : searchFrom+searchNum], @@ -1037,31 +1043,17 @@ func TestE2EAgentRolloutRestart(t *testing.T) { return default: searchFunc() - time.Sleep(1 * time.Second) + time.Sleep(10 * time.Second) } } }() - kubectl.RolloutRestart(ctx, t, "statefulset", "vald-agent") // Wait for StatefulSet to be ready - time.Sleep(10 * time.Second) - t.Log("waiting for agent pods ready...") - swg := sync.WaitGroup{} - swg.Add(1) - go func() { - defer swg.Done() - for { - ok, err := kubeClient.WaitForStatefulSetReady(ctx, namespace, "vald-agent", 10*time.Minute) - if err != nil { - t.Fatalf("an error occurred: %s", err) - } - if ok { - t.Log("statefulset is ok", ok) - break - } - } - }() - swg.Wait() + t.Log("rollout restart agent and waiting for agent pods ready...") + err = kubectl.RolloutResourceName(ctx, t, "statefulset", "vald-agent", waitResourceReadyDuration.String()) + if err != nil { + t.Fatalf("an error occurred: %s", err) + } cnt, err := op.IndexInfo(t, ctx) if err != nil { diff --git a/tests/e2e/kubernetes/kubectl/kubectl.go b/tests/e2e/kubernetes/kubectl/kubectl.go index 5533eb1cbc..92f917737a 100644 --- a/tests/e2e/kubernetes/kubectl/kubectl.go +++ b/tests/e2e/kubernetes/kubectl/kubectl.go @@ -19,9 +19,11 @@ package kubectl import ( + "bufio" "context" "fmt" "os/exec" + "strings" "testing" "github.com/vdaas/vald/internal/errors" @@ -40,14 +42,47 @@ func RolloutResource(ctx context.Context, t *testing.T, resource string) error { return runCmd(t, cmd) } -func RolloutRestart(ctx context.Context, t *testing.T, resource string, name string) error { +func RolloutResourceName( + ctx context.Context, t *testing.T, resource string, name string, timeout string, +) error { t.Helper() - cmd := exec.CommandContext(ctx, "kubectl", "rollout", "restart", resource, name) if err := runCmd(t, cmd); err != nil { return err } - return runCmd(t, cmd) + + r := strings.Join([]string{resource, name}, "/") + to := strings.Join([]string{"--timeout", timeout}, "=") + cmd = exec.CommandContext(ctx, "kubectl", "rollout", "status", r, "--watch", to) + stdout, err := cmd.StdoutPipe() + if err != nil { + return err + } + defer stdout.Close() + + stderr, err := cmd.StderrPipe() + if err != nil { + return err + } + defer stderr.Close() + + if err := cmd.Start(); err != nil { + return err + } + go func() { + scanner := bufio.NewScanner(stdout) + for scanner.Scan() { + fmt.Println(scanner.Text()) + } + }() + go func() { + scanner := bufio.NewScanner(stderr) + for scanner.Scan() { + fmt.Println("Error:", scanner.Text()) + } + }() + + return cmd.Wait() } // WaitResources waits for multiple resources to be ready. From bd747ff3ac26a926aa06643552051d5ac723af4b Mon Sep 17 00:00:00 2001 From: vankichi Date: Tue, 14 Jan 2025 23:43:19 +0900 Subject: [PATCH 04/13] :white_check_mark: Fix CI Err Signed-off-by: vankichi --- .github/workflows/e2e.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index eb8da0c206..d70db8fa11 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -372,8 +372,8 @@ jobs: with: helm_extra_options: ${{ steps.setup_e2e.outputs.HELM_EXTRA_OPTIONS }} values: .github/helm/values/values-rollout-agent.yaml - wait_for_selector: app=vald-agent - - name: Run E2E CRUD with read replica rotation + wait_for_selector: app=vald-lb-gateway + - name: Run E2E CRUD with rollout restart agent run: | make hack/benchmark/assets/dataset/${{ env.DATASET }} make E2E_BIND_PORT=8081 \ @@ -390,7 +390,7 @@ jobs: E2E_TARGET_NAMESPACE=default \ e2e/rollout/restart/agent env: - POD_NAME: ${{ steps.deploy_vald_readreplica.outputs.POD_NAME }} + POD_NAME: ${{ steps.deploy_vald.outputs.POD_NAME }} e2e-stream-crud-with-mirror: name: "E2E test (Stream CRUD) with mirror" needs: [detect-ci-container] From 6141552a37adb70b55e2d4f2aa2a1deb63640436 Mon Sep 17 00:00:00 2001 From: vankichi Date: Wed, 15 Jan 2025 10:43:01 +0900 Subject: [PATCH 05/13] :white_check_mark: Fix Signed-off-by: vankichi --- tests/e2e/crud/crud_test.go | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tests/e2e/crud/crud_test.go b/tests/e2e/crud/crud_test.go index a286f2d586..2ef50f16fa 100644 --- a/tests/e2e/crud/crud_test.go +++ b/tests/e2e/crud/crud_test.go @@ -1025,14 +1025,16 @@ func TestE2EAgentRolloutRestart(t *testing.T) { sleep(t, waitAfterInsertDuration) - searchFunc := func() { - _ = op.Search(t, ctx, operation.Dataset{ + searchFunc := func() error { + return op.Search(t, ctx, operation.Dataset{ Test: ds.Test[searchFrom : searchFrom+searchNum], Neighbors: ds.Neighbors[searchFrom : searchFrom+searchNum], }) } wg := sync.WaitGroup{} + mu := sync.Mutex{} + var serr error wg.Add(1) done := make(chan struct{}) go func() { @@ -1042,7 +1044,16 @@ func TestE2EAgentRolloutRestart(t *testing.T) { case <-done: return default: - searchFunc() + err = searchFunc() + if err != nil { + st, ok := status.FromError(err) + if ok && st.Code() == codes.DeadlineExceeded { + _, _, rerr := status.ParseError(err, codes.DeadlineExceeded, "an error occurred") + mu.Lock() + serr = errors.Join(serr, rerr) + mu.Unlock() + } + } time.Sleep(10 * time.Second) } } @@ -1086,4 +1097,7 @@ func TestE2EAgentRolloutRestart(t *testing.T) { } close(done) wg.Wait() + if serr != nil { + t.Fatalf("an error occurred: %s", serr) + } } From f438c7cecc378123a00e9dc9814aa3bd7fbad5c4 Mon Sep 17 00:00:00 2001 From: vankichi Date: Wed, 15 Jan 2025 11:58:22 +0900 Subject: [PATCH 06/13] :recycle: Fix format Signed-off-by: vankichi --- .github/helm/values/values-rollout-agent.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/helm/values/values-rollout-agent.yaml b/.github/helm/values/values-rollout-agent.yaml index 65801736a6..1a6b55bf90 100644 --- a/.github/helm/values/values-rollout-agent.yaml +++ b/.github/helm/values/values-rollout-agent.yaml @@ -1,5 +1,5 @@ # -# Copyright (C) 2019-2024 vdaas.org vald team +# Copyright (C) 2019-2025 vdaas.org vald team # # Licensed under the Apache License, Version 2.0 (the "License"); # You may not use this file except in compliance with the License. From 3df88e8608ff967f74dc51030cc9fa6039cbddb3 Mon Sep 17 00:00:00 2001 From: vankichi Date: Wed, 15 Jan 2025 15:45:44 +0900 Subject: [PATCH 07/13] :recycle: Fix race condition Signed-off-by: vankichi --- tests/e2e/crud/crud_test.go | 2 +- tests/e2e/operation/operation.go | 2 +- tests/e2e/operation/stream.go | 11 ++++++++--- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/e2e/crud/crud_test.go b/tests/e2e/crud/crud_test.go index 2ef50f16fa..6c7e034c02 100644 --- a/tests/e2e/crud/crud_test.go +++ b/tests/e2e/crud/crud_test.go @@ -1016,7 +1016,7 @@ func TestE2EAgentRolloutRestart(t *testing.T) { t.Fatalf("an error occurred: %s", err) } - err = op.Insert(t, ctx, operation.Dataset{ + err = op.Upsert(t, ctx, operation.Dataset{ Train: ds.Train[insertFrom : insertFrom+insertNum], }) if err != nil { diff --git a/tests/e2e/operation/operation.go b/tests/e2e/operation/operation.go index f8d8ae2027..4e1bbe5e85 100644 --- a/tests/e2e/operation/operation.go +++ b/tests/e2e/operation/operation.go @@ -201,7 +201,7 @@ func (c *client) getGRPCConn() (*grpc.ClientConn, error) { grpc.WithKeepaliveParams( keepalive.ClientParameters{ Time: time.Second, - Timeout: 5 * time.Second, + Timeout: 60 * time.Second, PermitWithoutStream: true, }, ), diff --git a/tests/e2e/operation/stream.go b/tests/e2e/operation/stream.go index 757f8e8e61..52813c923c 100644 --- a/tests/e2e/operation/stream.go +++ b/tests/e2e/operation/stream.go @@ -660,6 +660,7 @@ func (c *client) InsertWithParameters( if err != nil { if err := evalidator(t, err); err != nil { + mu.Lock() rerr = errors.Join( rerr, errors.Errorf( @@ -667,6 +668,7 @@ func (c *client) InsertWithParameters( err.Error(), ), ) + mu.Unlock() } return } @@ -858,23 +860,26 @@ func (c *client) UpsertWithParameters( wg.Add(1) go func() { defer wg.Done() + var ierr error for { res, err := sc.Recv() if err == io.EOF { + rerr = ierr return } if err != nil { if err := evalidator(t, err); err != nil { - rerr = errors.Join( - rerr, + ierr = errors.Join( + ierr, errors.Errorf( "stream finished by an error: %s", err.Error(), ), ) } + rerr = ierr return } @@ -887,7 +892,7 @@ func (c *client) UpsertWithParameters( status.GetCode(), status.GetMessage(), errdetails.Serialize(status.GetDetails())) - rerr = errors.Join(rerr, e) + ierr = errors.Join(ierr, e) } continue } From 11cd40da25a101d3563047b5b4cf2cf1322be7d9 Mon Sep 17 00:00:00 2001 From: vankichi Date: Wed, 15 Jan 2025 15:49:50 +0900 Subject: [PATCH 08/13] :recycle: Fix race condition Signed-off-by: vankichi --- tests/e2e/operation/stream.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/e2e/operation/stream.go b/tests/e2e/operation/stream.go index 52813c923c..83511d1ce6 100644 --- a/tests/e2e/operation/stream.go +++ b/tests/e2e/operation/stream.go @@ -660,7 +660,6 @@ func (c *client) InsertWithParameters( if err != nil { if err := evalidator(t, err); err != nil { - mu.Lock() rerr = errors.Join( rerr, errors.Errorf( @@ -668,7 +667,6 @@ func (c *client) InsertWithParameters( err.Error(), ), ) - mu.Unlock() } return } @@ -857,6 +855,7 @@ func (c *client) UpsertWithParameters( } wg := sync.WaitGroup{} + mu := sync.Mutex{} wg.Add(1) go func() { defer wg.Done() @@ -865,7 +864,9 @@ func (c *client) UpsertWithParameters( for { res, err := sc.Recv() if err == io.EOF { + mu.Lock() rerr = ierr + mu.Unlock() return } @@ -879,7 +880,9 @@ func (c *client) UpsertWithParameters( ), ) } + mu.Lock() rerr = ierr + mu.Unlock() return } @@ -918,7 +921,10 @@ func (c *client) UpsertWithParameters( }, }) if err != nil { - return err + mu.Lock() + rerr = errors.Join(rerr, err) + mu.Unlock() + return } } From be7b4deed249b60bbe8924e20d1b1b4560f43965 Mon Sep 17 00:00:00 2001 From: vankichi Date: Wed, 15 Jan 2025 16:29:37 +0900 Subject: [PATCH 09/13] :recycle: Fix race condition v2 Signed-off-by: vankichi --- tests/e2e/operation/stream.go | 76 +++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/tests/e2e/operation/stream.go b/tests/e2e/operation/stream.go index 83511d1ce6..be1a2d0b8a 100644 --- a/tests/e2e/operation/stream.go +++ b/tests/e2e/operation/stream.go @@ -647,27 +647,35 @@ func (c *client) InsertWithParameters( return err } + mu := sync.Mutex{} wg := sync.WaitGroup{} wg.Add(1) go func() { defer wg.Done() + var ierr error for { res, err := sc.Recv() if err == io.EOF { + mu.Lock() + rerr = ierr + mu.Unlock() return } if err != nil { if err := evalidator(t, err); err != nil { - rerr = errors.Join( - rerr, + ierr = errors.Join( + ierr, errors.Errorf( "stream finished by an error: %s", err.Error(), ), ) } + mu.Lock() + rerr = ierr + mu.Unlock() return } @@ -680,7 +688,7 @@ func (c *client) InsertWithParameters( status.GetCode(), status.GetMessage(), errdetails.Serialize(status.GetDetails())) - rerr = errors.Join(rerr, e) + ierr = errors.Join(ierr, e) } continue } @@ -705,7 +713,10 @@ func (c *client) InsertWithParameters( }, }) if err != nil { - return err + mu.Lock() + rerr = errors.Join(err, err) + mu.Unlock() + return } } @@ -750,27 +761,35 @@ func (c *client) UpdateWithParameters( return err } + mu := sync.Mutex{} wg := sync.WaitGroup{} wg.Add(1) go func() { defer wg.Done() + var ierr error for { res, err := sc.Recv() if err == io.EOF { + mu.Lock() + rerr = ierr + mu.Unlock() return } if err != nil { if err := evalidator(t, err); err != nil { - rerr = errors.Join( - rerr, + ierr = errors.Join( + ierr, errors.Errorf( "stream finished by an error: %s", err.Error(), ), ) } + mu.Lock() + rerr = ierr + mu.Unlock() return } @@ -783,7 +802,7 @@ func (c *client) UpdateWithParameters( st.GetCode(), st.GetMessage(), errdetails.Serialize(st.GetDetails())) - rerr = errors.Join(rerr, e) + ierr = errors.Join(ierr, e) } continue } @@ -809,7 +828,10 @@ func (c *client) UpdateWithParameters( }, }) if err != nil { - return err + mu.Lock() + rerr = errors.Join(rerr, err) + mu.Unlock() + return } } @@ -967,27 +989,35 @@ func (c *client) RemoveWithParameters( return err } + mu := sync.Mutex{} wg := sync.WaitGroup{} wg.Add(1) go func() { defer wg.Done() + var ierr error for { res, err := sc.Recv() if err == io.EOF { + mu.Lock() + rerr = ierr + mu.Unlock() return } if err != nil { if err := evalidator(t, err); err != nil { - rerr = errors.Join( - rerr, + ierr = errors.Join( + ierr, errors.Errorf( "stream finished by an error: %s", err.Error(), ), ) } + mu.Lock() + rerr = ierr + mu.Unlock() return } @@ -1000,7 +1030,7 @@ func (c *client) RemoveWithParameters( status.GetCode(), status.GetMessage(), errdetails.Serialize(status.GetDetails())) - rerr = errors.Join(rerr, e) + ierr = errors.Join(ierr, e) } continue } @@ -1024,7 +1054,10 @@ func (c *client) RemoveWithParameters( }, }) if err != nil { - return err + mu.Lock() + rerr = errors.Join(rerr, err) + mu.Unlock() + return } } @@ -1117,26 +1150,34 @@ func (c *client) GetObject(t *testing.T, ctx context.Context, ds Dataset) (rerr return err } + mu := sync.Mutex{} wg := sync.WaitGroup{} wg.Add(1) go func() { defer wg.Done() + var ierr error for { res, err := sc.Recv() if err == io.EOF { + mu.Lock() + rerr = ierr + mu.Unlock() return } if err != nil { err = ParseAndLogError(t, err) - rerr = errors.Join( - rerr, + ierr = errors.Join( + ierr, errors.Errorf( "stream finished by an error: %s", err.Error(), ), ) + mu.Lock() + rerr = ierr + mu.Unlock() return } @@ -1145,7 +1186,7 @@ func (c *client) GetObject(t *testing.T, ctx context.Context, ds Dataset) (rerr err := res.GetStatus() if err != nil { t.Errorf("an error returned:\tcode: %d\tmessage: %s\tdetails: %s", err.GetCode(), err.GetMessage(), errdetails.Serialize(err.GetDetails())) - rerr = errors.Wrap(rerr, err.String()) + ierr = errors.Wrap(ierr, err.String()) continue } @@ -1181,7 +1222,10 @@ func (c *client) GetObject(t *testing.T, ctx context.Context, ds Dataset) (rerr }, }) if err != nil { - return err + mu.Lock() + rerr = errors.Join(rerr, err) + mu.Unlock() + return } } From fdc3b879b4bf282ed7cf2d1d656c9852751905d1 Mon Sep 17 00:00:00 2001 From: vankichi Date: Thu, 16 Jan 2025 13:56:56 +0900 Subject: [PATCH 10/13] :recycle: Fix race condition Signed-off-by: vankichi --- tests/e2e/crud/crud_test.go | 10 ++++++---- tests/e2e/operation/operation.go | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/e2e/crud/crud_test.go b/tests/e2e/crud/crud_test.go index 6c7e034c02..b4c170b589 100644 --- a/tests/e2e/crud/crud_test.go +++ b/tests/e2e/crud/crud_test.go @@ -1039,16 +1039,18 @@ func TestE2EAgentRolloutRestart(t *testing.T) { done := make(chan struct{}) go func() { defer wg.Done() + var ierr error + for { select { case <-done: return default: - err = searchFunc() - if err != nil { - st, ok := status.FromError(err) + ierr = searchFunc() + if ierr != nil { + st, ok := status.FromError(ierr) if ok && st.Code() == codes.DeadlineExceeded { - _, _, rerr := status.ParseError(err, codes.DeadlineExceeded, "an error occurred") + _, _, rerr := status.ParseError(ierr, codes.DeadlineExceeded, "an error occurred") mu.Lock() serr = errors.Join(serr, rerr) mu.Unlock() diff --git a/tests/e2e/operation/operation.go b/tests/e2e/operation/operation.go index 4e1bbe5e85..7d3339571d 100644 --- a/tests/e2e/operation/operation.go +++ b/tests/e2e/operation/operation.go @@ -200,7 +200,7 @@ func (c *client) getGRPCConn() (*grpc.ClientConn, error) { grpc.WithInsecure(), grpc.WithKeepaliveParams( keepalive.ClientParameters{ - Time: time.Second, + Time: 10 * time.Minute, Timeout: 60 * time.Second, PermitWithoutStream: true, }, From ef92673737a073c3d92f7e45e59d90fbfaf50502 Mon Sep 17 00:00:00 2001 From: kpango Date: Thu, 16 Jan 2025 16:50:11 +0900 Subject: [PATCH 11/13] fix Signed-off-by: kpango --- .github/workflows/dockers-agent-ngt-image.yaml | 1 + dockers/agent/core/agent/Dockerfile | 2 +- dockers/agent/core/faiss/Dockerfile | 2 +- dockers/agent/core/ngt/Dockerfile | 2 +- dockers/agent/sidecar/Dockerfile | 2 +- dockers/binfmt/Dockerfile | 2 +- dockers/buildbase/Dockerfile | 2 +- dockers/buildkit/Dockerfile | 2 +- dockers/buildkit/syft/scanner/Dockerfile | 2 +- dockers/ci/base/Dockerfile | 2 +- dockers/dev/Dockerfile | 2 +- dockers/discoverer/k8s/Dockerfile | 2 +- dockers/example/client/Dockerfile | 2 +- dockers/gateway/filter/Dockerfile | 2 +- dockers/gateway/lb/Dockerfile | 2 +- dockers/gateway/mirror/Dockerfile | 2 +- dockers/index/job/correction/Dockerfile | 2 +- dockers/index/job/creation/Dockerfile | 2 +- dockers/index/job/deletion/Dockerfile | 2 +- dockers/index/job/readreplica/rotate/Dockerfile | 2 +- dockers/index/job/save/Dockerfile | 2 +- dockers/index/operator/Dockerfile | 2 +- dockers/manager/index/Dockerfile | 2 +- dockers/operator/helm/Dockerfile | 2 +- dockers/tools/benchmark/job/Dockerfile | 2 +- dockers/tools/benchmark/operator/Dockerfile | 2 +- dockers/tools/cli/loadtest/Dockerfile | 2 +- hack/docker/gen/main.go | 1 + 28 files changed, 28 insertions(+), 26 deletions(-) diff --git a/.github/workflows/dockers-agent-ngt-image.yaml b/.github/workflows/dockers-agent-ngt-image.yaml index 4610cb8399..13d2764da2 100644 --- a/.github/workflows/dockers-agent-ngt-image.yaml +++ b/.github/workflows/dockers-agent-ngt-image.yaml @@ -269,4 +269,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: agent-ngt + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/dockers/agent/core/agent/Dockerfile b/dockers/agent/core/agent/Dockerfile index caf56e6e0e..c77f010b4a 100644 --- a/dockers/agent/core/agent/Dockerfile +++ b/dockers/agent/core/agent/Dockerfile @@ -94,4 +94,4 @@ LABEL maintainer="vdaas.org vald team " COPY --from=builder /usr/bin/agent /usr/bin/agent # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/agent"] +ENTRYPOINT ["/usr/bin/agent"] \ No newline at end of file diff --git a/dockers/agent/core/faiss/Dockerfile b/dockers/agent/core/faiss/Dockerfile index c369c9f429..5aa69be82b 100644 --- a/dockers/agent/core/faiss/Dockerfile +++ b/dockers/agent/core/faiss/Dockerfile @@ -96,4 +96,4 @@ COPY --from=builder /usr/bin/faiss /usr/bin/faiss COPY cmd/agent/core/faiss/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/faiss"] +ENTRYPOINT ["/usr/bin/faiss"] \ No newline at end of file diff --git a/dockers/agent/core/ngt/Dockerfile b/dockers/agent/core/ngt/Dockerfile index 8d4186b5af..e49057c923 100644 --- a/dockers/agent/core/ngt/Dockerfile +++ b/dockers/agent/core/ngt/Dockerfile @@ -95,4 +95,4 @@ COPY --from=builder /usr/bin/ngt /usr/bin/ngt COPY cmd/agent/core/ngt/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/ngt"] +ENTRYPOINT ["/usr/bin/ngt"] \ No newline at end of file diff --git a/dockers/agent/sidecar/Dockerfile b/dockers/agent/sidecar/Dockerfile index 5fdb77154c..48074a4bf0 100644 --- a/dockers/agent/sidecar/Dockerfile +++ b/dockers/agent/sidecar/Dockerfile @@ -85,4 +85,4 @@ LABEL maintainer="vdaas.org vald team " COPY --from=builder /usr/bin/sidecar /usr/bin/sidecar # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/sidecar"] +ENTRYPOINT ["/usr/bin/sidecar"] \ No newline at end of file diff --git a/dockers/binfmt/Dockerfile b/dockers/binfmt/Dockerfile index 0be93ba3d9..f63da8d40a 100644 --- a/dockers/binfmt/Dockerfile +++ b/dockers/binfmt/Dockerfile @@ -17,4 +17,4 @@ # # DO_NOT_EDIT this Dockerfile is generated by https://github.com/vdaas/vald/blob/main/hack/docker/gen/main.go -FROM tonistiigi/binfmt:master AS builder +FROM tonistiigi/binfmt:master AS builder \ No newline at end of file diff --git a/dockers/buildbase/Dockerfile b/dockers/buildbase/Dockerfile index 5dde4958c7..85d2385659 100644 --- a/dockers/buildbase/Dockerfile +++ b/dockers/buildbase/Dockerfile @@ -17,4 +17,4 @@ # # DO_NOT_EDIT this Dockerfile is generated by https://github.com/vdaas/vald/blob/main/hack/docker/gen/main.go -FROM ubuntu:devel AS builder +FROM ubuntu:devel AS builder \ No newline at end of file diff --git a/dockers/buildkit/Dockerfile b/dockers/buildkit/Dockerfile index 43a5a5b0b3..e63c6f5ebc 100644 --- a/dockers/buildkit/Dockerfile +++ b/dockers/buildkit/Dockerfile @@ -17,4 +17,4 @@ # # DO_NOT_EDIT this Dockerfile is generated by https://github.com/vdaas/vald/blob/main/hack/docker/gen/main.go -FROM moby/buildkit:master AS builder +FROM moby/buildkit:master AS builder \ No newline at end of file diff --git a/dockers/buildkit/syft/scanner/Dockerfile b/dockers/buildkit/syft/scanner/Dockerfile index 87be4558e9..b840429bf4 100644 --- a/dockers/buildkit/syft/scanner/Dockerfile +++ b/dockers/buildkit/syft/scanner/Dockerfile @@ -17,4 +17,4 @@ # # DO_NOT_EDIT this Dockerfile is generated by https://github.com/vdaas/vald/blob/main/hack/docker/gen/main.go -FROM docker/buildkit-syft-scanner:edge AS scanner +FROM docker/buildkit-syft-scanner:edge AS scanner \ No newline at end of file diff --git a/dockers/ci/base/Dockerfile b/dockers/ci/base/Dockerfile index ebaf45a89f..cbcfc5b3af 100644 --- a/dockers/ci/base/Dockerfile +++ b/dockers/ci/base/Dockerfile @@ -128,4 +128,4 @@ RUN --mount=type=bind,target=.,rw \ && rm -rf ${GOPATH}/src/github.com/${ORG}/${REPO}/* # skipcq: DOK-DL3002 USER root:root -ENTRYPOINT ["/bin/bash"] +ENTRYPOINT ["/bin/bash"] \ No newline at end of file diff --git a/dockers/dev/Dockerfile b/dockers/dev/Dockerfile index 290b99c402..01397c4620 100644 --- a/dockers/dev/Dockerfile +++ b/dockers/dev/Dockerfile @@ -141,4 +141,4 @@ RUN --mount=type=bind,target=.,rw \ && make faiss/install \ && rm -rf ${GOPATH}/src/github.com/${ORG}/${REPO}/* # skipcq: DOK-DL3002 -USER root:root +USER root:root \ No newline at end of file diff --git a/dockers/discoverer/k8s/Dockerfile b/dockers/discoverer/k8s/Dockerfile index 090b7ddc7e..55d8ce48b8 100644 --- a/dockers/discoverer/k8s/Dockerfile +++ b/dockers/discoverer/k8s/Dockerfile @@ -86,4 +86,4 @@ COPY --from=builder /usr/bin/discoverer /usr/bin/discoverer COPY cmd/discoverer/k8s/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/discoverer"] +ENTRYPOINT ["/usr/bin/discoverer"] \ No newline at end of file diff --git a/dockers/example/client/Dockerfile b/dockers/example/client/Dockerfile index 7f1811198d..ceb3dada97 100644 --- a/dockers/example/client/Dockerfile +++ b/dockers/example/client/Dockerfile @@ -93,4 +93,4 @@ LABEL maintainer="vdaas.org vald team " COPY --from=builder /usr/bin/client /usr/bin/client # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/client"] +ENTRYPOINT ["/usr/bin/client"] \ No newline at end of file diff --git a/dockers/gateway/filter/Dockerfile b/dockers/gateway/filter/Dockerfile index cfdcee9020..493b7205b9 100644 --- a/dockers/gateway/filter/Dockerfile +++ b/dockers/gateway/filter/Dockerfile @@ -86,4 +86,4 @@ COPY --from=builder /usr/bin/filter /usr/bin/filter COPY cmd/gateway/filter/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/filter"] +ENTRYPOINT ["/usr/bin/filter"] \ No newline at end of file diff --git a/dockers/gateway/lb/Dockerfile b/dockers/gateway/lb/Dockerfile index 73478fa5ab..ce2b8148dc 100644 --- a/dockers/gateway/lb/Dockerfile +++ b/dockers/gateway/lb/Dockerfile @@ -86,4 +86,4 @@ COPY --from=builder /usr/bin/lb /usr/bin/lb COPY cmd/gateway/lb/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/lb"] +ENTRYPOINT ["/usr/bin/lb"] \ No newline at end of file diff --git a/dockers/gateway/mirror/Dockerfile b/dockers/gateway/mirror/Dockerfile index 2c9e11874e..9c0f667851 100644 --- a/dockers/gateway/mirror/Dockerfile +++ b/dockers/gateway/mirror/Dockerfile @@ -86,4 +86,4 @@ COPY --from=builder /usr/bin/mirror /usr/bin/mirror COPY cmd/gateway/mirror/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/mirror"] +ENTRYPOINT ["/usr/bin/mirror"] \ No newline at end of file diff --git a/dockers/index/job/correction/Dockerfile b/dockers/index/job/correction/Dockerfile index d43c312fb7..882996b0d4 100644 --- a/dockers/index/job/correction/Dockerfile +++ b/dockers/index/job/correction/Dockerfile @@ -86,4 +86,4 @@ COPY --from=builder /usr/bin/index-correction /usr/bin/index-correction COPY cmd/index/job/correction/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/index-correction"] +ENTRYPOINT ["/usr/bin/index-correction"] \ No newline at end of file diff --git a/dockers/index/job/creation/Dockerfile b/dockers/index/job/creation/Dockerfile index 7b56415884..6b49fc6fc1 100644 --- a/dockers/index/job/creation/Dockerfile +++ b/dockers/index/job/creation/Dockerfile @@ -86,4 +86,4 @@ COPY --from=builder /usr/bin/index-creation /usr/bin/index-creation COPY cmd/index/job/creation/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/index-creation"] +ENTRYPOINT ["/usr/bin/index-creation"] \ No newline at end of file diff --git a/dockers/index/job/deletion/Dockerfile b/dockers/index/job/deletion/Dockerfile index 71590aacff..0058f6561a 100644 --- a/dockers/index/job/deletion/Dockerfile +++ b/dockers/index/job/deletion/Dockerfile @@ -86,4 +86,4 @@ COPY --from=builder /usr/bin/index-deletion /usr/bin/index-deletion COPY cmd/index/job/deletion/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/index-deletion"] +ENTRYPOINT ["/usr/bin/index-deletion"] \ No newline at end of file diff --git a/dockers/index/job/readreplica/rotate/Dockerfile b/dockers/index/job/readreplica/rotate/Dockerfile index 41861347e0..509d576eab 100644 --- a/dockers/index/job/readreplica/rotate/Dockerfile +++ b/dockers/index/job/readreplica/rotate/Dockerfile @@ -86,4 +86,4 @@ COPY --from=builder /usr/bin/readreplica-rotate /usr/bin/readreplica-rotate COPY cmd/index/job/readreplica/rotate/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/readreplica-rotate"] +ENTRYPOINT ["/usr/bin/readreplica-rotate"] \ No newline at end of file diff --git a/dockers/index/job/save/Dockerfile b/dockers/index/job/save/Dockerfile index 76723d1f64..7f97509feb 100644 --- a/dockers/index/job/save/Dockerfile +++ b/dockers/index/job/save/Dockerfile @@ -86,4 +86,4 @@ COPY --from=builder /usr/bin/index-save /usr/bin/index-save COPY cmd/index/job/save/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/index-save"] +ENTRYPOINT ["/usr/bin/index-save"] \ No newline at end of file diff --git a/dockers/index/operator/Dockerfile b/dockers/index/operator/Dockerfile index 4195fff32a..3b19aa3b91 100644 --- a/dockers/index/operator/Dockerfile +++ b/dockers/index/operator/Dockerfile @@ -86,4 +86,4 @@ COPY --from=builder /usr/bin/index-operator /usr/bin/index-operator COPY cmd/index/operator/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/index-operator"] +ENTRYPOINT ["/usr/bin/index-operator"] \ No newline at end of file diff --git a/dockers/manager/index/Dockerfile b/dockers/manager/index/Dockerfile index 0212228172..c75f4b61f8 100644 --- a/dockers/manager/index/Dockerfile +++ b/dockers/manager/index/Dockerfile @@ -86,4 +86,4 @@ COPY --from=builder /usr/bin/index /usr/bin/index COPY cmd/manager/index/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/index"] +ENTRYPOINT ["/usr/bin/index"] \ No newline at end of file diff --git a/dockers/operator/helm/Dockerfile b/dockers/operator/helm/Dockerfile index b40530fc31..0bdbd4ae13 100644 --- a/dockers/operator/helm/Dockerfile +++ b/dockers/operator/helm/Dockerfile @@ -107,4 +107,4 @@ COPY --from=builder /opt/helm/charts/vald /opt/helm/charts/vald COPY --from=builder /opt/helm/charts/vald-helm-operator /opt/helm/charts/vald-helm-operator # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/helm-operator", "run", "--watches-file=/opt/helm/watches.yaml"] +ENTRYPOINT ["/usr/bin/helm-operator", "run", "--watches-file=/opt/helm/watches.yaml"] \ No newline at end of file diff --git a/dockers/tools/benchmark/job/Dockerfile b/dockers/tools/benchmark/job/Dockerfile index 260f95034e..e3a32f57e2 100644 --- a/dockers/tools/benchmark/job/Dockerfile +++ b/dockers/tools/benchmark/job/Dockerfile @@ -94,4 +94,4 @@ COPY --from=builder /usr/bin/job /usr/bin/job COPY cmd/tools/benchmark/job/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/job"] +ENTRYPOINT ["/usr/bin/job"] \ No newline at end of file diff --git a/dockers/tools/benchmark/operator/Dockerfile b/dockers/tools/benchmark/operator/Dockerfile index 966ccf289f..592a67e2d3 100644 --- a/dockers/tools/benchmark/operator/Dockerfile +++ b/dockers/tools/benchmark/operator/Dockerfile @@ -86,4 +86,4 @@ COPY --from=builder /usr/bin/operator /usr/bin/operator COPY cmd/tools/benchmark/operator/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/operator"] +ENTRYPOINT ["/usr/bin/operator"] \ No newline at end of file diff --git a/dockers/tools/cli/loadtest/Dockerfile b/dockers/tools/cli/loadtest/Dockerfile index e972744e1c..25d72f78a9 100644 --- a/dockers/tools/cli/loadtest/Dockerfile +++ b/dockers/tools/cli/loadtest/Dockerfile @@ -94,4 +94,4 @@ COPY --from=builder /usr/bin/loadtest /usr/bin/loadtest COPY cmd/tools/cli/loadtest/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot -ENTRYPOINT ["/usr/bin/loadtest"] +ENTRYPOINT ["/usr/bin/loadtest"] \ No newline at end of file diff --git a/hack/docker/gen/main.go b/hack/docker/gen/main.go index 811f7d12ff..96ba441aa8 100644 --- a/hack/docker/gen/main.go +++ b/hack/docker/gen/main.go @@ -672,6 +672,7 @@ func main() { PackageDir: agent + "/core/ngt", ExtraPackages: append(clangBuildDeps, ngtBuildDeps...), Preprocess: []string{ngtPreprocess}, + BuildPlatforms: multiPlatforms, }, "vald-" + agentFaiss: { AppName: "faiss", From 0289138bab662666ad76f78b69fb54a49827a16a Mon Sep 17 00:00:00 2001 From: kpango Date: Thu, 16 Jan 2025 16:53:06 +0900 Subject: [PATCH 12/13] fix Signed-off-by: kpango --- .github/workflows/dockers-agent-faiss-image.yaml | 1 + .github/workflows/dockers-agent-image.yaml | 1 + .github/workflows/dockers-agent-sidecar-image.yaml | 1 + .github/workflows/dockers-benchmark-job-image.yaml | 1 + .github/workflows/dockers-benchmark-operator-image.yaml | 1 + .github/workflows/dockers-dev-container-image.yaml | 1 + .github/workflows/dockers-discoverer-k8s-image.yaml | 1 + .github/workflows/dockers-example-client-image.yaml | 1 + .github/workflows/dockers-gateway-filter-image.yaml | 1 + .github/workflows/dockers-gateway-lb-image.yaml | 1 + .github/workflows/dockers-gateway-mirror-image.yaml | 1 + .github/workflows/dockers-helm-operator-image.yaml | 1 + .github/workflows/dockers-index-correction-image.yaml | 1 + .github/workflows/dockers-index-creation-image.yaml | 1 + .github/workflows/dockers-index-deletion-image.yaml | 1 + .github/workflows/dockers-index-operator-image.yaml | 1 + .github/workflows/dockers-index-save-image.yaml | 1 + .github/workflows/dockers-manager-index-image.yaml | 1 + .github/workflows/dockers-readreplica-rotate-image.yaml | 1 + hack/docker/gen/main.go | 4 +++- 20 files changed, 22 insertions(+), 1 deletion(-) diff --git a/.github/workflows/dockers-agent-faiss-image.yaml b/.github/workflows/dockers-agent-faiss-image.yaml index 153ee05a14..d691b4f0fa 100644 --- a/.github/workflows/dockers-agent-faiss-image.yaml +++ b/.github/workflows/dockers-agent-faiss-image.yaml @@ -265,4 +265,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: agent-faiss + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-agent-image.yaml b/.github/workflows/dockers-agent-image.yaml index f1311bf7fb..960768a228 100644 --- a/.github/workflows/dockers-agent-image.yaml +++ b/.github/workflows/dockers-agent-image.yaml @@ -71,4 +71,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: agent + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-agent-sidecar-image.yaml b/.github/workflows/dockers-agent-sidecar-image.yaml index a3a0f6a68e..1870d99d64 100644 --- a/.github/workflows/dockers-agent-sidecar-image.yaml +++ b/.github/workflows/dockers-agent-sidecar-image.yaml @@ -295,4 +295,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: agent-sidecar + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-benchmark-job-image.yaml b/.github/workflows/dockers-benchmark-job-image.yaml index 9202808fe5..ad9d86e0c6 100644 --- a/.github/workflows/dockers-benchmark-job-image.yaml +++ b/.github/workflows/dockers-benchmark-job-image.yaml @@ -261,4 +261,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: benchmark-job + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-benchmark-operator-image.yaml b/.github/workflows/dockers-benchmark-operator-image.yaml index 403d2bb82a..68b26b13cb 100644 --- a/.github/workflows/dockers-benchmark-operator-image.yaml +++ b/.github/workflows/dockers-benchmark-operator-image.yaml @@ -255,4 +255,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: benchmark-operator + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-dev-container-image.yaml b/.github/workflows/dockers-dev-container-image.yaml index 2a55c83da2..85a4e76788 100644 --- a/.github/workflows/dockers-dev-container-image.yaml +++ b/.github/workflows/dockers-dev-container-image.yaml @@ -55,4 +55,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: dev-container + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-discoverer-k8s-image.yaml b/.github/workflows/dockers-discoverer-k8s-image.yaml index 048bebc47d..e594e1be8f 100644 --- a/.github/workflows/dockers-discoverer-k8s-image.yaml +++ b/.github/workflows/dockers-discoverer-k8s-image.yaml @@ -259,4 +259,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: discoverer-k8s + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-example-client-image.yaml b/.github/workflows/dockers-example-client-image.yaml index b529a4bc25..f3db763d4f 100644 --- a/.github/workflows/dockers-example-client-image.yaml +++ b/.github/workflows/dockers-example-client-image.yaml @@ -65,4 +65,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: example-client + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-gateway-filter-image.yaml b/.github/workflows/dockers-gateway-filter-image.yaml index cc361b2435..f092f303aa 100644 --- a/.github/workflows/dockers-gateway-filter-image.yaml +++ b/.github/workflows/dockers-gateway-filter-image.yaml @@ -259,4 +259,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: gateway-filter + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-gateway-lb-image.yaml b/.github/workflows/dockers-gateway-lb-image.yaml index 9a28c196e0..c13ab8971c 100644 --- a/.github/workflows/dockers-gateway-lb-image.yaml +++ b/.github/workflows/dockers-gateway-lb-image.yaml @@ -257,4 +257,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: gateway-lb + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-gateway-mirror-image.yaml b/.github/workflows/dockers-gateway-mirror-image.yaml index 4f6e0c485c..1c68964e8c 100644 --- a/.github/workflows/dockers-gateway-mirror-image.yaml +++ b/.github/workflows/dockers-gateway-mirror-image.yaml @@ -261,4 +261,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: gateway-mirror + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-helm-operator-image.yaml b/.github/workflows/dockers-helm-operator-image.yaml index 9b62139dbe..f2818cfa3d 100644 --- a/.github/workflows/dockers-helm-operator-image.yaml +++ b/.github/workflows/dockers-helm-operator-image.yaml @@ -65,4 +65,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: helm-operator + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-index-correction-image.yaml b/.github/workflows/dockers-index-correction-image.yaml index f8f65b969f..a98b0a26c0 100644 --- a/.github/workflows/dockers-index-correction-image.yaml +++ b/.github/workflows/dockers-index-correction-image.yaml @@ -249,4 +249,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: index-correction + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-index-creation-image.yaml b/.github/workflows/dockers-index-creation-image.yaml index 0436b46a0d..d750782597 100644 --- a/.github/workflows/dockers-index-creation-image.yaml +++ b/.github/workflows/dockers-index-creation-image.yaml @@ -243,4 +243,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: index-creation + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-index-deletion-image.yaml b/.github/workflows/dockers-index-deletion-image.yaml index 653763aad7..6cb1b50bc6 100644 --- a/.github/workflows/dockers-index-deletion-image.yaml +++ b/.github/workflows/dockers-index-deletion-image.yaml @@ -243,4 +243,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: index-deletion + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-index-operator-image.yaml b/.github/workflows/dockers-index-operator-image.yaml index 6e33ad364a..a7c62e7737 100644 --- a/.github/workflows/dockers-index-operator-image.yaml +++ b/.github/workflows/dockers-index-operator-image.yaml @@ -241,4 +241,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: index-operator + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-index-save-image.yaml b/.github/workflows/dockers-index-save-image.yaml index 4506c14dc3..9883165aa9 100644 --- a/.github/workflows/dockers-index-save-image.yaml +++ b/.github/workflows/dockers-index-save-image.yaml @@ -243,4 +243,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: index-save + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-manager-index-image.yaml b/.github/workflows/dockers-manager-index-image.yaml index 580fbfbbcd..17dec7f2fa 100644 --- a/.github/workflows/dockers-manager-index-image.yaml +++ b/.github/workflows/dockers-manager-index-image.yaml @@ -263,4 +263,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: manager-index + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/.github/workflows/dockers-readreplica-rotate-image.yaml b/.github/workflows/dockers-readreplica-rotate-image.yaml index ab1bc9b67e..c542eee184 100644 --- a/.github/workflows/dockers-readreplica-rotate-image.yaml +++ b/.github/workflows/dockers-readreplica-rotate-image.yaml @@ -239,4 +239,5 @@ jobs: uses: ./.github/workflows/_docker-image.yaml with: target: readreplica-rotate + platforms: linux/amd64,linux/arm64 secrets: inherit diff --git a/hack/docker/gen/main.go b/hack/docker/gen/main.go index 96ba441aa8..6c0a39c35e 100644 --- a/hack/docker/gen/main.go +++ b/hack/docker/gen/main.go @@ -672,7 +672,6 @@ func main() { PackageDir: agent + "/core/ngt", ExtraPackages: append(clangBuildDeps, ngtBuildDeps...), Preprocess: []string{ngtPreprocess}, - BuildPlatforms: multiPlatforms, }, "vald-" + agentFaiss: { AppName: "faiss", @@ -1025,6 +1024,9 @@ jobs: workflow.On.PullRequest.Paths = slices.Compact(workflow.On.PullRequest.Paths) workflow.On.PullRequestTarget.Paths = workflow.On.PullRequest.Paths + if data.BuildPlatforms == "" { + data.BuildPlatforms = multiPlatforms + } workflow.Jobs.Build.With.Platforms = data.BuildPlatforms workflowYamlTmp, err := yaml.Marshal(workflow) From 625c0357117d38417e3afcc2b16c23a51e11b20d Mon Sep 17 00:00:00 2001 From: vankichi Date: Wed, 22 Jan 2025 17:41:09 +0900 Subject: [PATCH 13/13] :recycle: fix Signed-off-by: vankichi --- .github/helm/values/values-rollout-agent.yaml | 57 +++++++++++++++++- pkg/agent/core/ngt/service/ngt.go | 2 +- tests/e2e/crud/crud_test.go | 58 ++++++++++++------- tests/e2e/operation/stream.go | 4 +- 4 files changed, 96 insertions(+), 25 deletions(-) diff --git a/.github/helm/values/values-rollout-agent.yaml b/.github/helm/values/values-rollout-agent.yaml index 1a6b55bf90..3f229f5740 100644 --- a/.github/helm/values/values-rollout-agent.yaml +++ b/.github/helm/values/values-rollout-agent.yaml @@ -15,22 +15,75 @@ # defaults: + image: + tag: pr-2798 logging: level: debug + server_config: + metrics: + pprof: + enabled: true # enable Pyroscope + healths: + liveness: + livenessProbe: + initialDelaySeconds: 60 + readiness: + readinessProbe: + initialDelaySeconds: 60 + servers: + grpc: + server: + grpc: + interceptors: + - RecoverInterceptor + - TraceInterceptor + - MetricInterceptor + grpc: + client: + dial_option: + interceptors: + - TraceInterceptor + observability: + enabled: true + otlp: + collector_endpoint: "opentelemetry-collector-collector.default.svc.cluster.local:4317" + trace: + enabled: true networkPolicy: enabled: true + custom: + ingress: + - from: + - podSelector: + matchLabels: + app.kubernetes.io/name: pyroscope + egress: + - to: + - podSelector: + matchLabels: + app.kubernetes.io/name: opentelemetry-collector-collector gateway: lb: enabled: true + maxReplicas: 1 minReplicas: 1 hpa: enabled: false resources: requests: - cpu: 100m - memory: 50Mi + cpu: 200m + memory: 150Mi gateway_config: index_replica: 2 + # ingress: + # # if enabled is true, vald-lb-gateway can be connected through Kubernetes ingress from the external network. + # enabled: true + # # TODO: Set your ingress host. + # host: localhost + # service: + # # NOTE: https://doc.traefik.io/traefik/routing/providers/kubernetes-ingress/#on-service + # annotations: + # traefik.ingress.kubernetes.io/service.serversscheme: h2c agent: minReplicas: 3 maxReplicas: 3 diff --git a/pkg/agent/core/ngt/service/ngt.go b/pkg/agent/core/ngt/service/ngt.go index 66fc3fbf2f..2c60359b2a 100644 --- a/pkg/agent/core/ngt/service/ngt.go +++ b/pkg/agent/core/ngt/service/ngt.go @@ -704,7 +704,7 @@ func (n *ngt) initNGT(opts ...core.Option) (err error) { var current uint64 if err != nil { if !n.enableCopyOnWrite { - log.Debug("failed to load vald index from %s\t error: %v", n.path, err) + log.Debugf("failed to load vald index from %s\t error: %v", n.path, err) if n.kvs == nil { n.kvs = kvs.New(kvs.WithConcurrency(n.kvsdbConcurrency)) } else if n.kvs.Len() > 0 { diff --git a/tests/e2e/crud/crud_test.go b/tests/e2e/crud/crud_test.go index b4c170b589..b6d42247ff 100644 --- a/tests/e2e/crud/crud_test.go +++ b/tests/e2e/crud/crud_test.go @@ -34,6 +34,7 @@ import ( "github.com/vdaas/vald/internal/file" "github.com/vdaas/vald/internal/net/grpc/codes" "github.com/vdaas/vald/internal/net/grpc/status" + "github.com/vdaas/vald/internal/sync/errgroup" "github.com/vdaas/vald/tests/e2e/hdf5" "github.com/vdaas/vald/tests/e2e/kubernetes/client" "github.com/vdaas/vald/tests/e2e/kubernetes/kubectl" @@ -55,13 +56,14 @@ var ( upsertNum int removeNum int - insertFrom int - searchFrom int - searchByIDFrom int - getObjectFrom int - updateFrom int - upsertFrom int - removeFrom int + insertFrom int + searchFrom int + searchByIDFrom int + searchConcurrency int + getObjectFrom int + updateFrom int + upsertFrom int + removeFrom int waitAfterInsertDuration time.Duration waitResourceReadyDuration time.Duration @@ -83,6 +85,7 @@ func init() { flag.IntVar(&correctionInsertNum, "correction-insert-num", 10000, "number of id-vector pairs used for insert") flag.IntVar(&searchNum, "search-num", 10000, "number of id-vector pairs used for search") flag.IntVar(&searchByIDNum, "search-by-id-num", 100, "number of id-vector pairs used for search-by-id") + flag.IntVar(&searchConcurrency, "search-conn", 100, "number of search concurrency") flag.IntVar(&getObjectNum, "get-object-num", 100, "number of id-vector pairs used for get-object") flag.IntVar(&updateNum, "update-num", 10000, "number of id-vector pairs used for update") flag.IntVar(&upsertNum, "upsert-num", 10000, "number of id-vector pairs used for upsert") @@ -1025,7 +1028,7 @@ func TestE2EAgentRolloutRestart(t *testing.T) { sleep(t, waitAfterInsertDuration) - searchFunc := func() error { + searchFunc := func(ctx context.Context) error { return op.Search(t, ctx, operation.Dataset{ Test: ds.Test[searchFrom : searchFrom+searchNum], Neighbors: ds.Neighbors[searchFrom : searchFrom+searchNum], @@ -1039,23 +1042,32 @@ func TestE2EAgentRolloutRestart(t *testing.T) { done := make(chan struct{}) go func() { defer wg.Done() - var ierr error for { select { case <-done: return default: - ierr = searchFunc() - if ierr != nil { - st, ok := status.FromError(ierr) - if ok && st.Code() == codes.DeadlineExceeded { - _, _, rerr := status.ParseError(ierr, codes.DeadlineExceeded, "an error occurred") - mu.Lock() - serr = errors.Join(serr, rerr) - mu.Unlock() - } + eg, egctx := errgroup.New(ctx) + for i := 0; i < searchConcurrency; i++ { + eg.Go(func() (e error) { + ierr := searchFunc(egctx) + if ierr != nil { + st, ok := status.FromError(ierr) + if ok && st.Code() == codes.DeadlineExceeded { + _, _, rerr := status.ParseError(ierr, codes.DeadlineExceeded, "an error occurred") + mu.Lock() + e = errors.Join(e, rerr) + mu.Unlock() + } + } + return + }) } + egerr := eg.Wait() + mu.Lock() + serr = errors.Join(serr, egerr) + mu.Unlock() time.Sleep(10 * time.Second) } } @@ -1070,6 +1082,9 @@ func TestE2EAgentRolloutRestart(t *testing.T) { cnt, err := op.IndexInfo(t, ctx) if err != nil { + if cnt == nil { + t.Fatalf("an error occurred: err = %s", err) + } t.Fatalf("an error occurred: count = %d, err = %s", cnt.Stored, err) } @@ -1093,13 +1108,14 @@ func TestE2EAgentRolloutRestart(t *testing.T) { } // Remove all vector data after the current - 1 hour. + // err = op.Flush(t, ctx) err = op.RemoveByTimestamp(t, ctx, time.Now().Add(-time.Hour).UnixNano()) - if err != nil { - t.Fatalf("an error occurred: %s", err) - } close(done) wg.Wait() if serr != nil { t.Fatalf("an error occurred: %s", serr) } + if err != nil { + t.Fatalf("an error occurred: %s", err) + } } diff --git a/tests/e2e/operation/stream.go b/tests/e2e/operation/stream.go index be1a2d0b8a..0d1d950715 100644 --- a/tests/e2e/operation/stream.go +++ b/tests/e2e/operation/stream.go @@ -21,6 +21,7 @@ import ( "reflect" "strconv" "testing" + "time" "github.com/vdaas/vald/apis/grpc/v1/payload" "github.com/vdaas/vald/internal/errors" @@ -268,13 +269,14 @@ func (c *client) SearchWithParameters( } func (c *client) SearchByID(t *testing.T, ctx context.Context, ds Dataset) error { + to := time.Second * 3 return c.SearchByIDWithParameters(t, ctx, ds, 100, -1.0, 0.1, - 3000000000, + to.Nanoseconds(), DefaultStatusValidator, ParseAndLogError, )