From a1fe0e0119da5b696e46b0fe8797a269c26367a6 Mon Sep 17 00:00:00 2001 From: achettyiitr Date: Wed, 18 Sep 2024 12:27:23 +0530 Subject: [PATCH 1/2] feat: snowpipe streaming --- .github/workflows/tests.yaml | 4 +- gateway/handle.go | 3 +- .../snowpipestreaming_test.go | 1605 +++++++++++++++++ ...docker-compose.rudder-snowpipe-clients.yml | 11 + .../docker-compose.rudder-transformer.yml | 2 +- .../asyncdestinationmanager/common/utils.go | 2 +- .../asyncdestinationmanager/manager.go | 3 + .../snowpipestreaming/apiadapter.go | 71 + .../snowpipestreaming/channel.go | 166 ++ .../snowpipestreaming/columns.go | 35 + .../snowpipestreaming/discards.go | 145 ++ .../snowpipestreaming/internal/api/api.go | 27 + .../internal/api/api_test.go | 320 ++++ .../internal/api/createchannel.go | 43 + .../internal/api/createchannel_test.go | 172 ++ .../internal/api/deletechannel.go | 34 + .../internal/api/deletechannel_test.go | 63 + .../internal/api/errorcodes.go | 12 + .../internal/api/getchannel.go | 37 + .../internal/api/getchannel_test.go | 108 ++ .../snowpipestreaming/internal/api/insert.go | 42 + .../internal/api/insert_test.go | 114 ++ .../snowpipestreaming/internal/api/status.go | 36 + .../internal/api/status_test.go | 80 + .../snowpipestreaming/internal/model/model.go | 96 + .../internal/model/model_test.go | 93 + .../snowpipestreaming/options.go | 9 + .../snowpipestreaming/poll.go | 103 ++ .../snowpipestreaming/snowpipestreaming.go | 108 ++ ...docker-compose.rudder-snowpipe-clients.yml | 11 + .../testhelper/testhelper.go | 72 + .../snowpipestreaming/transform.go | 10 + .../snowpipestreaming/types.go | 116 ++ .../snowpipestreaming/upload.go | 266 +++ .../snowpipestreaming/uploadstats.go | 65 + router/batchrouter/handle.go | 2 + router/batchrouter/handle_async.go | 3 +- router/batchrouter/handle_lifecycle.go | 1 + testhelper/warehouse/records.go | 58 + utils/misc/misc.go | 2 +- .../integrations/snowflake/datatype_mapper.go | 6 +- .../snowflake/datatype_mapper_test.go | 2 +- warehouse/integrations/snowflake/snowflake.go | 2 +- warehouse/slave/worker.go | 6 +- warehouse/slave/worker_test.go | 2 +- warehouse/utils/uploader.go | 56 + warehouse/utils/utils.go | 25 +- 47 files changed, 4218 insertions(+), 31 deletions(-) create mode 100644 integration_test/snowpipestreaming/snowpipestreaming_test.go create mode 100644 integration_test/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml rename warehouse/integrations/testdata/docker-compose.transformer.yml => integration_test/snowpipestreaming/testdata/docker-compose.rudder-transformer.yml (66%) create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/apiadapter.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/channel.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/columns.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/discards.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api_test.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel_test.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/deletechannel.go create mode 100644 
router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/deletechannel_test.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/errorcodes.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel_test.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert_test.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/status.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/status_test.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model_test.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/options.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/poll.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/testhelper/testhelper.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/transform.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/types.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/upload.go create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/uploadstats.go create mode 100644 testhelper/warehouse/records.go create mode 100644 warehouse/utils/uploader.go diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 14b29b87e6..2a441cfe29 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -145,6 +145,7 @@ jobs: - integration_test/tracing - integration_test/backendconfigunavailability - integration_test/trackedusersreporting + - integration_test/snowpipestreaming - processor - regulation-worker - router @@ -186,7 +187,8 @@ jobs: TEST_KAFKA_AZURE_EVENT_HUBS_CLOUD_CONNECTION_STRING: ${{ secrets.TEST_KAFKA_AZURE_EVENT_HUBS_CLOUD_CONNECTION_STRING }} TEST_S3_DATALAKE_CREDENTIALS: ${{ secrets.TEST_S3_DATALAKE_CREDENTIALS }} BIGQUERY_INTEGRATION_TEST_CREDENTIALS: ${{ secrets.BIGQUERY_INTEGRATION_TEST_CREDENTIALS }} - run: make test exclude="${{ matrix.exclude }}" package=${{ matrix.package }} + SNOWPIPE_STREAMING_KEYPAIR_UNENCRYPTED_INTEGRATION_TEST_CREDENTIALS: ${{ secrets.SNOWPIPE_STREAMING_KEYPAIR_UNENCRYPTED_INTEGRATION_TEST_CREDENTIALS }} + run: FORCE_RUN_INTEGRATION_TESTS=true make test exclude="${{ matrix.exclude }}" package=${{ matrix.package }} - name: Sanitize name for Artifact run: | name=$(echo -n "${{ matrix.package }}" | sed -e 's/[ \t:\/\\"<>|*?]/-/g' -e 's/--*/-/g') diff --git a/gateway/handle.go b/gateway/handle.go index 716c111caa..d659b46e2f 100644 --- a/gateway/handle.go +++ b/gateway/handle.go @@ -42,6 +42,7 @@ import ( sourcedebugger "github.com/rudderlabs/rudder-server/services/debugger/source" "github.com/rudderlabs/rudder-server/services/rsources" 
"github.com/rudderlabs/rudder-server/utils/misc" + "github.com/rudderlabs/rudder-server/utils/timeutil" "github.com/rudderlabs/rudder-server/utils/types" ) @@ -488,7 +489,7 @@ func (gw *Handle) getJobDataFromRequest(req *webRequestT) (jobData *jobFromReq, } receivedAt, ok := userEvent.events[0]["receivedAt"].(string) if !ok || !arctx.ReplaySource { - receivedAt = time.Now().Format(misc.RFC3339Milli) + receivedAt = timeutil.Now().Format(misc.RFC3339Milli) } singularEventBatch := SingularEventBatch{ Batch: userEvent.events, diff --git a/integration_test/snowpipestreaming/snowpipestreaming_test.go b/integration_test/snowpipestreaming/snowpipestreaming_test.go new file mode 100644 index 0000000000..4f1939f714 --- /dev/null +++ b/integration_test/snowpipestreaming/snowpipestreaming_test.go @@ -0,0 +1,1605 @@ +package snowpipestreaming + +import ( + "bytes" + "context" + "database/sql" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "os" + "path" + "strconv" + "strings" + "testing" + "time" + + "github.com/ory/dockertest/v3" + promClient "github.com/prometheus/client_model/go" + "github.com/rudderlabs/rudder-go-kit/stats/testhelper" + "github.com/samber/lo" + "github.com/stretchr/testify/require" + "golang.org/x/sync/errgroup" + + "github.com/rudderlabs/compose-test/compose" + "github.com/rudderlabs/compose-test/testcompose" + "github.com/rudderlabs/rudder-go-kit/config" + kithttputil "github.com/rudderlabs/rudder-go-kit/httputil" + "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stats" + kithelper "github.com/rudderlabs/rudder-go-kit/testhelper" + "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/postgres" + "github.com/rudderlabs/rudder-go-kit/testhelper/rand" + + "github.com/rudderlabs/rudder-server/runner" + "github.com/rudderlabs/rudder-server/testhelper/backendconfigtest" + "github.com/rudderlabs/rudder-server/testhelper/health" + "github.com/rudderlabs/rudder-server/utils/httputil" + "github.com/rudderlabs/rudder-server/utils/timeutil" + "github.com/rudderlabs/rudder-server/warehouse/integrations/snowflake" + whth "github.com/rudderlabs/rudder-server/warehouse/integrations/testhelper" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +const ( + testKeyPairUnencrypted = "SNOWPIPE_STREAMING_KEYPAIR_UNENCRYPTED_INTEGRATION_TEST_CREDENTIALS" +) + +type testCredentials struct { + Account string `json:"account"` + User string `json:"user"` + Role string `json:"role"` + Database string `json:"database"` + Warehouse string `json:"warehouse"` + PrivateKey string `json:"privateKey"` + PrivateKeyPassphrase string `json:"privateKeyPassphrase"` +} + +func getSnowpipeTestCredentials(key string) (*testCredentials, error) { + cred, exists := os.LookupEnv(key) + if !exists { + return nil, errors.New("snowpipe test credentials not found") + } + + var credentials testCredentials + err := json.Unmarshal([]byte(cred), &credentials) + if err != nil { + return nil, fmt.Errorf("unable to marshall %s to snowpipe test credentials: %v", key, err) + } + return &credentials, nil +} + +func randSchema(provider string) string { // nolint:unparam + hex := strings.ToLower(rand.String(12)) + namespace := fmt.Sprintf("test_%s_%d", hex, time.Now().Unix()) + return whutils.ToProviderCase(provider, whutils.ToSafeNamespace(provider, + namespace, + )) +} + +func TestSnowPipeStreaming(t *testing.T) { + for _, key := range []string{ + testKeyPairUnencrypted, + } { + if _, exists := os.LookupEnv(key); !exists { + if 
os.Getenv("FORCE_RUN_INTEGRATION_TESTS") == "true" { + t.Fatalf("%s environment variable not set", key) + } + t.Skipf("Skipping %s as %s is not set", t.Name(), key) + } + } + + c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.rudder-snowpipe-clients.yml", "testdata/docker-compose.rudder-transformer.yml"})) + c.Start(context.Background()) + + transformerURL := fmt.Sprintf("http://localhost:%d", c.Port("transformer", 9090)) + snowPipeClientsURL := fmt.Sprintf("http://localhost:%d", c.Port("rudder-snowpipe-clients", 9078)) + + keyPairUnEncryptedCredentials, err := getSnowpipeTestCredentials(testKeyPairUnencrypted) + require.NoError(t, err) + + t.Run("namespace and table already exists", func(t *testing.T) { + config.Reset() + defer config.Reset() + + pool, err := dockertest.NewPool("") + require.NoError(t, err) + postgresContainer, err := postgres.Setup(pool, t) + require.NoError(t, err) + + gwPort, err := kithelper.GetFreePort() + require.NoError(t, err) + + namespace := randSchema(whutils.SNOWFLAKE) + + destination := backendconfigtest. + NewDestinationBuilder("SNOWPIPE_STREAMING"). + WithID("destination1"). + WithConfigOption("account", keyPairUnEncryptedCredentials.Account). + WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). + WithConfigOption("database", keyPairUnEncryptedCredentials.Database). + WithConfigOption("role", keyPairUnEncryptedCredentials.Role). + WithConfigOption("user", keyPairUnEncryptedCredentials.User). + WithConfigOption("useKeyPairAuth", true). + WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). + WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). + WithConfigOption("namespace", namespace). + WithRevisionID("destination1"). + Build() + source := backendconfigtest.NewSourceBuilder(). + WithID("source1"). + WithWriteKey("writekey1"). + WithConnection(destination). + Build() + bcServer := backendconfigtest.NewBuilder(). + WithWorkspaceConfig( + backendconfigtest.NewConfigBuilder(). + WithSource(source). + Build()). 
+ Build() + defer bcServer.Close() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + wg, ctx := errgroup.WithContext(ctx) + wg.Go(func() error { + err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + if err != nil { + t.Logf("rudder-server exited with error: %v", err) + } + return err + }) + url := fmt.Sprintf("http://localhost:%d", gwPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ + "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "string", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + })) + require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_REQUEST_IP": "string", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + })) + + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + strconv.Itoa(index+1), + "identify", + ) + } + + err = sendEvents(5, eventFormat, "writekey1", url) + require.NoError(t, err) + + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("gw processedJobCount: %d", jobsCount) + return jobsCount == 5 + }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("batch_rt succeeded: %d", jobsCount) + return jobsCount == 10 + }, 200*time.Second, 1*time.Second, "all events should be successfully processed in batch router") + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT",
"CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) + + usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) + ts := timeutil.Now().Format("2006-01-02") + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + }, + usersRecords, + ) + identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + 
{destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + }, + identifiesRecords, + ) + + cancel() + _ = wg.Wait() + }) + + t.Run("namespace does not exists", func(t *testing.T) { + config.Reset() + defer config.Reset() + + pool, err := dockertest.NewPool("") + require.NoError(t, err) + postgresContainer, err := postgres.Setup(pool, t) + require.NoError(t, err) + + gwPort, err := kithelper.GetFreePort() + require.NoError(t, err) + + namespace := randSchema(whutils.SNOWFLAKE) + + destination := backendconfigtest. + NewDestinationBuilder("SNOWPIPE_STREAMING"). + WithID("destination1"). + WithConfigOption("account", keyPairUnEncryptedCredentials.Account). + WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). + WithConfigOption("database", keyPairUnEncryptedCredentials.Database). + WithConfigOption("role", keyPairUnEncryptedCredentials.Role). + WithConfigOption("user", keyPairUnEncryptedCredentials.User). + WithConfigOption("useKeyPairAuth", true). + WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). + WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). + WithConfigOption("namespace", namespace). + WithRevisionID("destination1"). + Build() + source := backendconfigtest.NewSourceBuilder(). + WithID("source1"). + WithWriteKey("writekey1"). + WithConnection(destination). + Build() + bcServer := backendconfigtest.NewBuilder(). + WithWorkspaceConfig( + backendconfigtest.NewConfigBuilder(). + WithSource(source). + Build()). + Build() + defer bcServer.Close() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + wg, ctx := errgroup.WithContext(ctx) + wg.Go(func() error { + err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + if err != nil { + t.Logf("rudder-server exited with error: %v", err) + } + return err + }) + url := fmt.Sprintf("http://localhost:%d", gwPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) + + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + strconv.Itoa(index+1), + "identify", + ) + } + + err = sendEvents(5, eventFormat, "writekey1", url) + require.NoError(t, err) + + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("gw processedJobCount: %d", jobsCount) + return jobsCount == 5 + }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM 
unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("batch_rt succeeded: %d", jobsCount) + return jobsCount == 10 + }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) + + usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) + ts := timeutil.Now().Format("2006-01-02") + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + }, + usersRecords, + ) + identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, 
"SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + }, + identifiesRecords, + ) + + cancel() + _ = wg.Wait() + }) + + t.Run("table does not exists", func(t *testing.T) { + config.Reset() + defer config.Reset() + + pool, err := dockertest.NewPool("") + require.NoError(t, err) + postgresContainer, err := postgres.Setup(pool, t) + require.NoError(t, err) + + gwPort, err := kithelper.GetFreePort() + require.NoError(t, err) + + namespace := randSchema(whutils.SNOWFLAKE) + + destination := backendconfigtest. + NewDestinationBuilder("SNOWPIPE_STREAMING"). + WithID("destination1"). + WithConfigOption("account", keyPairUnEncryptedCredentials.Account). + WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). + WithConfigOption("database", keyPairUnEncryptedCredentials.Database). + WithConfigOption("role", keyPairUnEncryptedCredentials.Role). + WithConfigOption("user", keyPairUnEncryptedCredentials.User). + WithConfigOption("useKeyPairAuth", true). + WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). + WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). + WithConfigOption("namespace", namespace). + WithRevisionID("destination1"). + Build() + source := backendconfigtest.NewSourceBuilder(). + WithID("source1"). + WithWriteKey("writekey1"). + WithConnection(destination). + Build() + bcServer := backendconfigtest.NewBuilder(). + WithWorkspaceConfig( + backendconfigtest.NewConfigBuilder(). + WithSource(source). + Build()). 
+ Build() + defer bcServer.Close() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + wg, ctx := errgroup.WithContext(ctx) + wg.Go(func() error { + err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + if err != nil { + t.Logf("rudder-server exited with error: %v", err) + } + return err + }) + url := fmt.Sprintf("http://localhost:%d", gwPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) + + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + strconv.Itoa(index+1), + "identify", + ) + } + + err = sendEvents(5, eventFormat, "writekey1", url) + require.NoError(t, err) + + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("gw processedJobCount: %d", jobsCount) + return jobsCount == 5 + }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("batch_rt succeeded: %d", jobsCount) + return jobsCount == 10 + }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) + + usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, 
ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) + ts := timeutil.Now().Format("2006-01-02") + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + }, + usersRecords, + ) + identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + }, + identifiesRecords, + ) + + cancel() + _ = wg.Wait() + }) + + t.Run("events with different schema", func(t *testing.T) { + config.Reset() + defer config.Reset() + + pool, err := dockertest.NewPool("") + require.NoError(t, err) + postgresContainer, err := postgres.Setup(pool, t) + require.NoError(t, err) + + gwPort, err := kithelper.GetFreePort() + require.NoError(t, err) + + namespace := randSchema(whutils.SNOWFLAKE) + + destination := backendconfigtest. + NewDestinationBuilder("SNOWPIPE_STREAMING"). + WithID("destination1"). + WithConfigOption("account", keyPairUnEncryptedCredentials.Account). + WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). + WithConfigOption("database", keyPairUnEncryptedCredentials.Database). 
+ WithConfigOption("role", keyPairUnEncryptedCredentials.Role). + WithConfigOption("user", keyPairUnEncryptedCredentials.User). + WithConfigOption("useKeyPairAuth", true). + WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). + WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). + WithConfigOption("namespace", namespace). + WithRevisionID("destination1"). + Build() + source := backendconfigtest.NewSourceBuilder(). + WithID("source1"). + WithWriteKey("writekey1"). + WithConnection(destination). + Build() + bcServer := backendconfigtest.NewBuilder(). + WithWorkspaceConfig( + backendconfigtest.NewConfigBuilder(). + WithSource(source). + Build()). + Build() + defer bcServer.Close() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + wg, ctx := errgroup.WithContext(ctx) + wg.Go(func() error { + err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + if err != nil { + t.Logf("rudder-server exited with error: %v", err) + } + return err + }) + url := fmt.Sprintf("http://localhost:%d", gwPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ + "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "string", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + })) + require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_REQUEST_IP": "string", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + })) + + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}, "additional_column_%[1]s": "%[1]s"},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + strconv.Itoa(index+1), + "identify", + ) + } + + err = sendEvents(5, eventFormat, "writekey1", url) + require.NoError(t, err) + + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("gw processedJobCount: %d", jobsCount) + return jobsCount == 5 + }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") + 
require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("batch_rt succeeded: %d", jobsCount) + return jobsCount == 10 + }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "IDENTIFIES": {"CONTEXT_ADDITIONAL_COLUMN_1": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_2": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_3": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_4": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_5": "TEXT", "CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "USERS": {"CONTEXT_ADDITIONAL_COLUMN_1": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_2": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_3": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_4": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_5": "TEXT", "CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) + + usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD'), CONTEXT_ADDITIONAL_COLUMN_1, CONTEXT_ADDITIONAL_COLUMN_2, CONTEXT_ADDITIONAL_COLUMN_3, CONTEXT_ADDITIONAL_COLUMN_4, CONTEXT_ADDITIONAL_COLUMN_5 FROM %q.%q;`, namespace, "USERS")) + ts := timeutil.Now().Format("2006-01-02") + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts, "1", "", "", "", ""}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts, "", "2", "", "", ""}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts, "", "", "3", "", ""}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts, "", "", "", "4", ""}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", 
"2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts, "", "", "", "", "5"}, + }, + usersRecords, + ) + identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD'), CONTEXT_ADDITIONAL_COLUMN_1, CONTEXT_ADDITIONAL_COLUMN_2, CONTEXT_ADDITIONAL_COLUMN_3, CONTEXT_ADDITIONAL_COLUMN_4, CONTEXT_ADDITIONAL_COLUMN_5 FROM %q.%q;`, namespace, "IDENTIFIES")) + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts, "1", "", "", "", ""}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts, "", "2", "", "", ""}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts, "", "", "3", "", ""}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts, "", "", "", "4", ""}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts, "", "", "", "", "5"}, + }, + identifiesRecords, + ) + + cancel() + _ = wg.Wait() + }) + + t.Run("discards", func(t *testing.T) { + config.Reset() + defer config.Reset() + + pool, err := dockertest.NewPool("") + require.NoError(t, err) + postgresContainer, err := postgres.Setup(pool, t) + require.NoError(t, err) + + gwPort, err := kithelper.GetFreePort() + require.NoError(t, err) + + namespace := randSchema(whutils.SNOWFLAKE) + + destination := backendconfigtest. + NewDestinationBuilder("SNOWPIPE_STREAMING"). + WithID("destination1"). + WithConfigOption("account", keyPairUnEncryptedCredentials.Account). + WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). + WithConfigOption("database", keyPairUnEncryptedCredentials.Database). + WithConfigOption("role", keyPairUnEncryptedCredentials.Role). + WithConfigOption("user", keyPairUnEncryptedCredentials.User). + WithConfigOption("useKeyPairAuth", true). + WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). + WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). + WithConfigOption("namespace", namespace). + WithRevisionID("destination1"). + Build() + source := backendconfigtest.NewSourceBuilder(). + WithID("source1"). + WithWriteKey("writekey1"). + WithConnection(destination). + Build() + bcServer := backendconfigtest.NewBuilder(). + WithWorkspaceConfig( + backendconfigtest.NewConfigBuilder(). + WithSource(source). + Build()). 
+ Build() + defer bcServer.Close() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + wg, ctx := errgroup.WithContext(ctx) + wg.Go(func() error { + err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + if err != nil { + t.Logf("rudder-server exited with error: %v", err) + } + return err + }) + url := fmt.Sprintf("http://localhost:%d", gwPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ + "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "int", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "int", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + })) + require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "int", "CONTEXT_REQUEST_IP": "int", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + })) + + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + strconv.Itoa(index+1), + "identify", + ) + } + + err = sendEvents(5, eventFormat, "writekey1", url) + require.NoError(t, err) + + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("gw processedJobCount: %d", jobsCount) + return jobsCount == 5 + }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("batch_rt succeeded: %d", jobsCount) + return jobsCount == 10 + }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": 
"NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) + + usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) + ts := timeutil.Now().Format("2006-01-02") + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + }, + usersRecords, + ) + identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + 
{destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + }, + identifiesRecords, + ) + discardsRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) + require.ElementsMatch(t, [][]string{ + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, + 
{"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, + }, + discardsRecords, + ) + + cancel() + _ = wg.Wait() + }) + + t.Run("discards migration for reason", func(t *testing.T) { + config.Reset() + defer config.Reset() + + pool, err := dockertest.NewPool("") + require.NoError(t, err) + postgresContainer, err := postgres.Setup(pool, t) + require.NoError(t, err) + + gwPort, err := kithelper.GetFreePort() + require.NoError(t, err) + + namespace := randSchema(whutils.SNOWFLAKE) + + destination := backendconfigtest. + NewDestinationBuilder("SNOWPIPE_STREAMING"). + WithID("destination1"). + WithConfigOption("account", keyPairUnEncryptedCredentials.Account). + WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). + WithConfigOption("database", keyPairUnEncryptedCredentials.Database). + WithConfigOption("role", keyPairUnEncryptedCredentials.Role). + WithConfigOption("user", keyPairUnEncryptedCredentials.User). + WithConfigOption("useKeyPairAuth", true). + WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). + WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). + WithConfigOption("namespace", namespace). + WithRevisionID("destination1"). + Build() + source := backendconfigtest.NewSourceBuilder(). + WithID("source1"). + WithWriteKey("writekey1"). + WithConnection(destination). + Build() + bcServer := backendconfigtest.NewBuilder(). + WithWorkspaceConfig( + backendconfigtest.NewConfigBuilder(). + WithSource(source). + Build()). + Build() + defer bcServer.Close() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + wg, ctx := errgroup.WithContext(ctx) + wg.Go(func() error { + err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + if err != nil { + t.Logf("rudder-server exited with error: %v", err) + } + return err + }) + url := fmt.Sprintf("http://localhost:%d", gwPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ + "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "int", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "int", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + })) + require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "int", "CONTEXT_REQUEST_IP": "int", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + })) + require.NoError(t, sm.CreateTable(ctx, "RUDDER_DISCARDS", 
whutils.ModelTableSchema{ + "COLUMN_NAME": "string", "COLUMN_VALUE": "string", "RECEIVED_AT": "datetime", "ROW_ID": "string", "TABLE_NAME": "string", "UUID_TS": "datetime", + })) + + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + strconv.Itoa(index+1), + "identify", + ) + } + + err = sendEvents(5, eventFormat, "writekey1", url) + require.NoError(t, err) + + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("gw processedJobCount: %d", jobsCount) + return jobsCount == 5 + }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("batch_rt succeeded: %d", jobsCount) + return jobsCount == 10 + }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) + + usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) + ts := timeutil.Now().Format("2006-01-02") + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", 
"2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + }, + usersRecords, + ) + identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + }, + identifiesRecords, + ) + discardsRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) + require.ElementsMatch(t, [][]string{ + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to 
string", ts, "4", "IDENTIFIES", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, + }, + discardsRecords, + ) + + cancel() + _ = wg.Wait() + }) + + t.Run("discards migrated", func(t *testing.T) { + config.Reset() + defer config.Reset() + + pool, err := dockertest.NewPool("") + require.NoError(t, err) + postgresContainer, err := postgres.Setup(pool, t) + require.NoError(t, err) + + gwPort, err := kithelper.GetFreePort() + require.NoError(t, err) + + namespace := randSchema(whutils.SNOWFLAKE) + + destination := backendconfigtest. + NewDestinationBuilder("SNOWPIPE_STREAMING"). + WithID("destination1"). + WithConfigOption("account", keyPairUnEncryptedCredentials.Account). + WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). + WithConfigOption("database", keyPairUnEncryptedCredentials.Database). + WithConfigOption("role", keyPairUnEncryptedCredentials.Role). + WithConfigOption("user", keyPairUnEncryptedCredentials.User). + WithConfigOption("useKeyPairAuth", true). + WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). + WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). + WithConfigOption("namespace", namespace). + WithRevisionID("destination1"). + Build() + source := backendconfigtest.NewSourceBuilder(). + WithID("source1"). + WithWriteKey("writekey1"). + WithConnection(destination). + Build() + bcServer := backendconfigtest.NewBuilder(). + WithWorkspaceConfig( + backendconfigtest.NewConfigBuilder(). + WithSource(source). + Build()). 
+ Build() + defer bcServer.Close() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + wg, ctx := errgroup.WithContext(ctx) + wg.Go(func() error { + err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + if err != nil { + t.Logf("rudder-server exited with error: %v", err) + } + return err + }) + url := fmt.Sprintf("http://localhost:%d", gwPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ + "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "int", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "int", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + })) + require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "int", "CONTEXT_REQUEST_IP": "int", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + })) + require.NoError(t, sm.CreateTable(ctx, "RUDDER_DISCARDS", whutils.ModelTableSchema{ + "COLUMN_NAME": "string", "COLUMN_VALUE": "string", "RECEIVED_AT": "datetime", "ROW_ID": "string", "TABLE_NAME": "string", "UUID_TS": "datetime", "REASON": "string", + })) + + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + strconv.Itoa(index+1), + "identify", + ) + } + + err = sendEvents(5, eventFormat, "writekey1", url) + require.NoError(t, err) + + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("gw processedJobCount: %d", jobsCount) + return jobsCount == 5 + }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("batch_rt succeeded: %d", jobsCount) + return jobsCount == 10 + }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS 
WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) + + usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) + ts := timeutil.Now().Format("2006-01-02") + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + }, + usersRecords, + ) + identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, 
"2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + }, + identifiesRecords, + ) + discardsRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) + require.ElementsMatch(t, [][]string{ + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int 
to string", ts, "4", "USERS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, + }, + discardsRecords, + ) + + cancel() + _ = wg.Wait() + }) + + t.Run("don't re-create channel on loading twice when successful", func(t *testing.T) { + config.Reset() + defer config.Reset() + + pool, err := dockertest.NewPool("") + require.NoError(t, err) + postgresContainer, err := postgres.Setup(pool, t) + require.NoError(t, err) + + gwPort, err := kithelper.GetFreePort() + require.NoError(t, err) + prometheusPort, err := kithelper.GetFreePort() + require.NoError(t, err) + + namespace := randSchema(whutils.SNOWFLAKE) + + destination := backendconfigtest. + NewDestinationBuilder("SNOWPIPE_STREAMING"). + WithID("destination1"). + WithConfigOption("account", keyPairUnEncryptedCredentials.Account). + WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). + WithConfigOption("database", keyPairUnEncryptedCredentials.Database). + WithConfigOption("role", keyPairUnEncryptedCredentials.Role). + WithConfigOption("user", keyPairUnEncryptedCredentials.User). + WithConfigOption("useKeyPairAuth", true). + WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). + WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). + WithConfigOption("namespace", namespace). + WithRevisionID("destination1"). + Build() + source := backendconfigtest.NewSourceBuilder(). + WithID("source1"). + WithWriteKey("writekey1"). + WithConnection(destination). + Build() + bcServer := backendconfigtest.NewBuilder(). + WithWorkspaceConfig( + backendconfigtest.NewConfigBuilder(). + WithSource(source). + Build()). 
+ Build() + defer bcServer.Close() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + wg, ctx := errgroup.WithContext(ctx) + wg.Go(func() error { + config.Set("enableStats", true) + config.Set("RuntimeStats.enabled", false) + config.Set("OpenTelemetry.enabled", true) + config.Set("OpenTelemetry.metrics.prometheus.enabled", true) + config.Set("OpenTelemetry.metrics.prometheus.port", strconv.Itoa(prometheusPort)) + config.Set("OpenTelemetry.metrics.exportInterval", "10ms") + + err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + if err != nil { + t.Logf("rudder-server exited with error: %v", err) + } + return err + }) + url := fmt.Sprintf("http://localhost:%d", gwPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ + "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "string", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + })) + require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_REQUEST_IP": "string", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + })) + + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + strconv.Itoa(index+1), + "identify", + ) + } + + t.Log("Sending 5 events") + err = sendEvents(5, eventFormat, "writekey1", url) + require.NoError(t, err) + + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("gw processedJobCount: %d", jobsCount) + return jobsCount == 5 + }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("batch_rt succeeded: %d", jobsCount) + return jobsCount == 10 + }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + + t.Log("Sending 5 events 
again") + err = sendEvents(5, eventFormat, "writekey1", url) + require.NoError(t, err) + + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("gw processedJobCount: %d", jobsCount) + return jobsCount == 10 + }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("batch_rt succeeded: %d", jobsCount) + return jobsCount == 20 + }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + + metrics := getPrometheusMetrics(t, prometheusPort) + require.Equal(t, 1, len(metrics["snowpipestreaming_create_channel_count"].GetMetric())) + require.Equal(t, float64(2), metrics["snowpipestreaming_create_channel_count"].GetMetric()[0].Counter.GetValue()) + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) + + usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) + ts := timeutil.Now().Format("2006-01-02") + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", 
"14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + }, + usersRecords, + ) + identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, 
"SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + }, + identifiesRecords, + ) + + cancel() + _ = wg.Wait() + }) + + t.Run("many tables", func(t *testing.T) {}) + + t.Run("schema modified after channel creation (datatype changed)", func(t *testing.T) { + config.Reset() + defer config.Reset() + + pool, err := dockertest.NewPool("") + require.NoError(t, err) + postgresContainer, err := postgres.Setup(pool, t) + require.NoError(t, err) + + gwPort, err := kithelper.GetFreePort() + require.NoError(t, err) + prometheusPort, err := kithelper.GetFreePort() + require.NoError(t, err) + + namespace := randSchema(whutils.SNOWFLAKE) + + destination := backendconfigtest. + NewDestinationBuilder("SNOWPIPE_STREAMING"). + WithID("destination1"). + WithConfigOption("account", keyPairUnEncryptedCredentials.Account). + WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). + WithConfigOption("database", keyPairUnEncryptedCredentials.Database). + WithConfigOption("role", keyPairUnEncryptedCredentials.Role). + WithConfigOption("user", keyPairUnEncryptedCredentials.User). + WithConfigOption("useKeyPairAuth", true). + WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). + WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). + WithConfigOption("namespace", namespace). + WithRevisionID("destination1"). + Build() + source := backendconfigtest.NewSourceBuilder(). + WithID("source1"). + WithWriteKey("writekey1"). + WithConnection(destination). + Build() + bcServer := backendconfigtest.NewBuilder(). + WithWorkspaceConfig( + backendconfigtest.NewConfigBuilder(). + WithSource(source). + Build()). 
+ Build() + defer bcServer.Close() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + wg, ctx := errgroup.WithContext(ctx) + wg.Go(func() error { + config.Set("enableStats", true) + config.Set("RuntimeStats.enabled", false) + config.Set("OpenTelemetry.enabled", true) + config.Set("OpenTelemetry.metrics.prometheus.enabled", true) + config.Set("OpenTelemetry.metrics.prometheus.port", strconv.Itoa(prometheusPort)) + config.Set("OpenTelemetry.metrics.exportInterval", "10ms") + + err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + if err != nil { + t.Logf("rudder-server exited with error: %v", err) + } + return err + }) + url := fmt.Sprintf("http://localhost:%d", gwPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ + "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "string", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + })) + require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_REQUEST_IP": "string", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + })) + + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + strconv.Itoa(index+1), + "identify", + ) + } + + t.Log("Sending 5 events") + err = sendEvents(5, eventFormat, "writekey1", url) + require.NoError(t, err) + + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("gw processedJobCount: %d", jobsCount) + return jobsCount == 5 + }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("batch_rt succeeded: %d", jobsCount) + return jobsCount == 10 + }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + + t.Log("Schema modified") + 
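+ // Drop the string-typed IP columns and re-add them as NUMBER, so the warehouse datatypes diverge from the schema the Snowpipe channel was created with.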
_, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.USERS DROP COLUMN CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_REQUEST_IP;", namespace)) + require.NoError(t, err) + _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.IDENTIFIES DROP COLUMN CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_REQUEST_IP;", namespace)) + require.NoError(t, err) + _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.USERS ADD COLUMN CONTEXT_IP NUMBER, CONTEXT_PASSED_IP NUMBER, CONTEXT_REQUEST_IP NUMBER;", namespace)) + require.NoError(t, err) + _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.IDENTIFIES ADD COLUMN CONTEXT_IP NUMBER, CONTEXT_PASSED_IP NUMBER, CONTEXT_REQUEST_IP NUMBER;", namespace)) + require.NoError(t, err) + + t.Log("Sending 5 events again") + err = sendEvents(5, eventFormat, "writekey1", url) + require.NoError(t, err) + + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("gw processedJobCount: %d", jobsCount) + return jobsCount == 10 + }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") + require.Eventually(t, func() bool { + var jobsCount int + require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) + t.Logf("batch_rt succeeded: %d", jobsCount) + return jobsCount == 20 + }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + + metrics := getPrometheusMetrics(t, prometheusPort) + require.Equal(t, 1, len(metrics["snowpipestreaming_create_channel_count"].GetMetric())) + require.Equal(t, float64(2), metrics["snowpipestreaming_create_channel_count"].GetMetric()[0].Counter.GetValue()) + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) + + usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) + ts := timeutil.Now().Format("2006-01-02") + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", 
source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + }, + usersRecords, + ) + identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) + require.ElementsMatch(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, 
source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + }, + identifiesRecords, + ) + + cancel() + _ = wg.Wait() + }) + + t.Run("schema modified after channel creation (table deleted)", func(t *testing.T) {}) + + t.Run("schema modified after channel creation (schema deleted)", func(t *testing.T) {}) + + t.Run("schema modified after channel creation (columns deleted)", func(t *testing.T) {}) +} + +func runRudderServer(ctx context.Context, port int, postgresContainer *postgres.Resource, cbURL, transformerURL, snowpipeClientsURL, tmpDir string) (err error) { + config.Set("CONFIG_BACKEND_URL", cbURL) + config.Set("WORKSPACE_TOKEN", "token") + config.Set("DB.host", postgresContainer.Host) + config.Set("DB.port", postgresContainer.Port) + config.Set("DB.user", postgresContainer.User) + config.Set("DB.name", postgresContainer.Database) + config.Set("DB.password", postgresContainer.Password) + config.Set("DEST_TRANSFORM_URL", transformerURL) + config.Set("SnowpipeStreaming.Client.URL", snowpipeClientsURL) + config.Set("BatchRouter.pollStatusLoopSleep", "1s") + config.Set("BatchRouter.asyncUploadTimeout", "1s") + config.Set("BatchRouter.asyncUploadWorkerTimeout", "1s") + config.Set("BatchRouter.mainLoopFreq", "1s") + config.Set("BatchRouter.uploadFreq", "1s") + config.Set("BatchRouter.isolationMode", "none") + + config.Set("Warehouse.mode", "off") + config.Set("DestinationDebugger.disableEventDeliveryStatusUploads", true) + config.Set("SourceDebugger.disableEventUploads", true) + config.Set("TransformationDebugger.disableTransformationStatusUploads", true) + config.Set("JobsDB.backup.enabled", false) + config.Set("JobsDB.migrateDSLoopSleepDuration", "60m") + config.Set("archival.Enabled", false) + config.Set("Reporting.syncer.enabled", false) + config.Set("BatchRouter.mainLoopFreq", "1s") + config.Set("BatchRouter.uploadFreq", "1s") + config.Set("Gateway.webPort", strconv.Itoa(port)) + config.Set("RUDDER_TMPDIR", os.TempDir()) + config.Set("recovery.storagePath", path.Join(tmpDir, "/recovery_data.json")) + config.Set("recovery.enabled", false) + config.Set("Profiler.Enabled", false) + config.Set("Gateway.enableSuppressUserFeature", false) + + defer func() { + if r := recover(); r != nil { + err = fmt.Errorf("panicked: %v", r) + } + }() + r := runner.New(runner.ReleaseInfo{EnterpriseToken: "TOKEN"}) + c := r.Run(ctx, + []string{"proc-isolation-test-rudder-server"}) + if c != 0 { + err = fmt.Errorf("rudder-server exited with a non-0 exit code: %d", c) + } + return +} + +func sendEvents(num int, eventFormat func(index int) string, writeKey, url string) error { // nolint:unparam + for i := 0; i < num; i++ { + payload 
:= []byte(eventFormat(i)) + req, err := http.NewRequest(http.MethodPost, url+"/v1/batch", bytes.NewReader(payload)) + if err != nil { + return err + } + req.SetBasicAuth(writeKey, "password") + resp, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + if resp.StatusCode != http.StatusOK { + b, _ := io.ReadAll(resp.Body) + return fmt.Errorf("failed to send event to rudder server, status code: %d: %s", resp.StatusCode, string(b)) + } + func() { kithttputil.CloseResponse(resp) }() + } + return nil +} + +func dropSchema(t *testing.T, db *sql.DB, namespace string) { + t.Helper() + t.Log("dropping schema", namespace) + + require.Eventually(t, + func() bool { + _, err := db.ExecContext(context.Background(), fmt.Sprintf(`DROP SCHEMA %q CASCADE;`, namespace)) + if err != nil { + t.Logf("error deleting schema %q: %v", namespace, err) + return false + } + return true + }, + time.Minute, + time.Second, + ) +} + +func convertRecordsToSchema(input [][]string) map[string]map[string]string { + return lo.MapValues(lo.GroupBy(input, func(row []string) string { + return row[0] + }), func(columns [][]string, _ string) map[string]string { + return lo.SliceToMap(columns, func(col []string) (string, string) { + return col[1], col[2] + }) + }) +} + +func getPrometheusMetrics(t *testing.T, prometheusPort int, requiredMetrics ...string) map[string]*promClient.MetricFamily { + t.Helper() + + buf := make([]byte, 0) + url := fmt.Sprintf("http://localhost:%d/metrics", prometheusPort) + + require.Eventuallyf(t, func() bool { + resp, err := http.Get(url) + if err != nil { + t.Logf("Failed to fetch metrics: %v", err) + return false + } + defer httputil.CloseResponse(resp) + + buf, err = io.ReadAll(resp.Body) + if err != nil { + t.Logf("Failed to read response body: %v", err) + return false + } + + bufString := string(buf) + for _, metric := range requiredMetrics { + if !strings.Contains(bufString, metric) { + return false + } + } + return true + }, time.Minute, 100*time.Millisecond, "Cannot find metrics in time: %s", buf) + + metrics, err := testhelper.ParsePrometheusMetrics(bytes.NewBuffer(buf)) + require.NoError(t, err) + + return metrics +} diff --git a/integration_test/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml b/integration_test/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml new file mode 100644 index 0000000000..0c9f5ea530 --- /dev/null +++ b/integration_test/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml @@ -0,0 +1,11 @@ +version: "3.9" + +services: + rudder-snowpipe-clients: + image: "hub.dev-rudder.rudderlabs.com/dockerhub-proxy/rudderstack/rudder-snowpipe-clients:develop" + ports: + - "9078" + healthcheck: + test: wget --no-verbose --tries=1 --spider http://localhost:9078/health || exit 1 + interval: 1s + retries: 25 diff --git a/warehouse/integrations/testdata/docker-compose.transformer.yml b/integration_test/snowpipestreaming/testdata/docker-compose.rudder-transformer.yml similarity index 66% rename from warehouse/integrations/testdata/docker-compose.transformer.yml rename to integration_test/snowpipestreaming/testdata/docker-compose.rudder-transformer.yml index 08899300e3..3141df23b2 100644 --- a/warehouse/integrations/testdata/docker-compose.transformer.yml +++ b/integration_test/snowpipestreaming/testdata/docker-compose.rudder-transformer.yml @@ -2,7 +2,7 @@ version: "3.9" services: transformer: - image: "rudderstack/rudder-transformer:latest" + image: 
"hub.dev-rudder.rudderlabs.com/dockerhub-proxy/rudderstack/develop-rudder-transformer:latest" ports: - "9090:9090" healthcheck: diff --git a/router/batchrouter/asyncdestinationmanager/common/utils.go b/router/batchrouter/asyncdestinationmanager/common/utils.go index 8b541ea016..df711f816e 100644 --- a/router/batchrouter/asyncdestinationmanager/common/utils.go +++ b/router/batchrouter/asyncdestinationmanager/common/utils.go @@ -3,7 +3,7 @@ package common import "slices" var ( - asyncDestinations = []string{"MARKETO_BULK_UPLOAD", "BINGADS_AUDIENCE", "ELOQUA", "YANDEX_METRICA_OFFLINE_EVENTS", "BINGADS_OFFLINE_CONVERSIONS", "KLAVIYO_BULK_UPLOAD", "LYTICS_BULK_UPLOAD"} + asyncDestinations = []string{"MARKETO_BULK_UPLOAD", "BINGADS_AUDIENCE", "ELOQUA", "YANDEX_METRICA_OFFLINE_EVENTS", "BINGADS_OFFLINE_CONVERSIONS", "KLAVIYO_BULK_UPLOAD", "LYTICS_BULK_UPLOAD", "SNOWPIPE_STREAMING"} sftpDestinations = []string{"SFTP"} ) diff --git a/router/batchrouter/asyncdestinationmanager/manager.go b/router/batchrouter/asyncdestinationmanager/manager.go index 29544f2f36..032f5f875a 100644 --- a/router/batchrouter/asyncdestinationmanager/manager.go +++ b/router/batchrouter/asyncdestinationmanager/manager.go @@ -16,6 +16,7 @@ import ( lyticsBulkUpload "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/lytics_bulk_upload" marketobulkupload "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/marketo-bulk-upload" "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/sftp" + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming" "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/yandexmetrica" ) @@ -41,6 +42,8 @@ func newRegularManager( return klaviyobulkupload.NewManager(logger, statsFactory, destination) case "LYTICS_BULK_UPLOAD": return lyticsBulkUpload.NewManager(logger, statsFactory, destination) + case "SNOWPIPE_STREAMING": + return snowpipestreaming.New(conf, logger, statsFactory, destination), nil } return nil, errors.New("invalid destination type") } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/apiadapter.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/apiadapter.go new file mode 100644 index 0000000000..4fd53b975d --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/apiadapter.go @@ -0,0 +1,71 @@ +package snowpipestreaming + +import ( + "context" + + "github.com/rudderlabs/rudder-go-kit/stats" + + backendconfig "github.com/rudderlabs/rudder-server/backend-config" + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" +) + +type apiAdapter struct { + stats struct { + createChannelCount stats.Counter + deleteChannelCount stats.Counter + insertCount stats.Counter + statusCount stats.Counter + createChannelResponseTime stats.Timer + deleteChannelResponseTime stats.Timer + insertResponseTime stats.Timer + statusResponseTime stats.Timer + } + + api +} + +func newApiAdapter(api api, statsFactory stats.Stats, destination *backendconfig.DestinationT) *apiAdapter { + adapter := &apiAdapter{} + adapter.api = api + + tags := stats.Tags{ + "module": "batch_router", + "workspaceId": destination.WorkspaceID, + "destType": destination.DestinationDefinition.Name, + "destinationId": destination.ID, + } + adapter.stats.createChannelCount = statsFactory.NewTaggedStat("snowpipestreaming_create_channel_count", stats.CountType, tags) + 
adapter.stats.deleteChannelCount = statsFactory.NewTaggedStat("snowpipestreaming_delete_channel_count", stats.CountType, tags) + adapter.stats.insertCount = statsFactory.NewTaggedStat("snowpipestreaming_insert_count", stats.CountType, tags) + adapter.stats.statusCount = statsFactory.NewTaggedStat("snowpipestreaming_status_count", stats.CountType, tags) + adapter.stats.createChannelResponseTime = statsFactory.NewTaggedStat("snowpipestreaming_create_channel_response_time", stats.TimerType, tags) + adapter.stats.deleteChannelResponseTime = statsFactory.NewTaggedStat("snowpipestreaming_delete_channel_response_time", stats.TimerType, tags) + adapter.stats.insertResponseTime = statsFactory.NewTaggedStat("snowpipestreaming_insert_response_time", stats.TimerType, tags) + adapter.stats.statusResponseTime = statsFactory.NewTaggedStat("snowpipestreaming_status_response_time", stats.TimerType, tags) + + return adapter +} + +func (a *apiAdapter) CreateChannel(ctx context.Context, req *model.CreateChannelRequest) (*model.ChannelResponse, error) { + defer a.stats.createChannelCount.Increment() + defer a.stats.createChannelResponseTime.RecordDuration()() + return a.api.CreateChannel(ctx, req) +} + +func (a *apiAdapter) DeleteChannel(ctx context.Context, channelID string, sync bool) error { + defer a.stats.deleteChannelCount.Increment() + defer a.stats.deleteChannelResponseTime.RecordDuration()() + return a.api.DeleteChannel(ctx, channelID, sync) +} + +func (a *apiAdapter) Insert(ctx context.Context, channelID string, insertRequest *model.InsertRequest) (*model.InsertResponse, error) { + defer a.stats.insertCount.Increment() + defer a.stats.insertResponseTime.RecordDuration()() + return a.api.Insert(ctx, channelID, insertRequest) +} + +func (a *apiAdapter) Status(ctx context.Context, channelID string) (*model.StatusResponse, error) { + defer a.stats.statusCount.Increment() + defer a.stats.statusResponseTime.RecordDuration()() + return a.api.Status(ctx, channelID) +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/channel.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/channel.go new file mode 100644 index 0000000000..7f7042866c --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/channel.go @@ -0,0 +1,166 @@ +package snowpipestreaming + +import ( + "context" + "fmt" + + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common" + internalapi "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api" + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" + "github.com/rudderlabs/rudder-server/warehouse/integrations/manager" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func (m *Manager) createChannel( + ctx context.Context, + asyncDest *common.AsyncDestinationStruct, + destConf destConfig, + tableName string, + eventSchema whutils.ModelTableSchema, +) (*model.ChannelResponse, error) { + if response, ok := m.channelCache.Load(tableName); ok { + return response.(*model.ChannelResponse), nil + } + + req := &model.CreateChannelRequest{ + RudderIdentifier: asyncDest.Destination.ID, + Partition: m.config.instanceID, + AccountConfig: model.AccountConfig{ + Account: destConf.Account, + User: destConf.User, + Role: destConf.Role, + PrivateKey: whutils.FormatPemContent(destConf.PrivateKey), + PrivateKeyPassphrase: destConf.PrivateKeyPassphrase, + }, + TableConfig: model.TableConfig{ 
+ Database: destConf.Database, + Schema: destConf.Namespace, + Table: tableName, + }, + } + + resp, err := m.api.CreateChannel(ctx, req) + if err != nil { + return nil, fmt.Errorf("creating channel: %v", err) + } + if resp.Success { + m.channelCache.Store(tableName, resp) + return resp, nil + } + + switch resp.Code { + case internalapi.ErrSchemaDoesNotExistOrNotAuthorized: + resp, err = m.handleSchemaError(ctx, req, eventSchema) + if err != nil { + return nil, fmt.Errorf("handling schema error: %v", err) + } + if !resp.Success { + return nil, fmt.Errorf("creating channel for schema error: %s", resp.Error) + } + m.channelCache.Store(tableName, resp) + return resp, nil + case internalapi.ErrTableDoesNotExistOrNotAuthorized: + resp, err = m.handleTableError(ctx, req, eventSchema) + if err != nil { + return nil, fmt.Errorf("handling table error: %v", err) + } + if !resp.Success { + return nil, fmt.Errorf("creating channel for table error: %s", resp.Error) + } + m.channelCache.Store(tableName, resp) + return resp, nil + default: + return nil, fmt.Errorf("creating channel: %v", err) + } +} + +func (m *Manager) handleSchemaError( + ctx context.Context, + channelReq *model.CreateChannelRequest, + eventSchema whutils.ModelTableSchema, +) (*model.ChannelResponse, error) { + m.stats.channelSchemaCreationErrorCount.Increment() + + snowflakeManager, err := m.createSnowflakeManager(ctx, channelReq.TableConfig.Schema) + if err != nil { + return nil, fmt.Errorf("creating snowflake manager: %v", err) + } + defer func() { + snowflakeManager.Cleanup(ctx) + }() + if err := snowflakeManager.CreateSchema(ctx); err != nil { + return nil, fmt.Errorf("creating schema: %v", err) + } + if err := snowflakeManager.CreateTable(ctx, channelReq.TableConfig.Table, eventSchema); err != nil { + return nil, fmt.Errorf("creating table: %v", err) + } + return m.api.CreateChannel(ctx, channelReq) +} + +func (m *Manager) handleTableError( + ctx context.Context, + channelReq *model.CreateChannelRequest, + eventSchema whutils.ModelTableSchema, +) (*model.ChannelResponse, error) { + m.stats.channelTableCreationErrorCount.Increment() + + snowflakeManager, err := m.createSnowflakeManager(ctx, channelReq.TableConfig.Schema) + if err != nil { + return nil, fmt.Errorf("creating snowflake manager: %v", err) + } + defer func() { + snowflakeManager.Cleanup(ctx) + }() + if err := snowflakeManager.CreateTable(ctx, channelReq.TableConfig.Table, eventSchema); err != nil { + return nil, fmt.Errorf("creating table: %v", err) + } + return m.api.CreateChannel(ctx, channelReq) +} + +func (m *Manager) recreateChannel( + ctx context.Context, + asyncDest *common.AsyncDestinationStruct, + destConf destConfig, + tableName string, + eventSchema whutils.ModelTableSchema, + existingChannelResponse *model.ChannelResponse, +) (*model.ChannelResponse, error) { + if err := m.deleteChannel(ctx, tableName, existingChannelResponse.ChannelID); err != nil { + return nil, fmt.Errorf("deleting channel: %v", err) + } + + channelResponse, err := m.createChannel(ctx, asyncDest, destConf, tableName, eventSchema) + if err != nil { + return nil, fmt.Errorf("recreating channel: %v", err) + } + return channelResponse, nil +} + +func (m *Manager) deleteChannel(ctx context.Context, tableName string, channelID string) error { + m.channelCache.Delete(tableName) + if err := m.api.DeleteChannel(ctx, channelID, true); err != nil { + return fmt.Errorf("deleting channel: %v", err) + } + return nil +} + +func (m *Manager) createSnowflakeManager(ctx context.Context, namespace string) 
(manager.Manager, error) { + modelWarehouse := whutils.ModelWarehouse{ + WorkspaceID: m.destination.WorkspaceID, + Destination: *m.destination, + Namespace: namespace, + Type: m.destination.DestinationDefinition.Name, + Identifier: m.destination.WorkspaceID + ":" + m.destination.ID, + } + modelWarehouse.Destination.Config["useKeyPairAuth"] = true // Since we are currently only supporting key pair auth + + sf, err := manager.New(whutils.SNOWFLAKE, m.conf, m.logger, m.statsFactory) + if err != nil { + return nil, fmt.Errorf("creating snowflake manager: %v", err) + } + err = sf.Setup(ctx, modelWarehouse, &whutils.NopUploader{}) + if err != nil { + return nil, fmt.Errorf("setting up snowflake manager: %v", err) + } + return sf, nil +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/columns.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/columns.go new file mode 100644 index 0000000000..a3173e8de1 --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/columns.go @@ -0,0 +1,35 @@ +package snowpipestreaming + +import ( + "context" + "fmt" + + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func (m *Manager) addColumns(ctx context.Context, namespace, tableName string, columns []whutils.ColumnInfo) error { + snowflakeManager, err := m.createSnowflakeManager(ctx, namespace) + if err != nil { + return fmt.Errorf("creating snowflake manager: %v", err) + } + defer func() { + snowflakeManager.Cleanup(ctx) + }() + if err = snowflakeManager.AddColumns(ctx, tableName, columns); err != nil { + return fmt.Errorf("adding columns: %v", err) + } + return nil +} + +func findNewColumns(eventSchema, snowPipeSchema whutils.ModelTableSchema) []whutils.ColumnInfo { + var newColumns []whutils.ColumnInfo + for column, dataType := range eventSchema { + if _, exists := snowPipeSchema[column]; !exists { + newColumns = append(newColumns, whutils.ColumnInfo{ + Name: column, + Type: dataType, + }) + } + } + return newColumns +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/discards.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/discards.go new file mode 100644 index 0000000000..551f98cec8 --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/discards.go @@ -0,0 +1,145 @@ +package snowpipestreaming + +import ( + "context" + "fmt" + "strconv" + + obskit "github.com/rudderlabs/rudder-observability-kit/go/labels" + "github.com/samber/lo" + + "github.com/rudderlabs/rudder-go-kit/logger" + + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common" + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" + "github.com/rudderlabs/rudder-server/warehouse/slave" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func (m *Manager) loadDiscardsToSnowPipe( + ctx context.Context, + asyncDest *common.AsyncDestinationStruct, + destConf destConfig, + discardInfos []discardInfo, +) (*uploadInfo, error) { + tableName, eventSchema := discardsTable(), discardsSchema() + + log := m.logger.Withn( + logger.NewStringField("table", tableName), + logger.NewIntField("events", int64(len(discardInfos))), + ) + log.Infon("Uploading data to table") + + channelResponse, err := m.createChannel(ctx, asyncDest, destConf, tableName, eventSchema) + if err != nil { + return nil, fmt.Errorf("creating channel: %v", err) + } + + columnInfos := findNewColumns(eventSchema, 
channelResponse.SnowPipeSchema()) + if len(columnInfos) > 0 { + if err := m.addColumns(ctx, destConf.Namespace, tableName, columnInfos); err != nil { + return nil, fmt.Errorf("adding columns: %v", err) + } + + channelResponse, err = m.recreateChannel(ctx, asyncDest, destConf, tableName, eventSchema, channelResponse) + if err != nil { + return nil, fmt.Errorf("recreating channel: %v", err) + } + } + + offset := strconv.FormatInt(m.now().Unix(), 10) + + insertReq := &model.InsertRequest{ + Rows: createRowsFromDiscardInfos(discardInfos), + Offset: offset, + } + insertRes, err := m.api.Insert(ctx, channelResponse.ChannelID, insertReq) + if err != nil { + if deleteErr := m.deleteChannel(ctx, tableName, channelResponse.ChannelID); deleteErr != nil { + m.logger.Warnn("Failed to delete channel", + logger.NewStringField("table", tableName), + obskit.Error(deleteErr), + ) + } + return nil, fmt.Errorf("inserting data: %v", err) + } + if !insertRes.Success { + if deleteErr := m.deleteChannel(ctx, tableName, channelResponse.ChannelID); deleteErr != nil { + m.logger.Warnn("Failed to delete channel", + logger.NewStringField("table", tableName), + obskit.Error(deleteErr), + ) + } + return nil, errInsertingDataFailed + } + m.logger.Infon("Successfully uploaded data to table", + logger.NewStringField("table", tableName), + logger.NewIntField("events", int64(len(discardInfos))), + ) + m.stats.discardCount.Count(len(discardInfos)) + + idOffset := &uploadInfo{ + ChannelID: channelResponse.ChannelID, + Offset: offset, + Table: tableName, + } + return idOffset, nil +} + +func discardsTable() string { + return whutils.ToProviderCase(whutils.SNOWFLAKE, whutils.DiscardsTable) +} + +func discardsSchema() whutils.ModelTableSchema { + return lo.MapEntries(whutils.DiscardsSchema, func(colName, colType string) (string, string) { + return whutils.ToProviderCase(whutils.SNOWFLAKE, colName), colType + }) +} + +func createRowsFromDiscardInfos(discardInfos []discardInfo) []model.Row { + return lo.FilterMap(discardInfos, func(info discardInfo, _ int) (model.Row, bool) { + id, idExists := info.eventData[whutils.ToProviderCase(whutils.SNOWFLAKE, "id")] + receivedAt, receivedAtExists := info.eventData[whutils.ToProviderCase(whutils.SNOWFLAKE, "received_at")] + + if !idExists || !receivedAtExists { + return nil, false + } + + return model.Row{ + "column_name": info.colName, + "column_value": info.eventData[info.colName], + "reason": info.reason, + "received_at": receivedAt, + "row_id": id, + "table_name": info.table, + "uuid_ts": info.uuidTS, + }, true + }) +} + +func discardedRecords( + event event, + snowPipeSchema whutils.ModelTableSchema, + tableName string, + formattedTS string, +) (discardedRecords []discardInfo) { + for colName, actualType := range event.Message.Metadata.Columns { + if expectedType, exists := snowPipeSchema[colName]; exists && actualType != expectedType { + if convertedVal, err := slave.HandleSchemaChange(expectedType, actualType, event.Message.Data[colName]); err != nil { + // Discard value if conversion fails + event.Message.Data[colName] = nil + discardedRecords = append(discardedRecords, discardInfo{ + table: tableName, + colName: colName, + eventData: event.Message.Data, + reason: err.Error(), + uuidTS: formattedTS, + }) + } else { + // Update value if conversion succeeds + event.Message.Data[colName] = convertedVal + } + } + } + return discardedRecords +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api.go 
b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api.go new file mode 100644 index 0000000000..7d0a8aa7fb --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api.go @@ -0,0 +1,27 @@ +package api + +import ( + "io" + "net/http" +) + +type API struct { + clientURL string + requestDoer requestDoer +} + +type requestDoer interface { + Do(*http.Request) (*http.Response, error) +} + +func New(clientURL string, requestDoer requestDoer) *API { + return &API{ + clientURL: clientURL, + requestDoer: requestDoer, + } +} + +func mustReadAll(r io.Reader) []byte { + data, _ := io.ReadAll(r) + return data +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api_test.go new file mode 100644 index 0000000000..70d363d8bc --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api_test.go @@ -0,0 +1,320 @@ +package api + +import ( + "context" + "errors" + "fmt" + "io" + "net/http" + "os" + "strings" + "testing" + "testing/iotest" + "time" + + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/compose-test/compose" + "github.com/rudderlabs/compose-test/testcompose" + "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stats" + + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testhelper" + "github.com/rudderlabs/rudder-server/testhelper/backendconfigtest" + thwh "github.com/rudderlabs/rudder-server/testhelper/warehouse" + "github.com/rudderlabs/rudder-server/warehouse/integrations/snowflake" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +type mockRequestDoer struct { + err error + response *http.Response +} + +func (c *mockRequestDoer) Do(*http.Request) (*http.Response, error) { + return c.response, c.err +} + +type nopReadCloser struct { + io.Reader +} + +func (nopReadCloser) Close() error { + return nil +} + +func TestMustReadAll(t *testing.T) { + t.Run("ReadAll", func(t *testing.T) { + r := strings.NewReader("hello") + data := mustReadAll(r) + require.Equal(t, []byte("hello"), data) + }) + t.Run("ReadAll error", func(t *testing.T) { + r := iotest.ErrReader(errors.New("error")) + data := mustReadAll(r) + require.Empty(t, data) + }) +} + +func TestAPI(t *testing.T) { + for _, key := range []string{ + testhelper.TestKeyPairUnencrypted, + } { + if _, exists := os.LookupEnv(key); !exists { + if os.Getenv("FORCE_RUN_INTEGRATION_TESTS") == "true" { + t.Fatalf("%s environment variable not set", key) + } + t.Skipf("Skipping %s as %s is not set", t.Name(), key) + } + } + + t.Run("Create channel + Get channel + Insert data + Status", func(t *testing.T) { + c := testcompose.New(t, compose.FilePaths([]string{"../../testdata/docker-compose.rudder-snowpipe-clients.yml"})) + c.Start(context.Background()) + + credentials, err := testhelper.GetSnowPipeTestCredentials(testhelper.TestKeyPairUnencrypted) + require.NoError(t, err) + + ctx := context.Background() + + namespace := testhelper.RandSchema(whutils.SNOWFLAKE) + table := "TEST_TABLE" + tableSchema := whutils.ModelTableSchema{ + "ID": "string", "NAME": "string", "EMAIL": "string", "AGE": "int", "ACTIVE": "boolean", "DOB": "datetime", + } + + destination := 
backendconfigtest. + NewDestinationBuilder("SNOWPIPE_STREAMING"). + WithConfigOption("account", credentials.Account). + WithConfigOption("warehouse", credentials.Warehouse). + WithConfigOption("database", credentials.Database). + WithConfigOption("role", credentials.Role). + WithConfigOption("user", credentials.User). + WithConfigOption("useKeyPairAuth", true). + WithConfigOption("privateKey", credentials.PrivateKey). + WithConfigOption("privateKeyPassphrase", credentials.PrivateKeyPassphrase). + Build() + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + t.Log("Creating namespace and table") + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, table, tableSchema)) + + snowPipeClientsURL := fmt.Sprintf("http://localhost:%d", c.Port("rudder-snowpipe-clients", 9078)) + api := New(snowPipeClientsURL, http.DefaultClient) + + t.Log("Creating channel") + createChannelRes, err := api.CreateChannel(ctx, &model.CreateChannelRequest{ + RudderIdentifier: "1", + Partition: "1", + AccountConfig: model.AccountConfig{ + Account: credentials.Account, + User: credentials.User, + Role: credentials.Role, + PrivateKey: strings.ReplaceAll(credentials.PrivateKey, "\n", "\\\\\n"), + PrivateKeyPassphrase: credentials.PrivateKeyPassphrase, + }, + TableConfig: model.TableConfig{ + Database: credentials.Database, + Schema: namespace, + Table: table, + }, + }) + require.NoError(t, err) + require.NotEmpty(t, createChannelRes.ChannelID) + require.True(t, createChannelRes.Valid) + require.False(t, createChannelRes.Deleted) + require.EqualValues(t, map[string]map[string]interface{}{ + "ACTIVE": { + "byteLength": nil, + "length": nil, + "logicalType": "BOOLEAN", + "nullable": true, + "precision": nil, + "scale": nil, + "type": "BOOLEAN", + }, + "AGE": { + "byteLength": nil, + "length": nil, + "logicalType": "FIXED", + "nullable": true, + "precision": float64(38), + "scale": float64(0), + "type": "NUMBER(38,0)", + }, + "DOB": { + "byteLength": nil, + "length": nil, + "logicalType": "TIMESTAMP_TZ", + "nullable": true, + "precision": float64(0), + "scale": float64(9), + "type": "TIMESTAMP_TZ(9)", + }, + "EMAIL": { + "byteLength": 1.6777216e+07, + "length": 1.6777216e+07, + "logicalType": "TEXT", + "nullable": true, + "precision": nil, + "scale": nil, + "type": "VARCHAR(16777216)", + }, + "ID": { + "byteLength": 1.6777216e+07, + "length": 1.6777216e+07, + "logicalType": "TEXT", + "nullable": true, + "precision": nil, + "scale": nil, + "type": "VARCHAR(16777216)", + }, + "NAME": { + "byteLength": 1.6777216e+07, + "length": 1.6777216e+07, + "logicalType": "TEXT", + "nullable": true, + "precision": nil, + "scale": nil, + "type": "VARCHAR(16777216)", + }, + }, + createChannelRes.TableSchema, + ) + + t.Log("Getting channel") + getChannelRes, err := api.GetChannel(ctx, createChannelRes.ChannelID) + require.NoError(t, err) + require.Equal(t, createChannelRes, getChannelRes) + + t.Log("Inserting records") + insertRes, err := api.Insert(ctx, createChannelRes.ChannelID, &model.InsertRequest{ + Rows: []model.Row{ + {"ID": "ID1", "NAME": "Alice Johnson", "EMAIL": "alice.johnson@example.com", "AGE": 28, "ACTIVE": true, "DOB": "1995-06-15T12:30:00Z"}, + {"ID": "ID2", "NAME": "Bob Smith", "EMAIL": "bob.smith@example.com", "AGE": 
35, "ACTIVE": true, "DOB": "1988-01-20T09:30:00Z"}, + {"ID": "ID3", "NAME": "Charlie Brown", "EMAIL": "charlie.brown@example.com", "AGE": 22, "ACTIVE": false, "DOB": "2001-11-05T14:45:00Z"}, + {"ID": "ID4", "NAME": "Diana Prince", "EMAIL": "diana.prince@example.com", "AGE": 30, "ACTIVE": true, "DOB": "1993-08-18T08:15:00Z"}, + {"ID": "ID5", "NAME": "Eve Adams", "AGE": 45, "ACTIVE": true, "DOB": "1978-03-22T16:50:00Z"}, // -- No email + {"ID": "ID6", "NAME": "Frank Castle", "EMAIL": "frank.castle@example.com", "AGE": 38, "ACTIVE": false, "DOB": "1985-09-14T10:10:00Z"}, + {"ID": "ID7", "NAME": "Grace Hopper", "EMAIL": "grace.hopper@example.com", "AGE": 85, "ACTIVE": true, "DOB": "1936-12-09T11:30:00Z"}, + }, + Offset: "8", + }) + require.NoError(t, err) + require.Equal(t, &model.InsertResponse{Success: true, Errors: nil}, insertRes) + + t.Log("Checking status") + require.Eventually(t, func() bool { + statusRes, err := api.Status(ctx, createChannelRes.ChannelID) + if err != nil { + t.Log("Error getting status:", err) + return false + } + return statusRes.Offset == "8" + }, + 30*time.Second, + 300*time.Millisecond, + ) + + t.Log("Checking records in warehouse") + records := thwh.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT ID, NAME, EMAIL, AGE, ACTIVE, DOB FROM %q.%q ORDER BY ID;`, namespace, table)) + require.ElementsMatch(t, [][]string{ + {"ID1", "Alice Johnson", "alice.johnson@example.com", "28", "true", "1995-06-15T12:30:00Z"}, + {"ID2", "Bob Smith", "bob.smith@example.com", "35", "true", "1988-01-20T09:30:00Z"}, + {"ID3", "Charlie Brown", "charlie.brown@example.com", "22", "false", "2001-11-05T14:45:00Z"}, + {"ID4", "Diana Prince", "diana.prince@example.com", "30", "true", "1993-08-18T08:15:00Z"}, + {"ID5", "Eve Adams", "", "45", "true", "1978-03-22T16:50:00Z"}, + {"ID6", "Frank Castle", "frank.castle@example.com", "38", "false", "1985-09-14T10:10:00Z"}, + {"ID7", "Grace Hopper", "grace.hopper@example.com", "85", "true", "1936-12-09T11:30:00Z"}, + }, records) + }) + + t.Run("Create + Delete channel", func(t *testing.T) { + c := testcompose.New(t, compose.FilePaths([]string{"../../testdata/docker-compose.rudder-snowpipe-clients.yml"})) + c.Start(context.Background()) + + credentials, err := testhelper.GetSnowPipeTestCredentials(testhelper.TestKeyPairUnencrypted) + require.NoError(t, err) + + ctx := context.Background() + + namespace := testhelper.RandSchema(whutils.SNOWFLAKE) + table := "TEST_TABLE" + tableSchema := whutils.ModelTableSchema{ + "ID": "string", "NAME": "string", "EMAIL": "string", "AGE": "int", "ACTIVE": "boolean", "DOB": "datetime", + } + + destination := backendconfigtest. + NewDestinationBuilder("SNOWPIPE_STREAMING"). + WithConfigOption("account", credentials.Account). + WithConfigOption("warehouse", credentials.Warehouse). + WithConfigOption("database", credentials.Database). + WithConfigOption("role", credentials.Role). + WithConfigOption("user", credentials.User). + WithConfigOption("useKeyPairAuth", true). + WithConfigOption("privateKey", credentials.PrivateKey). + WithConfigOption("privateKeyPassphrase", credentials.PrivateKeyPassphrase). 
+ Build() + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + t.Log("Creating namespace and table") + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, table, tableSchema)) + + snowPipeClientsURL := fmt.Sprintf("http://localhost:%d", c.Port("rudder-snowpipe-clients", 9078)) + api := New(snowPipeClientsURL, http.DefaultClient) + + t.Log("Creating channel") + createChannelReq := &model.CreateChannelRequest{ + RudderIdentifier: "1", + Partition: "1", + AccountConfig: model.AccountConfig{ + Account: credentials.Account, + User: credentials.User, + Role: credentials.Role, + PrivateKey: strings.ReplaceAll(credentials.PrivateKey, "\n", "\\\\\n"), + PrivateKeyPassphrase: credentials.PrivateKeyPassphrase, + }, + TableConfig: model.TableConfig{ + Database: credentials.Database, + Schema: namespace, + Table: table, + }, + } + createChannelRes1, err := api.CreateChannel(ctx, createChannelReq) + require.NoError(t, err) + require.True(t, createChannelRes1.Valid) + + t.Log("Creating channel again, should return the same channel id") + createChannelRes2, err := api.CreateChannel(ctx, createChannelReq) + require.NoError(t, err) + require.True(t, createChannelRes2.Valid) + require.Equal(t, createChannelRes1, createChannelRes2) + + t.Log("Deleting channel") + err = api.DeleteChannel(ctx, createChannelRes1.ChannelID, true) + require.NoError(t, err) + + t.Log("Creating channel again, should return a new channel id") + createChannelRes3, err := api.CreateChannel(ctx, createChannelReq) + require.NoError(t, err) + require.NotEqual(t, createChannelRes1.ChannelID, createChannelRes3.ChannelID) + }) +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel.go new file mode 100644 index 0000000000..6553df16f5 --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel.go @@ -0,0 +1,43 @@ +package api + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + + "github.com/rudderlabs/rudder-go-kit/httputil" + + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" +) + +func (a *API) CreateChannel(ctx context.Context, channelReq *model.CreateChannelRequest) (*model.ChannelResponse, error) { + reqJSON, err := json.Marshal(channelReq) + if err != nil { + return nil, fmt.Errorf("marshalling create channel request: %w", err) + } + + channelReqURL := a.clientURL + "/channels" + req, err := http.NewRequestWithContext(ctx, http.MethodPost, channelReqURL, bytes.NewBuffer(reqJSON)) + if err != nil { + return nil, fmt.Errorf("creating create channel request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + + resp, reqErr := a.requestDoer.Do(req) + if reqErr != nil { + return nil, fmt.Errorf("sending create channel request: %w", reqErr) + } + defer func() { httputil.CloseResponse(resp) }() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("invalid status code for create channel: %d, body: %s", resp.StatusCode, string(mustReadAll(resp.Body))) + } + + var res model.ChannelResponse + if err := 
json.NewDecoder(resp.Body).Decode(&res); err != nil { + return nil, fmt.Errorf("decoding create channel response: %w", err) + } + return &res, nil +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel_test.go new file mode 100644 index 0000000000..dc7975e6d6 --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel_test.go @@ -0,0 +1,172 @@ +package api + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "io" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" +) + +func TestCreateChannel(t *testing.T) { + ccr := &model.CreateChannelRequest{ + RudderIdentifier: "rudderIdentifier", + Partition: "partition", + AccountConfig: model.AccountConfig{ + Account: "account", + User: "user", + Role: "role", + PrivateKey: "privateKey", + PrivateKeyPassphrase: "privateKeyPassphrase", + }, + TableConfig: model.TableConfig{ + Database: "database", + Schema: "schema", + Table: "table", + }, + } + + successSnowPipeServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, http.MethodPost, r.Method) + require.Equal(t, "application/json", r.Header.Get("Content-Type")) + + ccrJSON, err := json.Marshal(ccr) + require.NoError(t, err) + + body, err := io.ReadAll(r.Body) + require.NoError(t, err) + require.NoError(t, r.Body.Close()) + require.JSONEq(t, string(ccrJSON), string(body)) + + switch r.URL.String() { + case "/channels": + _, err := w.Write([]byte(`{"success": true,"channelId":"channelId","channelName":"channelName","clientName":"clientName","valid":true,"deleted":false,"tableSchema":{"EVENT":{"type":"VARCHAR(16777216)","logicalType":"TEXT","precision":null,"scale":null,"byteLength":16777216,"length":16777216,"nullable":true},"ID":{"type":"VARCHAR(16777216)","logicalType":"TEXT","precision":null,"scale":null,"byteLength":16777216,"length":16777216,"nullable":true},"TIMESTAMP":{"type":"TIMESTAMP_TZ(9)","logicalType":"TIMESTAMP_TZ","precision":0,"scale":9,"byteLength":null,"length":null,"nullable":true}}}`)) + require.NoError(t, err) + default: + w.WriteHeader(http.StatusNotFound) + } + })) + defer successSnowPipeServer.Close() + + failureSnowPipeServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, http.MethodPost, r.Method) + require.Equal(t, "application/json", r.Header.Get("Content-Type")) + + ccrJSON, err := json.Marshal(ccr) + require.NoError(t, err) + + body, err := io.ReadAll(r.Body) + require.NoError(t, err) + require.NoError(t, r.Body.Close()) + require.JSONEq(t, string(ccrJSON), string(body)) + + switch r.URL.String() { + case "/channels": + _, err := w.Write([]byte(`{"success":false,"error":"Open channel request failed: HTTP Status: 400 ErrorBody: {\n \"status_code\" : 4,\n \"message\" : \"The supplied table does not exist or is not authorized.\"\n}.","code":"ERR_TABLE_DOES_NOT_EXIST_OR_NOT_AUTHORIZED","snowflakeSDKCode":"0007","snowflakeAPIHttpCode":400,"snowflakeAPIStatusCode":4,"snowflakeAPIMessage":"The supplied table does not exist or is not authorized."}`)) + require.NoError(t, err) + default: + w.WriteHeader(http.StatusNotFound) + } + })) + defer failureSnowPipeServer.Close() + + ctx := context.Background() + + 
t.Run("Status=200(success=true)", func(t *testing.T) { + manager := New(successSnowPipeServer.URL, successSnowPipeServer.Client()) + res, err := manager.CreateChannel(ctx, ccr) + require.NoError(t, err) + require.EqualValues(t, &model.ChannelResponse{ + Success: true, + ChannelID: "channelId", + ChannelName: "channelName", + ClientName: "clientName", + Valid: true, + Deleted: false, + TableSchema: map[string]map[string]interface{}{ + "EVENT": { + "byteLength": 1.6777216e+07, + "length": 1.6777216e+07, + "logicalType": "TEXT", + "nullable": true, + "precision": nil, + "scale": nil, + "type": "VARCHAR(16777216)", + }, + "ID": { + "byteLength": 1.6777216e+07, + "length": 1.6777216e+07, + "logicalType": "TEXT", + "nullable": true, + "precision": nil, + "scale": nil, + "type": "VARCHAR(16777216)", + }, + "TIMESTAMP": { + "byteLength": nil, + "length": nil, + "logicalType": "TIMESTAMP_TZ", + "nullable": true, + "precision": float64(0), + "scale": float64(9), + "type": "TIMESTAMP_TZ(9)", + }, + }, + }, + res, + ) + }) + t.Run("Status=200(success=false)", func(t *testing.T) { + manager := New(failureSnowPipeServer.URL, failureSnowPipeServer.Client()) + res, err := manager.CreateChannel(ctx, ccr) + require.NoError(t, err) + require.EqualValues(t, &model.ChannelResponse{ + Success: false, + Error: "Open channel request failed: HTTP Status: 400 ErrorBody: {\n \"status_code\" : 4,\n \"message\" : \"The supplied table does not exist or is not authorized.\"\n}.", + Code: "ERR_TABLE_DOES_NOT_EXIST_OR_NOT_AUTHORIZED", + SnowflakeSDKCode: "0007", + SnowflakeAPIHttpCode: 400, + SnowflakeAPIStatusCode: 4, + SnowflakeAPIMessage: "The supplied table does not exist or is not authorized.", + }, + res, + ) + }) + t.Run("Request failure", func(t *testing.T) { + manager := New(successSnowPipeServer.URL, &mockRequestDoer{ + err: errors.New("bad client"), + }) + res, err := manager.CreateChannel(ctx, ccr) + require.Error(t, err) + require.Nil(t, res) + }) + t.Run("Request failure (non 200's status code)", func(t *testing.T) { + manager := New(successSnowPipeServer.URL, &mockRequestDoer{ + response: &http.Response{ + StatusCode: http.StatusBadRequest, + Body: nopReadCloser{Reader: bytes.NewReader([]byte(`{}`))}, + }, + }) + res, err := manager.CreateChannel(ctx, ccr) + require.Error(t, err) + require.Nil(t, res) + }) + t.Run("Request failure (invalid response)", func(t *testing.T) { + manager := New(successSnowPipeServer.URL, &mockRequestDoer{ + response: &http.Response{ + StatusCode: http.StatusOK, + Body: nopReadCloser{Reader: bytes.NewReader([]byte(`{abd}`))}, + }, + }) + res, err := manager.CreateChannel(ctx, ccr) + require.Error(t, err) + require.Nil(t, res) + }) +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/deletechannel.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/deletechannel.go new file mode 100644 index 0000000000..f95c1d14fe --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/deletechannel.go @@ -0,0 +1,34 @@ +package api + +import ( + "context" + "fmt" + "net/http" + "strconv" + + "github.com/rudderlabs/rudder-go-kit/httputil" +) + +func (a *API) DeleteChannel(ctx context.Context, channelID string, sync bool) error { + deleteChannelURL := a.clientURL + "/channels/" + channelID + req, err := http.NewRequestWithContext(ctx, http.MethodDelete, deleteChannelURL, nil) + if err != nil { + return fmt.Errorf("creating delete channel request: %w", err) + } + req.Header.Set("Content-Type", 
"application/json") + + queryParams := req.URL.Query() + queryParams.Add("sync", strconv.FormatBool(sync)) + req.URL.RawQuery = queryParams.Encode() + + resp, reqErr := a.requestDoer.Do(req) + if reqErr != nil { + return fmt.Errorf("sending delete channel request: %w", reqErr) + } + defer func() { httputil.CloseResponse(resp) }() + + if resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusAccepted { + return fmt.Errorf("invalid status code for delete channel: %d, body: %s", resp.StatusCode, string(mustReadAll(resp.Body))) + } + return nil +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/deletechannel_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/deletechannel_test.go new file mode 100644 index 0000000000..3966579bfa --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/deletechannel_test.go @@ -0,0 +1,63 @@ +package api + +import ( + "bytes" + "context" + "errors" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestDeleteChannel(t *testing.T) { + channelID := "channelID" + + snowPipeServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, http.MethodDelete, r.Method) + require.Equal(t, "application/json", r.Header.Get("Content-Type")) + + switch r.URL.String() { + case "/channels/" + channelID + "?sync=true": + w.WriteHeader(http.StatusNoContent) + case "/channels/" + channelID + "?sync=false": + w.WriteHeader(http.StatusAccepted) + default: + w.WriteHeader(http.StatusNotFound) + } + })) + defer snowPipeServer.Close() + + ctx := context.Background() + + t.Run("Success", func(t *testing.T) { + t.Run("sync=true", func(t *testing.T) { + manager := New(snowPipeServer.URL, snowPipeServer.Client()) + err := manager.DeleteChannel(ctx, channelID, true) + require.NoError(t, err) + }) + t.Run("sync=false", func(t *testing.T) { + manager := New(snowPipeServer.URL, snowPipeServer.Client()) + err := manager.DeleteChannel(ctx, channelID, false) + require.NoError(t, err) + }) + }) + t.Run("Request failure", func(t *testing.T) { + manager := New(snowPipeServer.URL, &mockRequestDoer{ + err: errors.New("bad client"), + }) + err := manager.DeleteChannel(ctx, channelID, true) + require.Error(t, err) + }) + t.Run("Request failure (non 200's status code)", func(t *testing.T) { + manager := New(snowPipeServer.URL, &mockRequestDoer{ + response: &http.Response{ + StatusCode: http.StatusBadRequest, + Body: nopReadCloser{Reader: bytes.NewReader([]byte(`{}`))}, + }, + }) + err := manager.DeleteChannel(ctx, channelID, true) + require.Error(t, err) + }) +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/errorcodes.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/errorcodes.go new file mode 100644 index 0000000000..381129ff6d --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/errorcodes.go @@ -0,0 +1,12 @@ +package api + +var ( + ErrUnknownError = "ERR_UNKNOWN_ERROR" + ErrValidationError = "ERR_VALIDATION_ERROR" + ErrAuthenticationFailed = "ERR_AUTHENTICATION_FAILED" + ErrRoleDoesNotExistOrNotAuthorized = "ERR_ROLE_DOES_NOT_EXIST_OR_NOT_AUTHORIZED" + ErrDatabaseDoesNotExistOrNotAuthorized = "ERR_DATABASE_DOES_NOT_EXIST_OR_NOT_AUTHORIZED" + ErrSchemaDoesNotExistOrNotAuthorized = "ERR_SCHEMA_DOES_NOT_EXIST_OR_NOT_AUTHORIZED" + ErrTableDoesNotExistOrNotAuthorized = 
"ERR_TABLE_DOES_NOT_EXIST_OR_NOT_AUTHORIZED" + ErrSchemaConflict = "ERR_SCHEMA_CONFLICT" +) diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel.go new file mode 100644 index 0000000000..ba9210c209 --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel.go @@ -0,0 +1,37 @@ +package api + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + + "github.com/rudderlabs/rudder-go-kit/httputil" + + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" +) + +func (a *API) GetChannel(ctx context.Context, channelID string) (*model.ChannelResponse, error) { + getChannelURL := a.clientURL + "/channels/" + channelID + req, err := http.NewRequestWithContext(ctx, http.MethodGet, getChannelURL, nil) + if err != nil { + return nil, fmt.Errorf("creating get channel request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + + resp, reqErr := a.requestDoer.Do(req) + if reqErr != nil { + return nil, fmt.Errorf("sending get channel request: %w", reqErr) + } + defer func() { httputil.CloseResponse(resp) }() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("invalid status code for get channel: %d, body: %s", resp.StatusCode, string(mustReadAll(resp.Body))) + } + + var res model.ChannelResponse + if err := json.NewDecoder(resp.Body).Decode(&res); err != nil { + return nil, fmt.Errorf("decoding get channel response: %w", err) + } + return &res, nil +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel_test.go new file mode 100644 index 0000000000..8dc2d654b4 --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel_test.go @@ -0,0 +1,108 @@ +package api + +import ( + "bytes" + "context" + "errors" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" +) + +func TestGetChannel(t *testing.T) { + channelID := "channelId" + + snowPipeServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, http.MethodGet, r.Method) + require.Equal(t, "application/json", r.Header.Get("Content-Type")) + + switch r.URL.String() { + case "/channels/" + channelID: + _, err := w.Write([]byte(`{"channelId":"channelId","channelName":"channelName","clientName":"clientName","valid":true,"deleted":false,"tableSchema":{"EVENT":{"type":"VARCHAR(16777216)","logicalType":"TEXT","precision":null,"scale":null,"byteLength":16777216,"length":16777216,"nullable":true},"ID":{"type":"VARCHAR(16777216)","logicalType":"TEXT","precision":null,"scale":null,"byteLength":16777216,"length":16777216,"nullable":true},"TIMESTAMP":{"type":"TIMESTAMP_TZ(9)","logicalType":"TIMESTAMP_TZ","precision":0,"scale":9,"byteLength":null,"length":null,"nullable":true}}}`)) + require.NoError(t, err) + default: + w.WriteHeader(http.StatusNotFound) + } + })) + defer snowPipeServer.Close() + + ctx := context.Background() + + t.Run("Success", func(t *testing.T) { + manager := New(snowPipeServer.URL, snowPipeServer.Client()) + res, err := manager.GetChannel(ctx, channelID) + require.NoError(t, err) + 
require.EqualValues(t, &model.ChannelResponse{ + ChannelID: "channelId", + ChannelName: "channelName", + ClientName: "clientName", + Valid: true, + Deleted: false, + TableSchema: map[string]map[string]interface{}{ + "EVENT": { + "byteLength": 1.6777216e+07, + "length": 1.6777216e+07, + "logicalType": "TEXT", + "nullable": true, + "precision": nil, + "scale": nil, + "type": "VARCHAR(16777216)", + }, + "ID": { + "byteLength": 1.6777216e+07, + "length": 1.6777216e+07, + "logicalType": "TEXT", + "nullable": true, + "precision": nil, + "scale": nil, + "type": "VARCHAR(16777216)", + }, + "TIMESTAMP": { + "byteLength": nil, + "length": nil, + "logicalType": "TIMESTAMP_TZ", + "nullable": true, + "precision": float64(0), + "scale": float64(9), + "type": "TIMESTAMP_TZ(9)", + }, + }, + }, + res, + ) + }) + t.Run("Request failure", func(t *testing.T) { + manager := New(snowPipeServer.URL, &mockRequestDoer{ + err: errors.New("bad client"), + }) + res, err := manager.GetChannel(ctx, channelID) + require.Error(t, err) + require.Nil(t, res) + }) + t.Run("Request failure (non 200's status code)", func(t *testing.T) { + manager := New(snowPipeServer.URL, &mockRequestDoer{ + response: &http.Response{ + StatusCode: http.StatusBadRequest, + Body: nopReadCloser{Reader: bytes.NewReader([]byte(`{}`))}, + }, + }) + res, err := manager.GetChannel(ctx, channelID) + require.Error(t, err) + require.Nil(t, res) + }) + t.Run("Request failure (invalid response)", func(t *testing.T) { + manager := New(snowPipeServer.URL, &mockRequestDoer{ + response: &http.Response{ + StatusCode: http.StatusOK, + Body: nopReadCloser{Reader: bytes.NewReader([]byte(`{abd}`))}, + }, + }) + res, err := manager.GetChannel(ctx, channelID) + require.Error(t, err) + require.Nil(t, res) + }) +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert.go new file mode 100644 index 0000000000..9607bc772a --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert.go @@ -0,0 +1,42 @@ +package api + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" + "github.com/rudderlabs/rudder-server/utils/httputil" +) + +func (a *API) Insert(ctx context.Context, channelID string, insertRequest *model.InsertRequest) (*model.InsertResponse, error) { + reqJSON, err := json.Marshal(insertRequest) + if err != nil { + return nil, fmt.Errorf("marshalling insert request: %w", err) + } + + insertReqURL := a.clientURL + "/channels/" + channelID + "/insert" + req, err := http.NewRequestWithContext(ctx, http.MethodPost, insertReqURL, bytes.NewBuffer(reqJSON)) + if err != nil { + return nil, fmt.Errorf("creating insert request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + + resp, reqErr := a.requestDoer.Do(req) + if reqErr != nil { + return nil, fmt.Errorf("sending insert request: %w", reqErr) + } + defer func() { httputil.CloseResponse(resp) }() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("invalid status code for insert: %d, body: %s", resp.StatusCode, string(mustReadAll(resp.Body))) + } + + var res model.InsertResponse + if err := json.NewDecoder(resp.Body).Decode(&res); err != nil { + return nil, fmt.Errorf("decoding insert response: %w", err) + } + return &res, nil +} diff --git 
a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert_test.go new file mode 100644 index 0000000000..2da32d898e --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert_test.go @@ -0,0 +1,114 @@ +package api + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "io" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" +) + +func TestInsert(t *testing.T) { + successChannelID := "successChannelID" + failureChannelID := "failureChannelID" + ir := &model.InsertRequest{Rows: []model.Row{{"key1": "value1"}, {"key2": "value2"}}, Offset: "5"} + + snowPipeServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, http.MethodPost, r.Method) + require.Equal(t, "application/json", r.Header.Get("Content-Type")) + + irJSON, err := json.Marshal(ir) + require.NoError(t, err) + + body, err := io.ReadAll(r.Body) + require.NoError(t, err) + require.NoError(t, r.Body.Close()) + require.JSONEq(t, string(irJSON), string(body)) + + switch r.URL.String() { + case "/channels/" + successChannelID + "/insert": + _, err := w.Write([]byte(`{"success":true}`)) + require.NoError(t, err) + case "/channels/" + failureChannelID + "/insert": + _, err := w.Write([]byte(`{"success":false,"errors":[{"message":"The given row cannot be converted to the internal format due to invalid value: Value cannot be ingested into Snowflake column RECEIVED_AT of type TIMESTAMP, rowIndex:0, reason: Not a valid value, see https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for the list of supported formats","rowIndex":0,"missingNotNullColNames":null,"nullValueForNotNullColNames":null,"extraColNames":null},{"message":"The given row cannot be converted to the internal format: Extra columns: [UNKNOWN]. 
Columns not present in the table shouldn't be specified, rowIndex:1","rowIndex":1,"missingNotNullColNames":null,"nullValueForNotNullColNames":null,"extraColNames":["UNKNOWN"]}],"code":"ERR_SCHEMA_CONFLICT"}`)) + require.NoError(t, err) + default: + w.WriteHeader(http.StatusNotFound) + } + })) + defer snowPipeServer.Close() + + ctx := context.Background() + + t.Run("Insert success", func(t *testing.T) { + manager := New(snowPipeServer.URL, snowPipeServer.Client()) + res, err := manager.Insert(ctx, successChannelID, ir) + require.NoError(t, err) + require.Equal(t, &model.InsertResponse{Success: true, Errors: nil}, res) + }) + t.Run("Insert failure", func(t *testing.T) { + manager := New(snowPipeServer.URL, snowPipeServer.Client()) + res, err := manager.Insert(ctx, failureChannelID, ir) + require.NoError(t, err) + require.Equal(t, &model.InsertResponse{ + Success: false, + Errors: []model.InsertError{ + { + RowIndex: 0, + ExtraColNames: nil, + MissingNotNullColNames: nil, + NullValueForNotNullColNames: nil, + Message: "The given row cannot be converted to the internal format due to invalid value: Value cannot be ingested into Snowflake column RECEIVED_AT of type TIMESTAMP, rowIndex:0, reason: Not a valid value, see https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for the list of supported formats", + }, + { + RowIndex: 1, + ExtraColNames: []string{"UNKNOWN"}, + NullValueForNotNullColNames: nil, + Message: "The given row cannot be converted to the internal format: Extra columns: [UNKNOWN]. Columns not present in the table shouldn't be specified, rowIndex:1"}, + }, + Code: "ERR_SCHEMA_CONFLICT", + }, + res, + ) + }) + t.Run("Request failure", func(t *testing.T) { + manager := New(snowPipeServer.URL, &mockRequestDoer{ + err: errors.New("bad client"), + response: &http.Response{ + StatusCode: http.StatusOK, + }, + }) + res, err := manager.Insert(ctx, successChannelID, ir) + require.Error(t, err) + require.Nil(t, res) + }) + t.Run("Request failure (non 200's status code)", func(t *testing.T) { + manager := New(snowPipeServer.URL, &mockRequestDoer{ + response: &http.Response{ + StatusCode: http.StatusBadRequest, + Body: nopReadCloser{Reader: bytes.NewReader([]byte(`{}`))}, + }, + }) + res, err := manager.Insert(ctx, successChannelID, ir) + require.Error(t, err) + require.Nil(t, res) + }) + t.Run("Request failure (invalid response)", func(t *testing.T) { + manager := New(snowPipeServer.URL, &mockRequestDoer{ + response: &http.Response{ + StatusCode: http.StatusOK, + Body: nopReadCloser{Reader: bytes.NewReader([]byte(`{abd}`))}, + }, + }) + res, err := manager.Insert(ctx, successChannelID, ir) + require.Error(t, err) + require.Nil(t, res) + }) +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/status.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/status.go new file mode 100644 index 0000000000..47b496563b --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/status.go @@ -0,0 +1,36 @@ +package api + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" + "github.com/rudderlabs/rudder-server/utils/httputil" +) + +func (a *API) Status(ctx context.Context, channelID string) (*model.StatusResponse, error) { + statusReqURL := a.clientURL + "/channels/" + channelID + "/status" + req, err := http.NewRequestWithContext(ctx, http.MethodGet, 
statusReqURL, nil) + if err != nil { + return nil, fmt.Errorf("creating status request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + + resp, reqErr := a.requestDoer.Do(req) + if reqErr != nil { + return nil, fmt.Errorf("sending status request: %w", reqErr) + } + defer func() { httputil.CloseResponse(resp) }() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("invalid status code for status: %d, body: %s", resp.StatusCode, string(mustReadAll(resp.Body))) + } + + var res model.StatusResponse + if err := json.NewDecoder(resp.Body).Decode(&res); err != nil { + return nil, fmt.Errorf("decoding status response: %w", err) + } + return &res, nil +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/status_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/status_test.go new file mode 100644 index 0000000000..8ee66707ab --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/status_test.go @@ -0,0 +1,80 @@ +package api + +import ( + "bytes" + "context" + "errors" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" +) + +func TestStatus(t *testing.T) { + channelID := "channelID" + + snowPipeServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, http.MethodGet, r.Method) + require.Equal(t, "application/json", r.Header.Get("Content-Type")) + + switch r.URL.String() { + case "/channels/" + channelID + "/status": + _, err := w.Write([]byte(`{"success": true, "offset":"5","valid":true}`)) + require.NoError(t, err) + default: + w.WriteHeader(http.StatusNotFound) + } + })) + defer snowPipeServer.Close() + + ctx := context.Background() + + t.Run("Success", func(t *testing.T) { + manager := New(snowPipeServer.URL, snowPipeServer.Client()) + res, err := manager.Status(ctx, channelID) + require.NoError(t, err) + require.Equal(t, &model.StatusResponse{ + Success: true, + Offset: "5", + Valid: true, + }, + res, + ) + }) + t.Run("Request failure", func(t *testing.T) { + manager := New(snowPipeServer.URL, &mockRequestDoer{ + err: errors.New("bad client"), + response: &http.Response{ + StatusCode: http.StatusOK, + }, + }) + res, err := manager.Status(ctx, channelID) + require.Error(t, err) + require.Nil(t, res) + }) + t.Run("Request failure (non 200's status code)", func(t *testing.T) { + manager := New(snowPipeServer.URL, &mockRequestDoer{ + response: &http.Response{ + StatusCode: http.StatusBadRequest, + Body: nopReadCloser{Reader: bytes.NewReader([]byte(`{}`))}, + }, + }) + res, err := manager.Status(ctx, channelID) + require.Error(t, err) + require.Nil(t, res) + }) + t.Run("Request failure (invalid response)", func(t *testing.T) { + manager := New(snowPipeServer.URL, &mockRequestDoer{ + response: &http.Response{ + StatusCode: http.StatusOK, + Body: nopReadCloser{Reader: bytes.NewReader([]byte(`{abd}`))}, + }, + }) + res, err := manager.Status(ctx, channelID) + require.Error(t, err) + require.Nil(t, res) + }) +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model.go new file mode 100644 index 0000000000..e429d19bf5 --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model.go @@ -0,0 +1,96 
@@ +package model + +import ( + "regexp" + + "github.com/rudderlabs/rudder-server/warehouse/integrations/snowflake" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +var ( + reType = regexp.MustCompile(`(.+?)\([^)]*\)`) +) + +type ( + CreateChannelRequest struct { + RudderIdentifier string `json:"rudderIdentifier"` + Partition string `json:"partition"` + AccountConfig AccountConfig `json:"account"` + TableConfig TableConfig `json:"table"` + } + AccountConfig struct { + Account string `json:"account"` + User string `json:"user"` + Role string `json:"role"` + PrivateKey string `json:"privateKey"` + PrivateKeyPassphrase string `json:"privateKeyPassphrase"` + } + TableConfig struct { + Database string `json:"database"` + Schema string `json:"schema"` + Table string `json:"table"` + } + + ChannelResponse struct { + Success bool `json:"success"` + ChannelID string `json:"channelId"` + ChannelName string `json:"channelName"` + ClientName string `json:"clientName"` + Valid bool `json:"valid"` + Deleted bool `json:"deleted"` + TableSchema map[string]map[string]any `json:"tableSchema"` + Error string `json:"error"` + Code string `json:"code"` + SnowflakeSDKCode string `json:"snowflakeSDKCode"` + SnowflakeAPIHttpCode int64 `json:"snowflakeAPIHttpCode"` + SnowflakeAPIStatusCode int64 `json:"snowflakeAPIStatusCode"` + SnowflakeAPIMessage string `json:"snowflakeAPIMessage"` + } + + InsertRequest struct { + Rows []Row `json:"rows"` + Offset string `json:"offset"` + } + Row map[string]any + + InsertResponse struct { + Success bool `json:"success"` + Errors []InsertError `json:"errors"` + Code string `json:"code"` + } + InsertError struct { + RowIndex int64 `json:"rowIndex"` + ExtraColNames []string `json:"extraColNames"` + MissingNotNullColNames []string `json:"missingNotNullColNames"` + NullValueForNotNullColNames []string `json:"nullValueForNotNullColNames"` + Message string `json:"message"` + } + + StatusResponse struct { + Success bool `json:"success"` + Offset string `json:"offset"` + Valid bool `json:"valid"` + } +) + +func (c *ChannelResponse) SnowPipeSchema() whutils.ModelTableSchema { + warehouseSchema := make(whutils.ModelTableSchema) + + for column, info := range c.TableSchema { + dataType, isValidType := info["type"].(string) + if !isValidType { + continue + } + + numericScale := int64(0) + if scale, scaleExists := info["scale"].(float64); scaleExists { + numericScale = int64(scale) + } + + cleanedDataType := reType.ReplaceAllString(dataType, "$1") + + snowflakeType, _ := snowflake.CalculateDataType(cleanedDataType, numericScale) + warehouseSchema[column] = snowflakeType + } + return warehouseSchema +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model_test.go new file mode 100644 index 0000000000..9c8696fc02 --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model_test.go @@ -0,0 +1,93 @@ +package model + +import ( + "testing" + + "github.com/stretchr/testify/require" + + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func TestTypeRegex(t *testing.T) { + testCases := []struct { + input, expected string + }{ + {"", ""}, + {"VARCHAR", "VARCHAR"}, + {"NUMBER(38,0)", "NUMBER"}, + {"TIMESTAMP_TZ(9)", "TIMESTAMP_TZ"}, + {"VARCHAR(16777216)", "VARCHAR"}, + {"CustomType('abc', 'def')", "CustomType"}, + {"CustomType('abc', 'def')", "CustomType"}, + {"RandomType(anything!)", 
"RandomType"}, + } + for _, tc := range testCases { + require.Equal(t, tc.expected, reType.ReplaceAllString(tc.input, "$1")) + } +} + +func TestChannelResponse_SnowPipeSchema(t *testing.T) { + testCases := []struct { + name string + tableSchema map[string]map[string]interface{} + expected whutils.ModelTableSchema + }{ + { + name: "Valid types with scale", + tableSchema: map[string]map[string]interface{}{ + "column1": {"type": "VARCHAR(16777216)"}, + "column2": {"type": "NUMBER(2,0)", "scale": 2.0}, + "column3": {"type": "NUMBER(2,0)", "scale": 0.0}, + "column4": {"type": "NUMBER(2,0)", "scale": 0}, + "column5": {"type": "BOOLEAN"}, + "column6": {"type": "TIMESTAMP_TZ(9)", "scale": float64(9)}, + }, + expected: whutils.ModelTableSchema{ + "column1": "string", + "column2": "float", + "column3": "int", + "column4": "int", + "column5": "boolean", + "column6": "datetime", + }, + }, + { + name: "Invalid type field", + tableSchema: map[string]map[string]interface{}{ + "column1": {"type": 12345}, + }, + expected: whutils.ModelTableSchema{}, + }, + { + name: "Missing scale for number", + tableSchema: map[string]map[string]interface{}{ + "column1": {"type": "NUMBER(2,0)"}, + }, + expected: whutils.ModelTableSchema{ + "column1": "int", + }, + }, + { + name: "Empty table schema", + tableSchema: map[string]map[string]interface{}{}, + expected: whutils.ModelTableSchema{}, + }, + { + name: "Type with regex cleaning", + tableSchema: map[string]map[string]interface{}{ + "column1": {"type": "VARCHAR(255)"}, + }, + expected: whutils.ModelTableSchema{ + "column1": "string", + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + c := &ChannelResponse{} + c.TableSchema = tc.tableSchema + require.Equal(t, tc.expected, c.SnowPipeSchema()) + }) + } +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/options.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/options.go new file mode 100644 index 0000000000..d7a1bedd15 --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/options.go @@ -0,0 +1,9 @@ +package snowpipestreaming + +type Opt func(*Manager) + +func WithRequestDoer(requestDoer requestDoer) Opt { + return func(s *Manager) { + s.requestDoer = requestDoer + } +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/poll.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/poll.go new file mode 100644 index 0000000000..b34c911dc5 --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/poll.go @@ -0,0 +1,103 @@ +package snowpipestreaming + +import ( + "context" + "fmt" + "net/http" + "time" + + "github.com/rudderlabs/rudder-go-kit/stringify" + obskit "github.com/rudderlabs/rudder-observability-kit/go/labels" + "golang.org/x/sync/errgroup" + + "github.com/rudderlabs/rudder-go-kit/logger" + + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common" +) + +func (m *Manager) Poll(pollInput common.AsyncPoll) common.PollStatusResponse { + m.logger.Infon("Polling started", logger.NewStringField("importId", pollInput.ImportId)) + + var uploadInfos []uploadInfo + err := json.Unmarshal([]byte(pollInput.ImportId), &uploadInfos) + if err != nil { + return common.PollStatusResponse{ + InProgress: false, + StatusCode: http.StatusBadRequest, + Complete: true, + HasFailed: true, + Error: fmt.Sprintf("failed to unmarshal import id: %v", err), + } + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + g, ctx := 
errgroup.WithContext(ctx)
+	g.SetLimit(m.config.maxConcurrentPollWorkers.Load())
+
+	for i, info := range uploadInfos {
+		g.Go(func() error {
+			if err := m.pollUploadInfo(ctx, info); err != nil {
+				uploadInfos[i].Failed = true
+				uploadInfos[i].Reason = err.Error()
+				m.logger.Warnn("Failed to poll channel offset",
+					logger.NewStringField("channelId", info.ChannelID),
+					logger.NewStringField("offset", info.Offset),
+					logger.NewStringField("table", info.Table),
+					obskit.Error(err),
+				)
+			}
+			return nil
+		})
+	}
+	_ = g.Wait() // the goroutines always return nil; failures are recorded on uploadInfos instead
+
+	hasFailed := false
+	for _, info := range uploadInfos {
+		if info.Failed {
+			hasFailed = true
+			break
+		}
+	}
+	if hasFailed {
+		return common.PollStatusResponse{
+			InProgress:    false,
+			StatusCode:    http.StatusOK,
+			Complete:      true,
+			HasFailed:     true,
+			FailedJobURLs: stringify.Any(uploadInfos),
+		}
+	}
+
+	return common.PollStatusResponse{
+		InProgress: false,
+		StatusCode: http.StatusOK,
+		Complete:   true,
+		HasFailed:  false,
+		HasWarning: false,
+	}
+}
+
+func (m *Manager) pollUploadInfo(ctx context.Context, info uploadInfo) error {
+	log := m.logger.Withn(
+		logger.NewStringField("channelId", info.ChannelID),
+		logger.NewStringField("offset", info.Offset),
+		logger.NewStringField("table", info.Table),
+	)
+	log.Infon("Polling for channel")
+
+	for {
+		statusRes, err := m.api.Status(ctx, info.ChannelID)
+		if err != nil {
+			return fmt.Errorf("getting status: %v", err)
+		}
+		if !statusRes.Valid || !statusRes.Success {
+			return errInvalidStatusResponse
+		}
+		if statusRes.Offset == info.Offset {
+			log.Infon("Polling completed")
+			return nil
+		}
+		log.Infon("Polling in progress. Sleeping before next poll.",
+			logger.NewStringField("statusOffset", statusRes.Offset),
+			logger.NewBoolField("statusSuccess", statusRes.Success),
+			logger.NewBoolField("statusValid", statusRes.Valid),
+			logger.NewDurationField("pollFrequency", m.config.pollFrequency),
+		)
+		time.Sleep(m.config.pollFrequency)
+	}
+}
diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming.go
new file mode 100644
index 0000000000..f9b23c76a5
--- /dev/null
+++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming.go
@@ -0,0 +1,108 @@
+package snowpipestreaming
+
+import (
+	"errors"
+	"net/http"
+	"sync"
+	"time"
+
+	"github.com/hashicorp/go-retryablehttp"
+	jsoniter "github.com/json-iterator/go"
+
+	"github.com/rudderlabs/rudder-go-kit/bytesize"
+	"github.com/rudderlabs/rudder-go-kit/config"
+	"github.com/rudderlabs/rudder-go-kit/logger"
+	"github.com/rudderlabs/rudder-go-kit/stats"
+	obskit "github.com/rudderlabs/rudder-observability-kit/go/labels"
+
+	backendconfig "github.com/rudderlabs/rudder-server/backend-config"
+	snowpipeapi "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api"
+	"github.com/rudderlabs/rudder-server/utils/timeutil"
+)
+
+var json = jsoniter.ConfigCompatibleWithStandardLibrary
+
+var (
+	errInvalidStatusResponse = errors.New("invalid status response")
+	errInsertingDataFailed   = errors.New("inserting data failed")
+)
+
+func New(
+	conf *config.Config,
+	logger logger.Logger,
+	statsFactory stats.Stats,
+	destination *backendconfig.DestinationT,
+	opts ...Opt,
+) *Manager {
+	m := &Manager{
+		conf: conf,
+		logger: logger.Child("snowpipestreaming").Withn(
+			obskit.WorkspaceID(destination.WorkspaceID),
+			obskit.DestinationID(destination.ID),
+			obskit.DestinationType(destination.DestinationDefinition.Name),
+		),
+		statsFactory: statsFactory,
+		destination:  destination,
+		now:          timeutil.Now,
+		channelCache: sync.Map{},
+	}
+	for _, opt := range opts {
+		opt(m)
+	}
+
+	m.config.client.maxHTTPConnections = conf.GetInt("SnowpipeStreaming.Client.maxHTTPConnections", 10)
+	m.config.client.maxHTTPIdleConnections = conf.GetInt("SnowpipeStreaming.Client.maxHTTPIdleConnections", 5)
+	m.config.client.maxIdleConnDuration = conf.GetDuration("SnowpipeStreaming.Client.maxIdleConnDuration", 30, time.Second)
+	m.config.client.disableKeepAlives = conf.GetBool("SnowpipeStreaming.Client.disableKeepAlives", true)
+	m.config.client.timeoutDuration = conf.GetDuration("SnowpipeStreaming.Client.timeout", 300, time.Second)
+	m.config.client.retryWaitMin = conf.GetDuration("SnowpipeStreaming.Client.retryWaitMin", 100, time.Millisecond)
+	m.config.client.retryWaitMax = conf.GetDuration("SnowpipeStreaming.Client.retryWaitMax", 10, time.Second)
+	m.config.client.retryMax = conf.GetInt("SnowpipeStreaming.Client.retryMax", 5)
+	m.config.clientURL = conf.GetString("SnowpipeStreaming.Client.URL", "http://localhost:9078")
+	m.config.instanceID = conf.GetString("INSTANCE_ID", "1")
+	m.config.pollFrequency = conf.GetDuration("SnowpipeStreaming.pollFrequency", 300, time.Millisecond)
+	m.config.maxBufferCapacity = conf.GetReloadableInt64Var(512*bytesize.KB, bytesize.B, "SnowpipeStreaming.maxBufferCapacity")
+	m.config.maxConcurrentPollWorkers = conf.GetReloadableIntVar(10, 1, "SnowpipeStreaming.maxConcurrentPollWorkers")
+	m.config.maxConcurrentUploadWorkers = conf.GetReloadableIntVar(8, 1, "SnowpipeStreaming.maxConcurrentUploadWorkers")
+
+	tags := stats.Tags{
+		"module":        "batch_router",
+		"workspaceId":   destination.WorkspaceID,
+		"destType":      destination.DestinationDefinition.Name,
+		"destinationId": destination.ID,
+	}
+	m.stats.successJobCount = statsFactory.NewTaggedStat("snowpipestreaming_success_job_count", stats.CountType, tags)
+	m.stats.failedJobCount = statsFactory.NewTaggedStat("snowpipestreaming_failed_jobs_count", stats.CountType, tags)
+	m.stats.discardCount = statsFactory.NewTaggedStat("snowpipestreaming_discards_count", stats.CountType, tags)
+	m.stats.channelSchemaCreationErrorCount = statsFactory.NewTaggedStat("snowpipestreaming_create_channel_schema_error", stats.CountType, tags)
+	m.stats.channelTableCreationErrorCount = statsFactory.NewTaggedStat("snowpipestreaming_create_channel_table_error", stats.CountType, tags)
+
+	if m.requestDoer == nil {
+		m.requestDoer = m.retryableClient().StandardClient()
+	}
+
+	m.api = newApiAdapter(
+		snowpipeapi.New(m.config.clientURL, m.requestDoer),
+		statsFactory,
+		destination,
+	)
+	return m
+}
+
+func (m *Manager) retryableClient() *retryablehttp.Client {
+	client := retryablehttp.NewClient()
+	client.HTTPClient = &http.Client{
+		Transport: &http.Transport{
+			DisableKeepAlives:   m.config.client.disableKeepAlives,
+			MaxConnsPerHost:     m.config.client.maxHTTPConnections,
+			MaxIdleConnsPerHost: m.config.client.maxHTTPIdleConnections,
+			IdleConnTimeout:     m.config.client.maxIdleConnDuration,
+		},
+		Timeout: m.config.client.timeoutDuration,
+	}
+	client.Logger = nil
+	client.RetryWaitMin = m.config.client.retryWaitMin
+	client.RetryWaitMax = m.config.client.retryWaitMax
+	client.RetryMax = m.config.client.retryMax
+	return client
+}
diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml
new file mode 100644
index 0000000000..0c9f5ea530
--- /dev/null +++
b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml @@ -0,0 +1,11 @@ +version: "3.9" + +services: + rudder-snowpipe-clients: + image: "hub.dev-rudder.rudderlabs.com/dockerhub-proxy/rudderstack/rudder-snowpipe-clients:develop" + ports: + - "9078" + healthcheck: + test: wget --no-verbose --tries=1 --spider http://localhost:9078/health || exit 1 + interval: 1s + retries: 25 diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testhelper/testhelper.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testhelper/testhelper.go new file mode 100644 index 0000000000..c97100f62d --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testhelper/testhelper.go @@ -0,0 +1,72 @@ +package testhelper + +import ( + "context" + "database/sql" + "encoding/json" + "errors" + "fmt" + "os" + "strings" + "testing" + "time" + + "github.com/rudderlabs/rudder-go-kit/testhelper/rand" + "github.com/stretchr/testify/require" + + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +const ( + TestKeyPairUnencrypted = "SNOWPIPE_STREAMING_KEYPAIR_UNENCRYPTED_INTEGRATION_TEST_CREDENTIALS" +) + +type TestCredentials struct { + Account string `json:"account"` + Warehouse string `json:"warehouse"` + User string `json:"user"` + Role string `json:"role"` + Database string `json:"database"` + PrivateKey string `json:"privateKey"` + PrivateKeyPassphrase string `json:"privateKeyPassphrase"` +} + +func GetSnowPipeTestCredentials(key string) (*TestCredentials, error) { + cred, exists := os.LookupEnv(key) + if !exists { + return nil, errors.New("snowpipe test credentials not found") + } + + var credentials TestCredentials + err := json.Unmarshal([]byte(cred), &credentials) + if err != nil { + return nil, fmt.Errorf("unable to marshall %s to snowpipe test credentials: %v", key, err) + } + return &credentials, nil +} + +func RandSchema(provider string) string { + hex := strings.ToLower(rand.String(12)) + namespace := fmt.Sprintf("test_%s_%d", hex, time.Now().Unix()) + return whutils.ToProviderCase(provider, whutils.ToSafeNamespace(provider, + namespace, + )) +} + +func DropSchema(t *testing.T, db *sql.DB, namespace string) { + t.Helper() + t.Log("dropping schema", namespace) + + require.Eventually(t, + func() bool { + _, err := db.ExecContext(context.Background(), fmt.Sprintf(`DROP SCHEMA %q CASCADE;`, namespace)) + if err != nil { + t.Logf("error deleting schema %q: %v", namespace, err) + return false + } + return true + }, + time.Minute, + time.Second, + ) +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/transform.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/transform.go new file mode 100644 index 0000000000..3e9f73e6c8 --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/transform.go @@ -0,0 +1,10 @@ +package snowpipestreaming + +import ( + "github.com/rudderlabs/rudder-server/jobsdb" + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common" +) + +func (m *Manager) Transform(job *jobsdb.JobT) (string, error) { + return common.GetMarshalledData(string(job.EventPayload), job.JobID) +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/types.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/types.go new file mode 100644 index 0000000000..0a44550768 --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/types.go @@ -0,0 +1,116 @@ 
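+// types.go declares the Manager with its configuration and stats, the
+// event/destConfig/uploadInfo/discardInfo helper types, and the api interface
+// implemented by the Snowpipe client adapter. The batch router drives the
+// Manager through the common async-destination lifecycle: Transform marshals
+// each job, Upload groups events per table and inserts them through per-table
+// channels, Poll waits until every channel has committed its offset, and
+// GetUploadStats maps the table-level outcomes back to individual job IDs.
+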
+package snowpipestreaming + +import ( + "context" + "net/http" + "sync" + "time" + + "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stats" + + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" + + backendconfig "github.com/rudderlabs/rudder-server/backend-config" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +type ( + Manager struct { + conf *config.Config + logger logger.Logger + statsFactory stats.Stats + destination *backendconfig.DestinationT + requestDoer requestDoer + now func() time.Time + api api + channelCache sync.Map + + config struct { + client struct { + maxHTTPConnections int + maxHTTPIdleConnections int + maxIdleConnDuration time.Duration + disableKeepAlives bool + timeoutDuration time.Duration + retryWaitMin time.Duration + retryWaitMax time.Duration + retryMax int + } + + clientURL string + instanceID string + pollFrequency time.Duration + maxBufferCapacity config.ValueLoader[int64] + maxConcurrentPollWorkers config.ValueLoader[int] + maxConcurrentUploadWorkers config.ValueLoader[int] + } + + stats struct { + successJobCount stats.Counter + failedJobCount stats.Counter + discardCount stats.Counter + channelSchemaCreationErrorCount stats.Counter + channelTableCreationErrorCount stats.Counter + } + } + + requestDoer interface { + Do(*http.Request) (*http.Response, error) + } + + event struct { + Message struct { + Metadata struct { + Table string `json:"table"` + Columns map[string]string `json:"columns"` + } `json:"metadata"` + Data map[string]any `json:"data"` + } `json:"message"` + Metadata struct { + JobID int64 `json:"job_id"` + } + } + + destConfig struct { + Account string `mapstructure:"account"` + Warehouse string `mapstructure:"warehouse"` + Database string `mapstructure:"database"` + User string `mapstructure:"user"` + Role string `mapstructure:"role"` + PrivateKey string `mapstructure:"privateKey"` + PrivateKeyPassphrase string `mapstructure:"privateKeyPassphrase"` + Namespace string `mapstructure:"namespace"` + } + + uploadInfo struct { + ChannelID string `json:"channelId"` + Offset string `json:"offset"` + Table string `json:"table"` + Failed bool `json:"failed"` + Reason string `json:"reason"` + } + + discardInfo struct { + table string + colName string + eventData map[string]any + reason string + uuidTS string + } + + api interface { + CreateChannel(ctx context.Context, channelReq *model.CreateChannelRequest) (*model.ChannelResponse, error) + DeleteChannel(ctx context.Context, channelID string, sync bool) error + Insert(ctx context.Context, channelID string, insertRequest *model.InsertRequest) (*model.InsertResponse, error) + Status(ctx context.Context, channelID string) (*model.StatusResponse, error) + } +) + +func (e *event) setUUIDTimestamp(formattedTimestamp string) { + uuidTimestampColumn := whutils.ToProviderCase(whutils.SNOWFLAKE, "uuid_ts") + if _, columnExists := e.Message.Metadata.Columns[uuidTimestampColumn]; columnExists { + e.Message.Data[uuidTimestampColumn] = formattedTimestamp + } +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/upload.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/upload.go new file mode 100644 index 0000000000..4c7248e35d --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/upload.go @@ -0,0 +1,266 @@ +package snowpipestreaming + +import ( + "bufio" + "context" + stdjson 
"encoding/json" + "fmt" + "os" + "strconv" + "sync" + + "github.com/mitchellh/mapstructure" + "github.com/samber/lo" + "golang.org/x/sync/errgroup" + + "github.com/rudderlabs/rudder-go-kit/logger" + obskit "github.com/rudderlabs/rudder-observability-kit/go/labels" + + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common" + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" + "github.com/rudderlabs/rudder-server/utils/misc" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func (m *Manager) Upload(asyncDest *common.AsyncDestinationStruct) common.AsyncUploadOutput { + m.logger.Infon("Uploading data to snowpipe streaming destination") + + var destConf destConfig + err := mapstructure.Decode(asyncDest.Destination.Config, &destConf) + if err != nil { + return m.abortJobs(asyncDest, fmt.Errorf("failed to decode destination config: %v", err).Error()) + } + + events, err := m.eventsFromFile(asyncDest.FileName) + if err != nil { + return m.abortJobs(asyncDest, fmt.Errorf("failed to read events from file: %v", err).Error()) + } + m.logger.Infon("Read events from file", logger.NewIntField("events", int64(len(events)))) + + failedJobIDs, successJobIDs, uploadInfos := m.handleEvents(asyncDest, events, destConf) + + var importParameters stdjson.RawMessage + if len(uploadInfos) > 0 { + importIDBytes, err := json.Marshal(uploadInfos) + if err != nil { + return m.abortJobs(asyncDest, fmt.Errorf("failed to marshal import id: %v", err).Error()) + } + + importParameters, err = json.Marshal(common.ImportParameters{ + ImportId: string(importIDBytes), + }) + if err != nil { + return m.abortJobs(asyncDest, fmt.Errorf("failed to marshal import parameters: %v", err).Error()) + } + } + m.logger.Infon("Uploaded data to snowpipe streaming destination") + + m.stats.failedJobCount.Count(len(failedJobIDs)) + m.stats.successJobCount.Count(len(successJobIDs)) + + return common.AsyncUploadOutput{ + ImportingJobIDs: successJobIDs, + ImportingCount: len(successJobIDs), + ImportingParameters: importParameters, + FailedJobIDs: failedJobIDs, + FailedCount: len(failedJobIDs), + DestinationID: asyncDest.Destination.ID, + } +} + +func (m *Manager) eventsFromFile(fileName string) ([]event, error) { + file, err := os.Open(fileName) + if err != nil { + return nil, fmt.Errorf("failed to open file %s: %w", fileName, err) + } + defer func() { + _ = file.Close() + }() + + var events []event + + scanner := bufio.NewScanner(file) + scanner.Buffer(nil, int(m.config.maxBufferCapacity.Load())) + + for scanner.Scan() { + var e event + if err := json.Unmarshal(scanner.Bytes(), &e); err != nil { + return nil, fmt.Errorf("failed to unmarshal event: %v", err) + } + + events = append(events, e) + } + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("error reading from file: %v", err) + } + return events, nil +} + +func (m *Manager) handleEvents( + asyncDest *common.AsyncDestinationStruct, + events []event, + destConf destConfig, +) ( + failedJobIDs []int64, + successJobIDs []int64, + uploadInfos []*uploadInfo, +) { + var ( + discardInfos []discardInfo + mu sync.Mutex + ) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + g, gCtx := errgroup.WithContext(ctx) + g.SetLimit(m.config.maxConcurrentUploadWorkers.Load()) + + groupedEvents := lo.GroupBy(events, func(event event) string { + return event.Message.Metadata.Table + }) + for tableName, tableEvents := range groupedEvents { + g.Go(func() 
error { + jobIDs := lo.Map(tableEvents, func(event event, _ int) int64 { + return event.Metadata.JobID + }) + + uploadTableInfo, discardTableInfo, err := m.loadTableEventsToSnowPipe( + gCtx, asyncDest, destConf, tableName, tableEvents, + ) + + mu.Lock() + defer mu.Unlock() + + if err != nil { + m.logger.Warnn("Failed to upload events to table", + logger.NewStringField("table", tableName), + obskit.Error(err), + ) + + failedJobIDs = append(failedJobIDs, jobIDs...) + return nil + } + + successJobIDs = append(successJobIDs, jobIDs...) + uploadInfos = append(uploadInfos, uploadTableInfo) + discardInfos = append(discardInfos, discardTableInfo...) + return nil + }) + } + _ = g.Wait() + + if len(discardInfos) > 0 { + discardUploadInfo, err := m.loadDiscardsToSnowPipe(ctx, asyncDest, destConf, discardInfos) + if err != nil { + m.logger.Warnn("Failed to upload events to discards table", + logger.NewStringField("table", discardsTable()), + obskit.Error(err), + ) + } else { + uploadInfos = append(uploadInfos, discardUploadInfo) + } + } + return failedJobIDs, successJobIDs, uploadInfos +} + +func (m *Manager) loadTableEventsToSnowPipe( + ctx context.Context, + asyncDest *common.AsyncDestinationStruct, + destConf destConfig, + tableName string, + tableEvents []event, +) (*uploadInfo, []discardInfo, error) { + log := m.logger.Withn( + logger.NewStringField("table", tableName), + logger.NewIntField("events", int64(len(tableEvents))), + ) + log.Infon("Uploading data to table") + + eventSchema := schemaFromEvents(tableEvents) + + channelResponse, err := m.createChannel(ctx, asyncDest, destConf, tableName, eventSchema) + if err != nil { + return nil, nil, fmt.Errorf("creating channel: %v", err) + } + snowPipeSchema := channelResponse.SnowPipeSchema() + + columnInfos := findNewColumns(eventSchema, snowPipeSchema) + if len(columnInfos) > 0 { + if err := m.addColumns(ctx, destConf.Namespace, tableName, columnInfos); err != nil { + return nil, nil, fmt.Errorf("adding columns: %v", err) + } + + channelResponse, err = m.recreateChannel(ctx, asyncDest, destConf, tableName, eventSchema, channelResponse) + if err != nil { + return nil, nil, fmt.Errorf("recreating channel: %v", err) + } + snowPipeSchema = channelResponse.SnowPipeSchema() + } + + formattedTS := m.now().Format(misc.RFC3339Milli) + for _, tableEvent := range tableEvents { + tableEvent.setUUIDTimestamp(formattedTS) + } + + discardInfos := lo.FlatMap(tableEvents, func(tableEvent event, _ int) []discardInfo { + return discardedRecords(tableEvent, snowPipeSchema, tableName, formattedTS) + }) + + oldestEvent := lo.MaxBy(tableEvents, func(a, b event) bool { + return a.Metadata.JobID > b.Metadata.JobID + }) + offset := strconv.FormatInt(oldestEvent.Metadata.JobID, 10) + + insertReq := &model.InsertRequest{ + Rows: lo.Map(tableEvents, func(event event, _ int) model.Row { + return event.Message.Data + }), + Offset: offset, + } + insertRes, err := m.api.Insert(ctx, channelResponse.ChannelID, insertReq) + if err != nil { + if deleteErr := m.deleteChannel(ctx, tableName, channelResponse.ChannelID); deleteErr != nil { + log.Warnn("Failed to delete channel", obskit.Error(deleteErr)) + } + return nil, nil, fmt.Errorf("inserting data: %v", err) + } + if !insertRes.Success { + if deleteErr := m.deleteChannel(ctx, tableName, channelResponse.ChannelID); deleteErr != nil { + log.Warnn("Failed to delete channel", obskit.Error(deleteErr)) + } + return nil, nil, errInsertingDataFailed + } + log.Infon("Successfully uploaded data to table") + + info := &uploadInfo{ + ChannelID: 
channelResponse.ChannelID,
+		Offset:    offset,
+		Table:     tableName,
+	}
+	return info, discardInfos, nil
+}
+
+// schemaFromEvents iterates over the events and merges their columns into the final schema,
+// keeping the first type seen for each column (first come, first served).
+func schemaFromEvents(events []event) whutils.ModelTableSchema {
+	columnsMap := make(whutils.ModelTableSchema)
+	for _, e := range events {
+		for col, typ := range e.Message.Metadata.Columns {
+			if _, exists := columnsMap[col]; !exists {
+				columnsMap[col] = typ
+			}
+		}
+	}
+	return columnsMap
+}
+
+func (m *Manager) abortJobs(asyncDest *common.AsyncDestinationStruct, abortReason string) common.AsyncUploadOutput {
+	m.stats.failedJobCount.Count(len(asyncDest.ImportingJobIDs))
+	return common.AsyncUploadOutput{
+		AbortJobIDs:   asyncDest.ImportingJobIDs,
+		AbortCount:    len(asyncDest.ImportingJobIDs),
+		AbortReason:   abortReason,
+		DestinationID: asyncDest.Destination.ID,
+	}
+}
diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/uploadstats.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/uploadstats.go
new file mode 100644
index 0000000000..c3f790d382
--- /dev/null
+++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/uploadstats.go
@@ -0,0 +1,65 @@
+package snowpipestreaming
+
+import (
+	"fmt"
+	"net/http"
+
+	obskit "github.com/rudderlabs/rudder-observability-kit/go/labels"
+	"github.com/tidwall/gjson"
+
+	"github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common"
+)
+
+func (m *Manager) GetUploadStats(input common.GetUploadStatsInput) common.GetUploadStatsResponse {
+	m.logger.Infon("Getting upload stats for snowpipe streaming destination")
+
+	var infos []uploadInfo
+	err := json.Unmarshal([]byte(input.FailedJobURLs), &infos)
+	if err != nil {
+		m.logger.Warnn("Failed to unmarshal failed job urls", obskit.Error(err))
+		return common.GetUploadStatsResponse{
+			StatusCode: 500,
+			Error:      fmt.Errorf("failed to unmarshal failed job urls: %v", err).Error(),
+		}
+	}
+
+	// initialize the lookup maps; assigning into nil maps below would panic
+	succeededTables := make(map[string]uploadInfo)
+	failedTables := make(map[string]uploadInfo)
+
+	for _, info := range infos {
+		if info.Failed {
+			failedTables[info.Table] = info
+		} else {
+			succeededTables[info.Table] = info
+		}
+	}
+
+	var (
+		succeededJobIDs []int64
+		failedJobIDs    []int64
+	)
+	failedJobReasons := make(map[int64]string)
+
+	for _, job := range input.ImportingList {
+		tableName := gjson.GetBytes(job.EventPayload, "metadata.table").String()
+		if _, ok := succeededTables[tableName]; ok {
+			succeededJobIDs = append(succeededJobIDs, job.JobID)
+			continue
+		}
+		if info, ok := failedTables[tableName]; ok {
+			failedJobIDs = append(failedJobIDs, job.JobID)
+			failedJobReasons[job.JobID] = info.Reason
+		}
+	}
+
+	return common.GetUploadStatsResponse{
+		StatusCode: http.StatusOK,
+		Metadata: common.EventStatMeta{
+			FailedKeys:    failedJobIDs,
+			SucceededKeys: succeededJobIDs,
+			FailedReasons: failedJobReasons,
+		},
+	}
+}
diff --git a/router/batchrouter/handle.go b/router/batchrouter/handle.go
index d3feb21656..ecf8e15b0e 100644
--- a/router/batchrouter/handle.go
+++ b/router/batchrouter/handle.go
@@ -28,6 +28,7 @@ import (
 	"github.com/rudderlabs/rudder-go-kit/stats"
 	kitsync "github.com/rudderlabs/rudder-go-kit/sync"
 	obskit "github.com/rudderlabs/rudder-observability-kit/go/labels"
+	backendconfig "github.com/rudderlabs/rudder-server/backend-config"
 	"github.com/rudderlabs/rudder-server/jobsdb"
 	asynccommon "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common"
@@ -76,6 +77,7 @@ type Handle
struct { maxFailedCountForJob config.ValueLoader[int] maxFailedCountForSourcesJob config.ValueLoader[int] asyncUploadTimeout config.ValueLoader[time.Duration] + asyncUploadWorkerTimeout config.ValueLoader[time.Duration] retryTimeWindow config.ValueLoader[time.Duration] sourcesRetryTimeWindow config.ValueLoader[time.Duration] reportingEnabled bool diff --git a/router/batchrouter/handle_async.go b/router/batchrouter/handle_async.go index 3d370a141f..3571503a03 100644 --- a/router/batchrouter/handle_async.go +++ b/router/batchrouter/handle_async.go @@ -16,6 +16,7 @@ import ( "github.com/tidwall/gjson" "github.com/rudderlabs/rudder-go-kit/stats" + "github.com/rudderlabs/rudder-server/jobsdb" "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common" asynccommon "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common" @@ -320,7 +321,7 @@ func (brt *Handle) asyncUploadWorker(ctx context.Context) { select { case <-ctx.Done(): return - case <-time.After(10 * time.Second): + case <-time.After(brt.asyncUploadWorkerTimeout.Load()): brt.configSubscriberMu.RLock() destinationsMap := brt.destinationsMap uploadIntervalMap := brt.uploadIntervalMap diff --git a/router/batchrouter/handle_lifecycle.go b/router/batchrouter/handle_lifecycle.go index 06851978d7..676e5c4145 100644 --- a/router/batchrouter/handle_lifecycle.go +++ b/router/batchrouter/handle_lifecycle.go @@ -202,6 +202,7 @@ func (brt *Handle) setupReloadableVars() { brt.maxFailedCountForJob = config.GetReloadableIntVar(128, 1, "BatchRouter."+brt.destType+".maxFailedCountForJob", "BatchRouter.maxFailedCountForJob") brt.maxFailedCountForSourcesJob = config.GetReloadableIntVar(3, 1, "BatchRouter.RSources."+brt.destType+".maxFailedCountForJob", "BatchRouter.RSources.maxFailedCountForJob") brt.asyncUploadTimeout = config.GetReloadableDurationVar(30, time.Minute, "BatchRouter."+brt.destType+".asyncUploadTimeout", "BatchRouter.asyncUploadTimeout") + brt.asyncUploadWorkerTimeout = config.GetReloadableDurationVar(10, time.Second, "BatchRouter."+brt.destType+".asyncUploadWorkerTimeout", "BatchRouter.asyncUploadWorkerTimeout") brt.retryTimeWindow = config.GetReloadableDurationVar(180, time.Minute, "BatchRouter."+brt.destType+".retryTimeWindow", "BatchRouter."+brt.destType+".retryTimeWindowInMins", "BatchRouter.retryTimeWindow", "BatchRouter.retryTimeWindowInMins") brt.sourcesRetryTimeWindow = config.GetReloadableDurationVar(1, time.Minute, "BatchRouter.RSources."+brt.destType+".retryTimeWindow", "BatchRouter.RSources."+brt.destType+".retryTimeWindowInMins", "BatchRouter.RSources.retryTimeWindow", "BatchRouter.RSources.retryTimeWindowInMins") brt.jobQueryBatchSize = config.GetReloadableIntVar(100000, 1, "BatchRouter."+brt.destType+".jobQueryBatchSize", "BatchRouter.jobQueryBatchSize") diff --git a/testhelper/warehouse/records.go b/testhelper/warehouse/records.go new file mode 100644 index 0000000000..c1aedc836d --- /dev/null +++ b/testhelper/warehouse/records.go @@ -0,0 +1,58 @@ +package warehouse + +import ( + "context" + "database/sql" + "testing" + "time" + + "github.com/samber/lo" + "github.com/spf13/cast" + "github.com/stretchr/testify/require" +) + +// RetrieveRecordsFromWarehouse retrieves records from the warehouse based on the given query. +// It returns a slice of slices, where each inner slice represents a record's values. 
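+// Timestamps (and strings that parse as RFC3339) are normalized to the RFC3339
+// format and all other values are stringified, so callers can assert on the
+// returned records without worrying about driver-specific column types.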
+func RetrieveRecordsFromWarehouse( + t testing.TB, + db *sql.DB, + query string, +) [][]string { + t.Helper() + + rows, err := db.QueryContext(context.Background(), query) + require.NoError(t, err) + defer func() { _ = rows.Close() }() + + _ = rows.Err() + + columns, err := rows.Columns() + require.NoError(t, err) + + var records [][]string + for rows.Next() { + resultSet := make([]any, len(columns)) + resultSetPtrs := make([]any, len(columns)) + for i := 0; i < len(columns); i++ { + resultSetPtrs[i] = &resultSet[i] + } + + err = rows.Scan(resultSetPtrs...) + require.NoError(t, err) + + records = append(records, lo.Map(resultSet, func(item any, index int) string { + switch item := item.(type) { + case time.Time: + return item.Format(time.RFC3339) + case string: + if t, err := time.Parse(time.RFC3339Nano, item); err == nil { + return t.Format(time.RFC3339) + } + return item + default: + return cast.ToString(item) + } + })) + } + return records +} diff --git a/utils/misc/misc.go b/utils/misc/misc.go index 64e07af891..0c0677e114 100644 --- a/utils/misc/misc.go +++ b/utils/misc/misc.go @@ -96,7 +96,7 @@ func Init() { } func BatchDestinations() []string { - batchDestinations := []string{"S3", "GCS", "MINIO", "RS", "BQ", "AZURE_BLOB", "SNOWFLAKE", "POSTGRES", "CLICKHOUSE", "DIGITAL_OCEAN_SPACES", "MSSQL", "AZURE_SYNAPSE", "S3_DATALAKE", "MARKETO_BULK_UPLOAD", "GCS_DATALAKE", "AZURE_DATALAKE", "DELTALAKE", "BINGADS_AUDIENCE", "ELOQUA", "YANDEX_METRICA_OFFLINE_EVENTS", "SFTP", "BINGADS_OFFLINE_CONVERSIONS", "KLAVIYO_BULK_UPLOAD", "LYTICS_BULK_UPLOAD"} + batchDestinations := []string{"S3", "GCS", "MINIO", "RS", "BQ", "AZURE_BLOB", "SNOWFLAKE", "POSTGRES", "CLICKHOUSE", "DIGITAL_OCEAN_SPACES", "MSSQL", "AZURE_SYNAPSE", "S3_DATALAKE", "MARKETO_BULK_UPLOAD", "GCS_DATALAKE", "AZURE_DATALAKE", "DELTALAKE", "BINGADS_AUDIENCE", "ELOQUA", "YANDEX_METRICA_OFFLINE_EVENTS", "SFTP", "BINGADS_OFFLINE_CONVERSIONS", "KLAVIYO_BULK_UPLOAD", "LYTICS_BULK_UPLOAD", "SNOWPIPE_STREAMING"} return batchDestinations } diff --git a/warehouse/integrations/snowflake/datatype_mapper.go b/warehouse/integrations/snowflake/datatype_mapper.go index be077a6f74..4596e0845e 100644 --- a/warehouse/integrations/snowflake/datatype_mapper.go +++ b/warehouse/integrations/snowflake/datatype_mapper.go @@ -1,7 +1,5 @@ package snowflake -import "database/sql" - var dataTypesMap = map[string]string{ "boolean": "boolean", "int": "number", @@ -44,9 +42,9 @@ var dataTypesMapToRudder = map[string]string{ "VARIANT": "json", } -func calculateDataType(columnType string, numericScale sql.NullInt64) (string, bool) { +func CalculateDataType(columnType string, numericScale int64) (string, bool) { if datatype, ok := dataTypesMapToRudder[columnType]; ok { - if datatype == "int" && numericScale.Valid && numericScale.Int64 > 0 { + if datatype == "int" && numericScale > 0 { datatype = "float" } return datatype, true diff --git a/warehouse/integrations/snowflake/datatype_mapper_test.go b/warehouse/integrations/snowflake/datatype_mapper_test.go index 2dc8e720fe..4807122611 100644 --- a/warehouse/integrations/snowflake/datatype_mapper_test.go +++ b/warehouse/integrations/snowflake/datatype_mapper_test.go @@ -21,7 +21,7 @@ func TestCalculateDataType(t *testing.T) { } for _, tc := range testCases { - dataType, exists := calculateDataType(tc.columnType, tc.numericScale) + dataType, exists := CalculateDataType(tc.columnType, tc.numericScale.Int64) require.Equal(t, tc.expected, dataType) require.Equal(t, tc.exists, exists) } diff --git 
a/warehouse/integrations/snowflake/snowflake.go b/warehouse/integrations/snowflake/snowflake.go index a2997627ec..12c0a69e13 100644 --- a/warehouse/integrations/snowflake/snowflake.go +++ b/warehouse/integrations/snowflake/snowflake.go @@ -1405,7 +1405,7 @@ func (sf *Snowflake) FetchSchema(ctx context.Context) (model.Schema, model.Schem schema[tableName] = make(map[string]string) } - if datatype, ok := calculateDataType(columnType, numericScale); ok { + if datatype, ok := CalculateDataType(columnType, numericScale.Int64); ok { schema[tableName][columnName] = datatype } else { if _, ok := unrecognizedSchema[tableName]; !ok { diff --git a/warehouse/slave/worker.go b/warehouse/slave/worker.go index 82f40554a0..83725cd276 100644 --- a/warehouse/slave/worker.go +++ b/warehouse/slave/worker.go @@ -345,7 +345,7 @@ func (w *worker) processStagingFile(ctx context.Context, job payload) ([]uploadR violatedConstraints := w.constraintsManager.ViolatedConstraints(job.DestinationType, &batchRouterEvent, columnName) if ok && ((columnType != dataTypeInSchema) || (violatedConstraints.IsViolated)) { - newColumnVal, convError := handleSchemaChange( + newColumnVal, convError := HandleSchemaChange( dataTypeInSchema, columnType, columnVal, @@ -522,8 +522,8 @@ func (w *worker) destinationFromSlaveConnectionMap(destinationId, sourceId strin return conn, nil } -// handleSchemaChange checks if the existing column type is compatible with the new column type -func handleSchemaChange(existingDataType, currentDataType model.SchemaType, value any) (any, error) { +// HandleSchemaChange checks if the existing column type is compatible with the new column type +func HandleSchemaChange(existingDataType, currentDataType model.SchemaType, value any) (any, error) { var ( newColumnVal any err error diff --git a/warehouse/slave/worker_test.go b/warehouse/slave/worker_test.go index 2a88172f12..b44af3d9e8 100644 --- a/warehouse/slave/worker_test.go +++ b/warehouse/slave/worker_test.go @@ -961,7 +961,7 @@ func TestHandleSchemaChange(t *testing.T) { t.Run(tc.name, func(t *testing.T) { t.Parallel() - newColumnVal, convError := handleSchemaChange( + newColumnVal, convError := HandleSchemaChange( tc.existingDatatype, tc.currentDataType, tc.value, diff --git a/warehouse/utils/uploader.go b/warehouse/utils/uploader.go new file mode 100644 index 0000000000..50af3e044f --- /dev/null +++ b/warehouse/utils/uploader.go @@ -0,0 +1,56 @@ +package warehouseutils + +import ( + "context" + "time" + + "github.com/rudderlabs/rudder-server/warehouse/internal/model" +) + +type ( + ModelWarehouse = model.Warehouse + ModelTableSchema = model.TableSchema +) + +//go:generate mockgen -destination=../internal/mocks/utils/mock_uploader.go -package mock_uploader github.com/rudderlabs/rudder-server/warehouse/utils Uploader +type Uploader interface { + IsWarehouseSchemaEmpty() bool + GetLocalSchema(ctx context.Context) (model.Schema, error) + UpdateLocalSchema(ctx context.Context, schema model.Schema) error + GetTableSchemaInWarehouse(tableName string) model.TableSchema + GetTableSchemaInUpload(tableName string) model.TableSchema + GetLoadFilesMetadata(ctx context.Context, options GetLoadFilesOptions) ([]LoadFile, error) + GetSampleLoadFileLocation(ctx context.Context, tableName string) (string, error) + GetSingleLoadFile(ctx context.Context, tableName string) (LoadFile, error) + ShouldOnDedupUseNewRecord() bool + UseRudderStorage() bool + GetLoadFileType() string + CanAppend() bool +} + +type NopUploader struct{} + +func (n *NopUploader) 
IsWarehouseSchemaEmpty() bool { + return false +} +func (n *NopUploader) GetLocalSchema(ctx context.Context) (model.Schema, error) { return nil, nil } // nolint:nilnil +func (n *NopUploader) UpdateLocalSchema(ctx context.Context, schema model.Schema) error { return nil } +func (n *NopUploader) GetTableSchemaInWarehouse(tableName string) model.TableSchema { return nil } +func (n *NopUploader) GetTableSchemaInUpload(tableName string) model.TableSchema { return nil } +func (n *NopUploader) ShouldOnDedupUseNewRecord() bool { return false } +func (n *NopUploader) UseRudderStorage() bool { return false } +func (n *NopUploader) GetLoadFileGenStartTIme() time.Time { return time.Time{} } +func (n *NopUploader) GetLoadFileType() string { return "" } +func (n *NopUploader) CanAppend() bool { return false } +func (n *NopUploader) GetLoadFilesMetadata(ctx context.Context, options GetLoadFilesOptions) ([]LoadFile, error) { + return nil, nil +} + +func (n *NopUploader) GetSampleLoadFileLocation(ctx context.Context, tableName string) (string, error) { + return "", nil +} + +func (n *NopUploader) GetSingleLoadFile(ctx context.Context, tableName string) (LoadFile, error) { + return LoadFile{}, nil +} +func (n *NopUploader) GetFirstLastEvent() (time.Time, time.Time) { return time.Time{}, time.Time{} } diff --git a/warehouse/utils/utils.go b/warehouse/utils/utils.go index 9673602d70..e32ab9ef77 100644 --- a/warehouse/utils/utils.go +++ b/warehouse/utils/utils.go @@ -2,7 +2,6 @@ package warehouseutils import ( "bytes" - "context" "crypto/sha512" "database/sql" "encoding/hex" @@ -207,20 +206,16 @@ type ColumnInfo struct { Type string } -//go:generate mockgen -destination=../internal/mocks/utils/mock_uploader.go -package mock_uploader github.com/rudderlabs/rudder-server/warehouse/utils Uploader -type Uploader interface { - IsWarehouseSchemaEmpty() bool - GetLocalSchema(ctx context.Context) (model.Schema, error) - UpdateLocalSchema(ctx context.Context, schema model.Schema) error - GetTableSchemaInWarehouse(tableName string) model.TableSchema - GetTableSchemaInUpload(tableName string) model.TableSchema - GetLoadFilesMetadata(ctx context.Context, options GetLoadFilesOptions) ([]LoadFile, error) - GetSampleLoadFileLocation(ctx context.Context, tableName string) (string, error) - GetSingleLoadFile(ctx context.Context, tableName string) (LoadFile, error) - ShouldOnDedupUseNewRecord() bool - UseRudderStorage() bool - GetLoadFileType() string - CanAppend() bool +type Destination struct { + Source backendconfig.SourceT + Destination backendconfig.DestinationT +} + +type Schema model.Schema + +type KeyValue struct { + Key string + Value interface{} } type GetLoadFilesOptions struct { From 91dedc5761d012187627c778e1b19ec9e84a7438 Mon Sep 17 00:00:00 2001 From: achettyiitr Date: Mon, 28 Oct 2024 07:12:18 +0530 Subject: [PATCH 2/2] chore: some more tests --- .github/workflows/tests.yaml | 2 +- .../snowpipestreaming_test.go | 2131 ++++++++--------- ...docker-compose.rudder-snowpipe-clients.yml | 2 +- .../docker-compose.rudder-transformer.yml | 2 +- processor/transformer/transformer.go | 3 + .../snowpipestreaming/apiadapter.go | 125 +- .../snowpipestreaming/channel.go | 112 +- .../snowpipestreaming/columns.go | 4 +- .../snowpipestreaming/discards.go | 86 +- .../snowpipestreaming/internal/api/api.go | 6 +- .../internal/api/api_test.go | 67 +- .../internal/api/{errorcodes.go => codes.go} | 0 .../internal/api/createchannel.go | 7 +- .../internal/api/createchannel_test.go | 44 +- .../internal/api/deletechannel.go | 2 +- 
.../internal/api/getchannel.go | 3 +- .../internal/api/getchannel_test.go | 41 +- .../snowpipestreaming/internal/api/insert.go | 7 +- .../internal/api/insert_test.go | 4 +- .../snowpipestreaming/internal/api/status.go | 7 +- .../snowpipestreaming/internal/model/model.go | 72 +- .../internal/model/model_test.go | 100 +- .../snowpipestreaming/options.go | 2 - .../snowpipestreaming/poll.go | 105 +- .../snowpipestreaming/snowpipestreaming.go | 24 +- .../snowpipestreaming_test.go | 177 ++ ...docker-compose.rudder-snowpipe-clients.yml | 2 +- .../testhelper/testhelper.go | 13 +- .../snowpipestreaming/types.go | 56 +- .../snowpipestreaming/upload.go | 277 +-- .../snowpipestreaming/uploadstats.go | 27 +- router/batchrouter/handle_async.go | 3 +- router/batchrouter/handle_lifecycle.go | 6 +- warehouse/integrations/manager/manager.go | 4 +- .../testdata/docker-compose.transformer.yml | 11 + warehouse/utils/reservedkeywords.go | 93 + warehouse/utils/uploader.go | 5 - warehouse/utils/utils.go | 45 +- 38 files changed, 1964 insertions(+), 1713 deletions(-) rename router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/{errorcodes.go => codes.go} (100%) create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming_test.go create mode 100644 warehouse/integrations/testdata/docker-compose.transformer.yml diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 2a441cfe29..e269066f03 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -122,7 +122,7 @@ jobs: go-version-file: 'go.mod' - run: go version - run: go mod download # Not required, used to segregate module download vs test times - - run: make test exclude="/rudder-server/(jobsdb|integration_test|processor|regulation-worker|router|services|suppression-backup-service|warehouse)" + - run: FORCE_RUN_INTEGRATION_TESTS=true make test exclude="/rudder-server/(jobsdb|integration_test|processor|regulation-worker|router|services|suppression-backup-service|warehouse)" - name: Upload coverage report uses: actions/upload-artifact@v4 with: diff --git a/integration_test/snowpipestreaming/snowpipestreaming_test.go b/integration_test/snowpipestreaming/snowpipestreaming_test.go index 4f1939f714..69c0fff878 100644 --- a/integration_test/snowpipestreaming/snowpipestreaming_test.go +++ b/integration_test/snowpipestreaming/snowpipestreaming_test.go @@ -4,84 +4,46 @@ import ( "bytes" "context" "database/sql" - "encoding/json" - "errors" "fmt" "io" "net/http" + "net/http/httptest" "os" "path" "strconv" - "strings" "testing" "time" + "github.com/google/uuid" + "github.com/iancoleman/strcase" "github.com/ory/dockertest/v3" - promClient "github.com/prometheus/client_model/go" - "github.com/rudderlabs/rudder-go-kit/stats/testhelper" "github.com/samber/lo" "github.com/stretchr/testify/require" - "golang.org/x/sync/errgroup" "github.com/rudderlabs/compose-test/compose" "github.com/rudderlabs/compose-test/testcompose" + "github.com/rudderlabs/rudder-go-kit/bytesize" "github.com/rudderlabs/rudder-go-kit/config" kithttputil "github.com/rudderlabs/rudder-go-kit/httputil" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats" kithelper "github.com/rudderlabs/rudder-go-kit/testhelper" "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/postgres" - "github.com/rudderlabs/rudder-go-kit/testhelper/rand" + backendconfig "github.com/rudderlabs/rudder-server/backend-config" + 
"github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testhelper" "github.com/rudderlabs/rudder-server/runner" "github.com/rudderlabs/rudder-server/testhelper/backendconfigtest" "github.com/rudderlabs/rudder-server/testhelper/health" - "github.com/rudderlabs/rudder-server/utils/httputil" "github.com/rudderlabs/rudder-server/utils/timeutil" "github.com/rudderlabs/rudder-server/warehouse/integrations/snowflake" whth "github.com/rudderlabs/rudder-server/warehouse/integrations/testhelper" whutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) -const ( - testKeyPairUnencrypted = "SNOWPIPE_STREAMING_KEYPAIR_UNENCRYPTED_INTEGRATION_TEST_CREDENTIALS" -) - -type testCredentials struct { - Account string `json:"account"` - User string `json:"user"` - Role string `json:"role"` - Database string `json:"database"` - Warehouse string `json:"warehouse"` - PrivateKey string `json:"privateKey"` - PrivateKeyPassphrase string `json:"privateKeyPassphrase"` -} - -func getSnowpipeTestCredentials(key string) (*testCredentials, error) { - cred, exists := os.LookupEnv(key) - if !exists { - return nil, errors.New("snowpipe test credentials not found") - } - - var credentials testCredentials - err := json.Unmarshal([]byte(cred), &credentials) - if err != nil { - return nil, fmt.Errorf("unable to marshall %s to snowpipe test credentials: %v", key, err) - } - return &credentials, nil -} - -func randSchema(provider string) string { // nolint:unparam - hex := strings.ToLower(rand.String(12)) - namespace := fmt.Sprintf("test_%s_%d", hex, time.Now().Unix()) - return whutils.ToProviderCase(provider, whutils.ToSafeNamespace(provider, - namespace, - )) -} - func TestSnowPipeStreaming(t *testing.T) { for _, key := range []string{ - testKeyPairUnencrypted, + testhelper.TestKeyPairUnencrypted, } { if _, exists := os.LookupEnv(key); !exists { if os.Getenv("FORCE_RUN_INTEGRATION_TESTS") == "true" { @@ -97,62 +59,24 @@ func TestSnowPipeStreaming(t *testing.T) { transformerURL := fmt.Sprintf("http://localhost:%d", c.Port("transformer", 9090)) snowPipeClientsURL := fmt.Sprintf("http://localhost:%d", c.Port("rudder-snowpipe-clients", 9078)) - keyPairUnEncryptedCredentials, err := getSnowpipeTestCredentials(testKeyPairUnencrypted) + credentials, err := testhelper.GetSnowPipeTestCredentials(testhelper.TestKeyPairUnencrypted) require.NoError(t, err) t.Run("namespace and table already exists", func(t *testing.T) { - config.Reset() - defer config.Reset() - - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) - - gwPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - namespace := randSchema(whutils.SNOWFLAKE) - - destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). 
- Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). - Build() - defer bcServer.Close() + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -160,129 +84,114 @@ func TestSnowPipeStreaming(t *testing.T) { Destination: destination, } + t.Log("Creating schema and tables") sm := snowflake.New(config.New(), logger.NOP, stats.NOP) - require.NoError(t, err) require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) require.NoError(t, sm.CreateSchema(ctx)) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) - require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ - "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "string", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) - require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ - "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_REQUEST_IP": "string", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", 
"CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) eventFormat := func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z", "context":{"ip":"14.5.67.21"}}]}`, strconv.Itoa(index+1), - "identify", ) } - - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) - - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": 
"TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) - ts := timeutil.Now().Format("2006-01-02") - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - }, - usersRecords, - ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - 
{destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - }, - identifiesRecords, - ) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, productReviewedRecords(source, destination), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecords(source, destination), tracksRecordsFromDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) - t.Run("namespace does not exists", func(t *testing.T) { - config.Reset() - defer config.Reset() + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } - gwPort, err := kithelper.GetFreePort() + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. 
It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z", "context":{"ip":"14.5.67.21"}}]}`, + strconv.Itoa(index+1), + ) + } + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) - namespace := randSchema(whutils.SNOWFLAKE) - - destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). 
- Build() - defer bcServer.Close() + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, productReviewedRecords(source, destination), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecords(source, destination), tracksRecordsFromDB) + + cancel() + require.NoError(t, <-done) + }) + t.Run("table does not exists", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -290,122 +199,191 @@ func TestSnowPipeStreaming(t *testing.T) { Destination: destination, } + t.Log("Creating schema") sm := snowflake.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, err) require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) eventFormat := func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. 
It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z", "context":{"ip":"14.5.67.21"}}]}`, strconv.Itoa(index+1), - "identify", ) } + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) + + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, productReviewedRecords(source, destination), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecords(source, destination), tracksRecordsFromDB) + + cancel() + require.NoError(t, <-done) + }) + t.Run("events with different schema", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, 
snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } - err = sendEvents(5, eventFormat, "writekey1", url) + t.Log("Creating schema and tables") + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. 
It works but the material feels flimsy.","additional_column_%[1]s": "%[1]s"}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z", "context":{"ip":"14.5.67.21"}}]}`, + strconv.Itoa(index+1), + ) + } + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "PRODUCT_REVIEWED": {"ADDITIONAL_COLUMN_1": "TEXT", "ADDITIONAL_COLUMN_2": "TEXT", "ADDITIONAL_COLUMN_3": "TEXT", "ADDITIONAL_COLUMN_4": "TEXT", "ADDITIONAL_COLUMN_5": "TEXT", "CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) ts := timeutil.Now().Format("2006-01-02") + eventName := "Product Reviewed" + tableName := strcase.ToSnake(eventName) + produceReviewedRecordsFromDB := 
whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD'), ADDITIONAL_COLUMN_1, ADDITIONAL_COLUMN_2, ADDITIONAL_COLUMN_3, ADDITIONAL_COLUMN_4, ADDITIONAL_COLUMN_5 FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - }, - usersRecords, - ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - }, - identifiesRecords, - ) + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "1", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. 
It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts, "1", "", "", "", ""}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "2", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts, "", "2", "", "", ""}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "3", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts, "", "", "3", "", ""}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "4", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts, "", "", "", "4", ""}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "5", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts, "", "", "", "", "5"}, + }, produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecords(source, destination), tracksRecordsFromDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) + t.Run("discards", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) - t.Run("table does not exists", func(t *testing.T) { - config.Reset() - defer config.Reset() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } - gwPort, err := kithelper.GetFreePort() + t.Log("Creating schema and tables. 
RUDDER_DISCARDS table is not created") + t.Log("CONTEXT_IP, CONTEXT_PASSED_IP are of type int") + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "int", "CONTEXT_REQUEST_IP": "string", "CONTEXT_PASSED_IP": "int", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "int", "CONTEXT_REQUEST_IP": "string", "CONTEXT_PASSED_IP": "int", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. 
It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z", "context":{"ip":"14.5.67.21"}}]}`, + strconv.Itoa(index+1), + ) + } + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) + + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, productReviewedRecordsForDiscards(source, destination), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecordsForDiscards(source, destination), tracksRecordsFromDB) + discardsRecordsInDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) + require.ElementsMatch(t, discardsRecords(), discardsRecordsInDB) - namespace := randSchema(whutils.SNOWFLAKE) - - destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). 
- WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). - Build() - defer bcServer.Close() + cancel() + require.NoError(t, <-done) + }) + t.Run("discards migration for reason", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -413,123 +391,135 @@ func TestSnowPipeStreaming(t *testing.T) { Destination: destination, } + t.Log("Creating schema and tables. 
RUDDER_DISCARDS table is created without the REASON column") + t.Log("CONTEXT_IP, CONTEXT_PASSED_IP are of type int") sm := snowflake.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, err) require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) require.NoError(t, sm.CreateSchema(ctx)) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "int", "CONTEXT_REQUEST_IP": "string", "CONTEXT_PASSED_IP": "int", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "int", "CONTEXT_REQUEST_IP": "string", "CONTEXT_PASSED_IP": "int", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + require.NoError(t, sm.CreateTable(ctx, "RUDDER_DISCARDS", whutils.ModelTableSchema{ + "COLUMN_NAME": "string", "COLUMN_VALUE": "string", "RECEIVED_AT": "datetime", "ROW_ID": "string", "TABLE_NAME": "string", "UUID_TS": "datetime", + })) eventFormat := func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. 
It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z", "context":{"ip":"14.5.67.21"}}]}`, strconv.Itoa(index+1), - "identify", ) } - - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) - - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": 
"TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) - ts := timeutil.Now().Format("2006-01-02") - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - }, - usersRecords, - ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - }, - identifiesRecords, - ) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) 
+ require.ElementsMatch(t, productReviewedRecordsForDiscards(source, destination), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecordsForDiscards(source, destination), tracksRecordsFromDB) + discardsRecordsInDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) + require.ElementsMatch(t, discardsRecords(), discardsRecordsInDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) + t.Run("discards migrated", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) - t.Run("events with different schema", func(t *testing.T) { - config.Reset() - defer config.Reset() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } - gwPort, err := kithelper.GetFreePort() + t.Log("Creating schema and tables. 
RUDDER_DISCARDS table is created with the REASON column") + t.Log("CONTEXT_IP, CONTEXT_PASSED_IP are of type int") + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "int", "CONTEXT_REQUEST_IP": "string", "CONTEXT_PASSED_IP": "int", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "int", "CONTEXT_REQUEST_IP": "string", "CONTEXT_PASSED_IP": "int", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + require.NoError(t, sm.CreateTable(ctx, "RUDDER_DISCARDS", whutils.ModelTableSchema{ + "COLUMN_NAME": "string", "COLUMN_VALUE": "string", "RECEIVED_AT": "datetime", "ROW_ID": "string", "TABLE_NAME": "string", "UUID_TS": "datetime", "REASON": "string", + })) + + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. 
It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z", "context":{"ip":"14.5.67.21"}}]}`, + strconv.Itoa(index+1), + ) + } + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) - namespace := randSchema(whutils.SNOWFLAKE) - - destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). 
- Build() - defer bcServer.Close() + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, productReviewedRecordsForDiscards(source, destination), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecordsForDiscards(source, destination), tracksRecordsFromDB) + discardsRecordsInDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) + require.ElementsMatch(t, discardsRecords(), discardsRecordsInDB) + + cancel() + require.NoError(t, <-done) + }) + t.Run("don't re-create channel on loading twice when successful", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -537,129 +527,156 @@ func TestSnowPipeStreaming(t *testing.T) { Destination: destination, } + t.Log("Creating schema and tables") sm := snowflake.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, err) require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) require.NoError(t, sm.CreateSchema(ctx)) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) - require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ - "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "string", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + 
require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) - require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ - "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_REQUEST_IP": "string", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) + t.Log("Sending 5 events") eventFormat := func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}, "additional_column_%[1]s": "%[1]s"},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. 
It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z", "context":{"ip":"14.5.67.21"}}]}`, strconv.Itoa(index+1), - "identify", ) } + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) - - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + t.Log("Sending 5 events again") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 20) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_ADDITIONAL_COLUMN_1": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_2": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_3": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_4": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_5": "TEXT", "CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": {"CONTEXT_ADDITIONAL_COLUMN_1": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_2": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_3": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_4": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_5": "TEXT", "CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": 
"TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD'), CONTEXT_ADDITIONAL_COLUMN_1, CONTEXT_ADDITIONAL_COLUMN_2, CONTEXT_ADDITIONAL_COLUMN_3, CONTEXT_ADDITIONAL_COLUMN_4, CONTEXT_ADDITIONAL_COLUMN_5 FROM %q.%q;`, namespace, "USERS")) - ts := timeutil.Now().Format("2006-01-02") - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts, "1", "", "", "", ""}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts, "", "2", "", "", ""}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts, "", "", "3", "", ""}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts, "", "", "", "4", ""}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts, "", "", "", "", "5"}, - }, - usersRecords, - ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD'), CONTEXT_ADDITIONAL_COLUMN_1, CONTEXT_ADDITIONAL_COLUMN_2, CONTEXT_ADDITIONAL_COLUMN_3, CONTEXT_ADDITIONAL_COLUMN_4, CONTEXT_ADDITIONAL_COLUMN_5 FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts, "1", "", "", "", ""}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts, "", "2", "", "", ""}, - {destination.ID, 
"SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts, "", "", "3", "", ""}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts, "", "", "", "4", ""}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts, "", "", "", "", "5"}, - }, - identifiesRecords, - ) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, append(productReviewedRecords(source, destination), productReviewedRecords(source, destination)...), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, append(tracksRecords(source, destination), tracksRecords(source, destination)...), tracksRecordsFromDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) + t.Run("many tables", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) - t.Run("discards", func(t *testing.T) { - config.Reset() - defer config.Reset() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() - gwPort, err := kithelper.GetFreePort() - require.NoError(t, err) + url := fmt.Sprintf("http://localhost:%d", gatewayPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) - namespace := randSchema(whutils.SNOWFLAKE) - - destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). 
- WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). - Build() - defer bcServer.Close() + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() + t.Log("Creating schema and tables") + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) + for i := 0; i < 10; i++ { + eventFormat := func(int) string { + return fmt.Sprintf(`{"batch":[{"type":"track","userId":"%[1]s","event":"Product Reviewed %[1]s","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z", "context":{"ip":"14.5.67.21"}}]}`, + strconv.Itoa(i+1), + ) } - return err + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + } + + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5*10) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 2*5*10) + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + expectedSchema := lo.SliceToMap( + lo.RepeatBy(10, func(index int) string { + return "PRODUCT_REVIEWED_" + strconv.Itoa(index+1) + }), + func(tableName string) (string, map[string]string) { + return tableName, map[string]string{"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"} + }, + ) + expectedSchema = lo.Assign(expectedSchema, map[string]map[string]string{ + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", 
"COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + }) + require.Equal(t, expectedSchema, convertRecordsToSchema(schema)) + + for i := 0; i < 10; i++ { + productIDIndex := i + 1 + userID := strconv.Itoa(productIDIndex) + eventName := "Product Reviewed " + strconv.Itoa(productIDIndex) + tableName := strcase.ToSnake(eventName) + recordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED_"+strconv.Itoa(productIDIndex))) + ts := timeutil.Now().Format("2006-01-02") + + expectedProductReviewedRecords := [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", source.ID, source.SourceDefinition.Name, tableName, eventName, "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", userID, ts}, + {destination.ID, "SNOWPIPE_STREAMING", source.ID, source.SourceDefinition.Name, tableName, eventName, "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", userID, ts}, + {destination.ID, "SNOWPIPE_STREAMING", source.ID, source.SourceDefinition.Name, tableName, eventName, "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", userID, ts}, + {destination.ID, "SNOWPIPE_STREAMING", source.ID, source.SourceDefinition.Name, tableName, eventName, "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", userID, ts}, + {destination.ID, "SNOWPIPE_STREAMING", source.ID, source.SourceDefinition.Name, tableName, eventName, "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. 
It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", userID, ts}, + } + require.Equal(t, expectedProductReviewedRecords, recordsFromDB) + } + + trackRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + expectedTrackRecords := lo.RepeatBy(50, func(index int) []string { + productIDIndex := index/5 + 1 + userID := strconv.Itoa(productIDIndex) + eventName := "Product Reviewed " + strconv.Itoa(productIDIndex) + tableName := strcase.ToSnake(eventName) + ts := timeutil.Now().Format("2006-01-02") + + return []string{destination.ID, "SNOWPIPE_STREAMING", source.ID, source.SourceDefinition.Name, tableName, eventName, "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", userID, ts} }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + require.ElementsMatch(t, expectedTrackRecords, trackRecordsFromDB) + + cancel() + require.NoError(t, <-done) + }) + t.Run("schema modified after channel creation (schema deleted)", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -672,160 +689,145 @@ func TestSnowPipeStreaming(t *testing.T) { require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) require.NoError(t, sm.CreateSchema(ctx)) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) - require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ - "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "int", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "int", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) - require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ - "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "int", 
"CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "int", "CONTEXT_REQUEST_IP": "int", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) + t.Log("Sending 5 events") eventFormat := func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z", "context":{"ip":"14.5.67.21"}}]}`, strconv.Itoa(index+1), - "identify", ) } + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) + t.Log("Schema modified, Dropping schema") + testhelper.DropSchema(t, sm.DB.DB, namespace) - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + t.Log("Sending 5 events again") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "aborted", 10) + + t.Log("Sending 5 events again, should succeeded") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 15) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 20) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) 
require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) - ts := timeutil.Now().Format("2006-01-02") - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - }, - usersRecords, - ) - 
identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - }, - identifiesRecords, - ) - discardsRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) - require.ElementsMatch(t, [][]string{ - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema 
conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - }, - discardsRecords, - ) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, productReviewedRecords(source, destination), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecords(source, destination), tracksRecordsFromDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) + t.Run("schema modified after channel creation (table deleted)", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) - t.Run("discards migration for reason", func(t *testing.T) { - config.Reset() - defer config.Reset() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, 
warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + + t.Log("Sending 5 events") + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z", "context":{"ip":"14.5.67.21"}}]}`, + strconv.Itoa(index+1), + ) + } + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) - gwPort, err := kithelper.GetFreePort() + t.Log("Schema modified, Dropping table") + _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("DROP TABLE %q.%q;", namespace, "TRACKS")) require.NoError(t, err) - namespace := randSchema(whutils.SNOWFLAKE) - - destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). 
- Build() - defer bcServer.Close() + t.Log("Sending 5 events again") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 15) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "aborted", 5) + + t.Log("Sending 5 events again, should succeed") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 15) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 25) + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) + + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, append(productReviewedRecords(source, destination), append(productReviewedRecords(source, destination), productReviewedRecords(source, destination)...)...), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecords(source, destination), tracksRecordsFromDB) + + cancel() + require.NoError(t, <-done) + }) + t.Run("schema modified after channel creation (columns deleted)", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, 
credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -838,163 +840,81 @@ func TestSnowPipeStreaming(t *testing.T) { require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) require.NoError(t, sm.CreateSchema(ctx)) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) - require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ - "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "int", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "int", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", - })) - require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ - "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "int", "CONTEXT_REQUEST_IP": "int", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) - require.NoError(t, sm.CreateTable(ctx, "RUDDER_DISCARDS", whutils.ModelTableSchema{ - "COLUMN_NAME": "string", "COLUMN_VALUE": "string", "RECEIVED_AT": "datetime", "ROW_ID": "string", "TABLE_NAME": "string", "UUID_TS": "datetime", + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) + t.Log("Sending 5 events") eventFormat 
:= func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z", "context":{"ip":"14.5.67.21"}}]}`, strconv.Itoa(index+1), - "identify", ) } + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) - err = sendEvents(5, eventFormat, "writekey1", url) + t.Log("Schema modified, Dropping columns for TRACKS table") + _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.TRACKS DROP COLUMN CONTEXT_IP, CONTEXT_PASSED_IP;", namespace)) require.NoError(t, err) - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + t.Log("Sending 5 events again") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 15) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "aborted", 5) + + t.Log("Sending 5 events again, should succeed") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 15) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 25) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": 
{"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) ts := timeutil.Now().Format("2006-01-02") - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - }, - usersRecords, - ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, 
source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - }, - identifiesRecords, - ) - discardsRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) - require.ElementsMatch(t, [][]string{ - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_IP", "", 
"incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - }, - discardsRecords, - ) + eventName := "Product Reviewed" + tableName := strcase.ToSnake(eventName) + recordsBeforeDeletion := [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + } + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, append(tracksRecords(source, destination), recordsBeforeDeletion...), tracksRecordsFromDB) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, append(productReviewedRecords(source, destination), append(productReviewedRecords(source, destination), productReviewedRecords(source, destination)...)...), produceReviewedRecordsFromDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) - - t.Run("discards migrated", func(t *testing.T) { - config.Reset() - defer config.Reset() - - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) - - gwPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - namespace := randSchema(whutils.SNOWFLAKE) - - destination := 
backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). - Build() - defer bcServer.Close() + t.Run("schema modified after channel creation (datatype changed for all tables)", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -1007,172 +927,81 @@ func TestSnowPipeStreaming(t *testing.T) { require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) require.NoError(t, sm.CreateSchema(ctx)) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) - require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ - "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "int", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "int", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", 
"TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) - require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ - "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "int", "CONTEXT_REQUEST_IP": "int", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) require.NoError(t, sm.CreateTable(ctx, "RUDDER_DISCARDS", whutils.ModelTableSchema{ "COLUMN_NAME": "string", "COLUMN_VALUE": "string", "RECEIVED_AT": "datetime", "ROW_ID": "string", "TABLE_NAME": "string", "UUID_TS": "datetime", "REASON": "string", })) + t.Log("Sending 5 events") eventFormat := func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. 
It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z", "context":{"ip":"14.5.67.21"}}]}`, strconv.Itoa(index+1), - "identify", ) } + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) - err = sendEvents(5, eventFormat, "writekey1", url) + t.Log("Schema modified, CONTEXT_IP, CONTEXT_PASSED_IP are of type int") + _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.TRACKS DROP COLUMN CONTEXT_IP, CONTEXT_PASSED_IP;", namespace)) + require.NoError(t, err) + _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.PRODUCT_REVIEWED DROP COLUMN CONTEXT_IP, CONTEXT_PASSED_IP;", namespace)) require.NoError(t, err) + _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.TRACKS ADD COLUMN CONTEXT_IP NUMBER, CONTEXT_PASSED_IP NUMBER;", namespace)) + require.NoError(t, err) + _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.PRODUCT_REVIEWED ADD COLUMN CONTEXT_IP NUMBER, CONTEXT_PASSED_IP NUMBER;", namespace)) + require.NoError(t, err) + + t.Log("Sending 5 events again") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "aborted", 10) - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + t.Log("Sending 5 events again, should succeed") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 15) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 20) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT",
"CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) - ts := timeutil.Now().Format("2006-01-02") - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - }, - usersRecords, - ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - 
{destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - }, - identifiesRecords, - ) - discardsRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) - require.ElementsMatch(t, [][]string{ - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema 
conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - }, - discardsRecords, - ) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, append(productReviewedRecordsForDiscards(source, destination), productReviewedRecordsForDiscards(source, destination)...), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, append(tracksRecordsForDiscards(source, destination), tracksRecordsForDiscards(source, destination)...), tracksRecordsFromDB) + discardsRecordsInDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) + require.ElementsMatch(t, discardsRecords(), discardsRecordsInDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) - - t.Run("don't re-create channel on loading twice when successful", func(t *testing.T) { - config.Reset() - defer config.Reset() - - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) - - gwPort, err := kithelper.GetFreePort() - require.NoError(t, err) - prometheusPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - namespace := randSchema(whutils.SNOWFLAKE) - - destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). 
- WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). - Build() - defer bcServer.Close() + t.Run("schema modified after channel creation (datatype changed for partial tables)", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - config.Set("enableStats", true) - config.Set("RuntimeStats.enabled", false) - config.Set("OpenTelemetry.enabled", true) - config.Set("OpenTelemetry.metrics.prometheus.enabled", true) - config.Set("OpenTelemetry.metrics.prometheus.port", strconv.Itoa(prometheusPort)) - config.Set("OpenTelemetry.metrics.exportInterval", "10ms") + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -1185,167 +1014,141 @@ func TestSnowPipeStreaming(t *testing.T) { require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) require.NoError(t, sm.CreateSchema(ctx)) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) - require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ - "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "string", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) - require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ - "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_REQUEST_IP": "string", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": 
"datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + require.NoError(t, sm.CreateTable(ctx, "RUDDER_DISCARDS", whutils.ModelTableSchema{ + "COLUMN_NAME": "string", "COLUMN_VALUE": "string", "RECEIVED_AT": "datetime", "ROW_ID": "string", "TABLE_NAME": "string", "UUID_TS": "datetime", "REASON": "string", })) + t.Log("Sending 5 events") eventFormat := func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z", "context":{"ip":"14.5.67.21"}}]}`, strconv.Itoa(index+1), - "identify", ) } + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) - t.Log("Sending 5 events") - err = sendEvents(5, eventFormat, "writekey1", url) + t.Log("Schema modified, CONTEXT_IP, CONTEXT_REQUEST_IP are of type int") + _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.TRACKS DROP COLUMN CONTEXT_IP, CONTEXT_PASSED_IP;", namespace)) + require.NoError(t, err) + _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.TRACKS ADD COLUMN CONTEXT_IP NUMBER, CONTEXT_PASSED_IP NUMBER;", namespace)) require.NoError(t, err) - - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") t.Log("Sending 5 events again") - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 15) + requireBatchRouterJobsCount(t, ctx, 
postgresContainer.DB, "aborted", 5) - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 10 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 20 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") - - metrics := getPrometheusMetrics(t, prometheusPort) - require.Equal(t, 1, len(metrics["snowpipestreaming_create_channel_count"].GetMetric())) - require.Equal(t, float64(2), metrics["snowpipestreaming_create_channel_count"].GetMetric()[0].Counter.GetValue()) + t.Log("Sending 5 events again, should succeeded") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 15) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 25) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": 
"TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) - ts := timeutil.Now().Format("2006-01-02") - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - }, - usersRecords, - ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, 
source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - }, - identifiesRecords, - ) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, append(productReviewedRecords(source, destination), append(productReviewedRecords(source, destination), productReviewedRecords(source, destination)...)...), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, append(tracksRecordsForDiscards(source, destination), tracksRecordsForDiscards(source, destination)...), tracksRecordsFromDB) + discardsRecordsInDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) + require.ElementsMatch(t, discardTracksRecords(), discardsRecordsInDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) + t.Run("JSON columns", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) + destination.Config["jsonPaths"] = "track.properties.jsonInfo" - t.Run("many tables", func(t 
*testing.T) {}) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() - t.Run("schema modified after channel creation (datatype changed)", func(t *testing.T) { - config.Reset() - defer config.Reset() + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) + url := fmt.Sprintf("http://localhost:%d", gatewayPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) - gwPort, err := kithelper.GetFreePort() - require.NoError(t, err) - prometheusPort, err := kithelper.GetFreePort() - require.NoError(t, err) + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. It works but the material feels flimsy.", "jsonInfo": {"id":123,"name":"Test User","email":"testuser@example.com","isActive":true,"createdAt":"2023-10-01T12:34:56Z","profile":{"age":30,"address":{"street":"123 Test St","city":"Testville","zip":"12345"},"interests":["coding","reading","gaming"]}}}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z", "context":{"ip":"14.5.67.21"}}]}`, + strconv.Itoa(index+1), + ) + } + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ", "JSON_INFO": "VARIANT"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", 
"RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) + + ts := timeutil.Now().Format("2006-01-02") + eventName := "Product Reviewed" + tableName := strcase.ToSnake(eventName) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD'), JSON_INFO FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.Equal(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "1", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts, "{\n \"createdAt\": \"2023-10-01T12:34:56Z\",\n \"email\": \"testuser@example.com\",\n \"id\": 123,\n \"isActive\": true,\n \"name\": \"Test User\",\n \"profile\": {\n \"address\": {\n \"city\": \"Testville\",\n \"street\": \"123 Test St\",\n \"zip\": \"12345\"\n },\n \"age\": 30,\n \"interests\": [\n \"coding\",\n \"reading\",\n \"gaming\"\n ]\n }\n}"}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "2", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts, "{\n \"createdAt\": \"2023-10-01T12:34:56Z\",\n \"email\": \"testuser@example.com\",\n \"id\": 123,\n \"isActive\": true,\n \"name\": \"Test User\",\n \"profile\": {\n \"address\": {\n \"city\": \"Testville\",\n \"street\": \"123 Test St\",\n \"zip\": \"12345\"\n },\n \"age\": 30,\n \"interests\": [\n \"coding\",\n \"reading\",\n \"gaming\"\n ]\n }\n}"}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "3", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts, "{\n \"createdAt\": \"2023-10-01T12:34:56Z\",\n \"email\": \"testuser@example.com\",\n \"id\": 123,\n \"isActive\": true,\n \"name\": \"Test User\",\n \"profile\": {\n \"address\": {\n \"city\": \"Testville\",\n \"street\": \"123 Test St\",\n \"zip\": \"12345\"\n },\n \"age\": 30,\n \"interests\": [\n \"coding\",\n \"reading\",\n \"gaming\"\n ]\n }\n}"}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "4", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. 
It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts, "{\n \"createdAt\": \"2023-10-01T12:34:56Z\",\n \"email\": \"testuser@example.com\",\n \"id\": 123,\n \"isActive\": true,\n \"name\": \"Test User\",\n \"profile\": {\n \"address\": {\n \"city\": \"Testville\",\n \"street\": \"123 Test St\",\n \"zip\": \"12345\"\n },\n \"age\": 30,\n \"interests\": [\n \"coding\",\n \"reading\",\n \"gaming\"\n ]\n }\n}"}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "5", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts, "{\n \"createdAt\": \"2023-10-01T12:34:56Z\",\n \"email\": \"testuser@example.com\",\n \"id\": 123,\n \"isActive\": true,\n \"name\": \"Test User\",\n \"profile\": {\n \"address\": {\n \"city\": \"Testville\",\n \"street\": \"123 Test St\",\n \"zip\": \"12345\"\n },\n \"age\": 30,\n \"interests\": [\n \"coding\",\n \"reading\",\n \"gaming\"\n ]\n }\n}"}, + }, produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecords(source, destination), tracksRecordsFromDB) - namespace := randSchema(whutils.SNOWFLAKE) - - destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). 
- Build() - defer bcServer.Close() + cancel() + require.NoError(t, <-done) + }) + t.Run("identify event should not contain users", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - config.Set("enableStats", true) - config.Set("RuntimeStats.enabled", false) - config.Set("OpenTelemetry.enabled", true) - config.Set("OpenTelemetry.metrics.prometheus.enabled", true) - config.Set("OpenTelemetry.metrics.prometheus.port", strconv.Itoa(prometheusPort)) - config.Set("OpenTelemetry.metrics.exportInterval", "10ms") + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -1353,18 +1156,15 @@ func TestSnowPipeStreaming(t *testing.T) { Destination: destination, } + t.Log("Creating schema and tables") sm := snowflake.New(config.New(), logger.NOP, stats.NOP) - require.NoError(t, err) require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) require.NoError(t, sm.CreateSchema(ctx)) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "string", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", })) - require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ - "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_REQUEST_IP": "string", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", - })) eventFormat := func(index int) string { return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, @@ -1372,123 +1172,111 @@ func TestSnowPipeStreaming(t *testing.T) { "identify", ) } - - t.Log("Sending 5 events") - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) - - require.Eventually(t, func() bool { - 
var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") - - t.Log("Schema modified") - _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.USERS DROP COLUMN CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_REQUEST_IP;", namespace)) - require.NoError(t, err) - _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.IDENTIFIES DROP COLUMN CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_REQUEST_IP;", namespace)) - require.NoError(t, err) - _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.USERS ADD COLUMN CONTEXT_IP NUMBER, CONTEXT_PASSED_IP NUMBER, CONTEXT_REQUEST_IP NUMBER;", namespace)) - require.NoError(t, err) - _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.IDENTIFIES ADD COLUMN CONTEXT_IP NUMBER, CONTEXT_PASSED_IP NUMBER, CONTEXT_REQUEST_IP NUMBER;", namespace)) - require.NoError(t, err) - - t.Log("Sending 5 events again") - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) - - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 10 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 20 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") - - metrics := getPrometheusMetrics(t, prometheusPort) - require.Equal(t, 1, len(metrics["snowpipestreaming_create_channel_count"].GetMetric())) - require.Equal(t, float64(2), metrics["snowpipestreaming_create_channel_count"].GetMetric()[0].Counter.GetValue()) + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", 
"CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) - ts := timeutil.Now().Format("2006-01-02") - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - }, - 
usersRecords, - ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - }, - identifiesRecords, - ) + identifiesRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) + require.ElementsMatch(t, identifiesRecords(source, destination), identifiesRecordsFromDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) +} - t.Run("schema modified after channel creation (table deleted)", func(t *testing.T) {}) +func initializeTestEnvironment(t testing.TB) (*postgres.Resource, int) { + t.Helper() + + config.Reset() + t.Cleanup(config.Reset) + + pool, err := dockertest.NewPool("") + require.NoError(t, err) + postgresContainer, err := postgres.Setup(pool, t) + require.NoError(t, err) + + gatewayPort, err := kithelper.GetFreePort() + require.NoError(t, 
err) + + return postgresContainer, gatewayPort +} - t.Run("schema modified after channel creation (schema deleted)", func(t *testing.T) {}) +func setupBackendConfigTestServer( + t testing.TB, + credentials *testhelper.TestCredentials, + namespace string, +) ( + *httptest.Server, + backendconfig.SourceT, + backendconfig.DestinationT, +) { + t.Helper() - t.Run("schema modified after channel creation (columns deleted)", func(t *testing.T) {}) + destination := backendconfigtest. + NewDestinationBuilder("SNOWPIPE_STREAMING"). + WithID("destination1"). + WithConfigOption("account", credentials.Account). + WithConfigOption("warehouse", credentials.Warehouse). + WithConfigOption("database", credentials.Database). + WithConfigOption("role", credentials.Role). + WithConfigOption("user", credentials.User). + WithConfigOption("useKeyPairAuth", true). + WithConfigOption("privateKey", credentials.PrivateKey). + WithConfigOption("privateKeyPassphrase", credentials.PrivateKeyPassphrase). + WithConfigOption("namespace", namespace). + WithRevisionID("destination1"). + Build() + + source := backendconfigtest.NewSourceBuilder(). + WithID("source1"). + WithWriteKey("writekey1"). + WithConnection(destination). + Build() + + backendConfigServer := backendconfigtest.NewBuilder(). + WithWorkspaceConfig( + backendconfigtest.NewConfigBuilder(). + WithSource(source). + Build()). + Build() + t.Cleanup(backendConfigServer.Close) + return backendConfigServer, source, destination } -func runRudderServer(ctx context.Context, port int, postgresContainer *postgres.Resource, cbURL, transformerURL, snowpipeClientsURL, tmpDir string) (err error) { +func runRudderServer( + ctx context.Context, + port int, + postgresContainer *postgres.Resource, + cbURL, transformerURL, snowpipeClientsURL, + tmpDir string, +) (err error) { + config.Set("INSTANCE_ID", "1") config.Set("CONFIG_BACKEND_URL", cbURL) config.Set("WORKSPACE_TOKEN", "token") + config.Set("DEST_TRANSFORM_URL", transformerURL) config.Set("DB.host", postgresContainer.Host) config.Set("DB.port", postgresContainer.Port) config.Set("DB.user", postgresContainer.User) config.Set("DB.name", postgresContainer.Database) config.Set("DB.password", postgresContainer.Password) - config.Set("DEST_TRANSFORM_URL", transformerURL) config.Set("SnowpipeStreaming.Client.URL", snowpipeClientsURL) - config.Set("BatchRouter.pollStatusLoopSleep", "1s") - config.Set("BatchRouter.asyncUploadTimeout", "1s") - config.Set("BatchRouter.asyncUploadWorkerTimeout", "1s") - config.Set("BatchRouter.mainLoopFreq", "1s") - config.Set("BatchRouter.uploadFreq", "1s") + config.Set("BatchRouter.SNOWPIPE_STREAMING.mainLoopFreq", "1s") // default 30s + config.Set("BatchRouter.SNOWPIPE_STREAMING.uploadFreq", "1s") // default 30s + config.Set("BatchRouter.SNOWPIPE_STREAMING.minIdleSleep", "1s") // default 2s + config.Set("BatchRouter.SNOWPIPE_STREAMING.maxEventsInABatch", 10000) // default 10000 + config.Set("BatchRouter.SNOWPIPE_STREAMING.maxPayloadSizeInBytes", 512*bytesize.KB) // default 10kb + config.Set("BatchRouter.SNOWPIPE_STREAMING.asyncUploadWorkerTimeout", "1s") // default 10s + config.Set("BatchRouter.SNOWPIPE_STREAMING.asyncUploadTimeout", "1s") // default 30m + config.Set("BatchRouter.SNOWPIPE_STREAMING.pollStatusLoopSleep", "1s") // default 10s config.Set("BatchRouter.isolationMode", "none") - config.Set("Warehouse.mode", "off") config.Set("DestinationDebugger.disableEventDeliveryStatusUploads", true) config.Set("SourceDebugger.disableEventUploads", true) @@ -1497,8 +1285,6 @@ func runRudderServer(ctx 
context.Context, port int, postgresContainer *postgres. config.Set("JobsDB.migrateDSLoopSleepDuration", "60m") config.Set("archival.Enabled", false) config.Set("Reporting.syncer.enabled", false) - config.Set("BatchRouter.mainLoopFreq", "1s") - config.Set("BatchRouter.uploadFreq", "1s") config.Set("Gateway.webPort", strconv.Itoa(port)) config.Set("RUDDER_TMPDIR", os.TempDir()) config.Set("recovery.storagePath", path.Join(tmpDir, "/recovery_data.json")) @@ -1511,16 +1297,20 @@ func runRudderServer(ctx context.Context, port int, postgresContainer *postgres. err = fmt.Errorf("panicked: %v", r) } }() - r := runner.New(runner.ReleaseInfo{EnterpriseToken: "TOKEN"}) - c := r.Run(ctx, - []string{"proc-isolation-test-rudder-server"}) + r := runner.New(runner.ReleaseInfo{EnterpriseToken: "TOKEN", Version: uuid.NewString()}) + c := r.Run(ctx, []string{"snowpipe-streaming-rudder-server"}) if c != 0 { err = fmt.Errorf("rudder-server exited with a non-0 exit code: %d", c) } return } -func sendEvents(num int, eventFormat func(index int) string, writeKey, url string) error { // nolint:unparam +// nolint:unparam +func sendEvents( + num int, + eventFormat func(index int) string, + writeKey, url string, +) error { for i := 0; i < num; i++ { payload := []byte(eventFormat(i)) req, err := http.NewRequest(http.MethodPost, url+"/v1/batch", bytes.NewReader(payload)) @@ -1541,21 +1331,59 @@ func sendEvents(num int, eventFormat func(index int) string, writeKey, url strin return nil } -func dropSchema(t *testing.T, db *sql.DB, namespace string) { +// nolint:unparam +func requireGatewayJobsCount( + t testing.TB, + ctx context.Context, + db *sql.DB, + status string, + expectedCount int, +) { + t.Helper() + t.Log("Verifying gateway jobs count") + + query := fmt.Sprintf("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = '%s'", status) + count := 0 + require.Eventually(t, + func() bool { + err := db.QueryRowContext(ctx, query).Scan(&count) + if err != nil { + t.Log("Error while querying for jobs count: ", err) + return false + } + t.Logf("require gateway count: %d, expected: %d", count, expectedCount) + return count == expectedCount + }, + 20*time.Second, + 1*time.Second, + ) +} + +// nolint:unparam +func requireBatchRouterJobsCount( + t testing.TB, + ctx context.Context, + db *sql.DB, + status string, + expectedCount int, +) { t.Helper() - t.Log("dropping schema", namespace) + t.Log("Verifying batch router jobs count") + query := fmt.Sprintf("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = '%s'", status) + count := 0 require.Eventually(t, func() bool { - _, err := db.ExecContext(context.Background(), fmt.Sprintf(`DROP SCHEMA %q CASCADE;`, namespace)) + err := db.QueryRowContext(ctx, query).Scan(&count) if err != nil { - t.Logf("error deleting schema %q: %v", namespace, err) + t.Log("Error while querying for jobs count: ", err) return false } - return true + t.Logf("require batch router count: %d, expected: %d", count, expectedCount) + return count == expectedCount }, - time.Minute, - time.Second, + 200*time.Second, + 1*time.Second, ) } @@ -1569,37 +1397,116 @@ func convertRecordsToSchema(input [][]string) map[string]map[string]string { }) } -func getPrometheusMetrics(t *testing.T, prometheusPort int, requiredMetrics ...string) map[string]*promClient.MetricFamily { - t.Helper() +func tracksRecords( + source backendconfig.SourceT, + destination backendconfig.DestinationT, +) [][]string { + ts := timeutil.Now().Format("2006-01-02") + eventName := "Product Reviewed" + tableName := 
strcase.ToSnake(eventName) + return [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + } +} - buf := make([]byte, 0) - url := fmt.Sprintf("http://localhost:%d/metrics", prometheusPort) +func tracksRecordsForDiscards( + source backendconfig.SourceT, + destination backendconfig.DestinationT, +) [][]string { + ts := timeutil.Now().Format("2006-01-02") + eventName := "Product Reviewed" + tableName := strcase.ToSnake(eventName) + return [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + } +} - require.Eventuallyf(t, func() bool { - resp, err := http.Get(url) - if err != nil { - t.Logf("Failed to fetch metrics: %v", err) - return false - } - defer httputil.CloseResponse(resp) +func productReviewedRecords( + source backendconfig.SourceT, + destination backendconfig.DestinationT, +) [][]string { + ts := timeutil.Now().Format("2006-01-02") + eventName := "Product Reviewed" + tableName := strcase.ToSnake(eventName) + return [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "1", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "2", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. 
It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "3", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "4", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "14.5.67.21", source.ID, source.SourceDefinition.Name, tableName, eventName, "5", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + } +} - buf, err = io.ReadAll(resp.Body) - if err != nil { - t.Logf("Failed to read response body: %v", err) - return false - } +func productReviewedRecordsForDiscards( + source backendconfig.SourceT, + destination backendconfig.DestinationT, +) [][]string { + ts := timeutil.Now().Format("2006-01-02") + eventName := "Product Reviewed" + tableName := strcase.ToSnake(eventName) + return [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "1", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "2", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "3", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "4", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "5", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. 
It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + } +} - bufString := string(buf) - for _, metric := range requiredMetrics { - if !strings.Contains(bufString, metric) { - return false - } - } - return true - }, time.Minute, 100*time.Millisecond, "Cannot find metrics in time: %s", buf) +func identifiesRecords( + source backendconfig.SourceT, + destination backendconfig.DestinationT, +) [][]string { + ts := timeutil.Now().Format("2006-01-02") + return [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + } +} - metrics, err := testhelper.ParsePrometheusMetrics(bytes.NewBuffer(buf)) - require.NoError(t, err) +func discardsRecords() [][]string { + return append(discardProductReviewedRecords(), discardTracksRecords()...) 
+} - return metrics +func discardProductReviewedRecords() [][]string { + ts := timeutil.Now().Format("2006-01-02") + return [][]string{ + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "PRODUCT_REVIEWED", ts}, + } +} + +func discardTracksRecords() [][]string { + ts := timeutil.Now().Format("2006-01-02") + return [][]string{ + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "TRACKS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "TRACKS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "TRACKS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "TRACKS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "TRACKS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "TRACKS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "TRACKS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "TRACKS", ts}, + {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "TRACKS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "TRACKS", ts}, + } } diff --git a/integration_test/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml b/integration_test/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml index 0c9f5ea530..4c223f96b6 100644 --- a/integration_test/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml +++ b/integration_test/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml @@ -2,7 +2,7 @@ version: "3.9" services: rudder-snowpipe-clients: - image: "hub.dev-rudder.rudderlabs.com/dockerhub-proxy/rudderstack/rudder-snowpipe-clients:develop" + image: "rudderstack/rudder-snowpipe-clients:develop" ports: - "9078" healthcheck: diff --git a/integration_test/snowpipestreaming/testdata/docker-compose.rudder-transformer.yml b/integration_test/snowpipestreaming/testdata/docker-compose.rudder-transformer.yml index 3141df23b2..f822bf374b 100644 --- a/integration_test/snowpipestreaming/testdata/docker-compose.rudder-transformer.yml +++ b/integration_test/snowpipestreaming/testdata/docker-compose.rudder-transformer.yml @@ -2,7 +2,7 @@ version: "3.9" services: transformer: - image: 
"hub.dev-rudder.rudderlabs.com/dockerhub-proxy/rudderstack/develop-rudder-transformer:latest" + image: "rudderstack/develop-rudder-transformer:fix.snowpipe-streaming-users" ports: - "9090:9090" healthcheck: diff --git a/processor/transformer/transformer.go b/processor/transformer/transformer.go index 45164410ef..2368576d1e 100644 --- a/processor/transformer/transformer.go +++ b/processor/transformer/transformer.go @@ -537,6 +537,9 @@ func (trans *handle) destTransformURL(destType string) string { return destinationEndPoint + "?" + whSchemaVersionQueryParam } } + if destType == warehouseutils.SnowpipeStreaming { + return destinationEndPoint + "?" + fmt.Sprintf("whSchemaVersion=%s&whIDResolve=%v", trans.conf.GetString("Warehouse.schemaVersion", "v1"), warehouseutils.IDResolutionEnabled()) + } return destinationEndPoint } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/apiadapter.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/apiadapter.go index 4fd53b975d..68b2331d80 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/apiadapter.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/apiadapter.go @@ -2,70 +2,111 @@ package snowpipestreaming import ( "context" + "strconv" + "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" ) -type apiAdapter struct { - stats struct { - createChannelCount stats.Counter - deleteChannelCount stats.Counter - insertCount stats.Counter - statusCount stats.Counter - createChannelResponseTime stats.Timer - deleteChannelResponseTime stats.Timer - insertResponseTime stats.Timer - statusResponseTime stats.Timer +func newApiAdapter( + logger logger.Logger, + statsFactory stats.Stats, + api api, + destination *backendconfig.DestinationT, +) api { + return &apiAdapter{ + logger: logger, + statsFactory: statsFactory, + destination: destination, + api: api, } - - api } -func newApiAdapter(api api, statsFactory stats.Stats, destination *backendconfig.DestinationT) *apiAdapter { - adapter := &apiAdapter{} - adapter.api = api - - tags := stats.Tags{ +func (a *apiAdapter) defaultTags() stats.Tags { + return stats.Tags{ "module": "batch_router", - "workspaceId": destination.WorkspaceID, - "destType": destination.DestinationDefinition.Name, - "destinationId": destination.ID, + "workspaceId": a.destination.WorkspaceID, + "destType": a.destination.DestinationDefinition.Name, + "destinationId": a.destination.ID, } - adapter.stats.createChannelCount = statsFactory.NewTaggedStat("snowpipestreaming_create_channel_count", stats.CountType, tags) - adapter.stats.deleteChannelCount = statsFactory.NewTaggedStat("snowpipestreaming_delete_channel_count", stats.CountType, tags) - adapter.stats.insertCount = statsFactory.NewTaggedStat("snowpipestreaming_insert_count", stats.CountType, tags) - adapter.stats.statusCount = statsFactory.NewTaggedStat("snowpipestreaming_status_count", stats.CountType, tags) - adapter.stats.createChannelResponseTime = statsFactory.NewTaggedStat("snowpipestreaming_create_channel_response_time", stats.TimerType, tags) - adapter.stats.deleteChannelResponseTime = statsFactory.NewTaggedStat("snowpipestreaming_delete_channel_response_time", stats.TimerType, tags) - adapter.stats.insertResponseTime = statsFactory.NewTaggedStat("snowpipestreaming_insert_response_time", 
stats.TimerType, tags) - adapter.stats.statusResponseTime = statsFactory.NewTaggedStat("snowpipestreaming_status_response_time", stats.TimerType, tags) - - return adapter } func (a *apiAdapter) CreateChannel(ctx context.Context, req *model.CreateChannelRequest) (*model.ChannelResponse, error) { - defer a.stats.createChannelCount.Increment() - defer a.stats.createChannelResponseTime.RecordDuration()() - return a.api.CreateChannel(ctx, req) + a.logger.Infon("Creating channel", + logger.NewStringField("rudderIdentifier", req.RudderIdentifier), + logger.NewStringField("partition", req.Partition), + logger.NewStringField("database", req.TableConfig.Database), + logger.NewStringField("namespace", req.TableConfig.Schema), + logger.NewStringField("table", req.TableConfig.Table), + ) + tags := a.defaultTags() + tags["api"] = "create_channel" + + responseTimeStat := a.statsFactory.NewTaggedStat("snowpipe_streaming_api_response_time", stats.TimerType, tags) + defer responseTimeStat.RecordDuration()() + + resp, err := a.api.CreateChannel(ctx, req) + if err != nil { + tags["status"] = "false" + return nil, err + } + tags["status"] = strconv.FormatBool(resp.Success) + tags["code"] = resp.Code + return resp, nil } func (a *apiAdapter) DeleteChannel(ctx context.Context, channelID string, sync bool) error { - defer a.stats.deleteChannelCount.Increment() - defer a.stats.deleteChannelResponseTime.RecordDuration()() - return a.api.DeleteChannel(ctx, channelID, sync) + a.logger.Infon("Deleting channel", + logger.NewStringField("channelId", channelID), + logger.NewBoolField("sync", sync), + ) + tags := a.defaultTags() + tags["api"] = "delete_channel" + + responseTimeStat := a.statsFactory.NewTaggedStat("snowpipe_streaming_api_response_time", stats.TimerType, tags) + defer responseTimeStat.RecordDuration()() + + err := a.api.DeleteChannel(ctx, channelID, sync) + if err != nil { + tags["status"] = "false" + return err + } + tags["status"] = "true" + return nil } func (a *apiAdapter) Insert(ctx context.Context, channelID string, insertRequest *model.InsertRequest) (*model.InsertResponse, error) { - defer a.stats.insertCount.Increment() - defer a.stats.insertResponseTime.RecordDuration()() - return a.api.Insert(ctx, channelID, insertRequest) + tags := a.defaultTags() + tags["api"] = "insert" + + responseTimeStat := a.statsFactory.NewTaggedStat("snowpipe_streaming_api_response_time", stats.TimerType, tags) + defer responseTimeStat.RecordDuration()() + + resp, err := a.api.Insert(ctx, channelID, insertRequest) + if err != nil { + tags["status"] = "false" + return nil, err + } + tags["status"] = strconv.FormatBool(resp.Success) + tags["code"] = resp.Code + return resp, nil } func (a *apiAdapter) Status(ctx context.Context, channelID string) (*model.StatusResponse, error) { - defer a.stats.statusCount.Increment() - defer a.stats.statusResponseTime.RecordDuration()() - return a.api.Status(ctx, channelID) + tags := a.defaultTags() + tags["api"] = "status" + + responseTimeStat := a.statsFactory.NewTaggedStat("snowpipe_streaming_api_response_time", stats.TimerType, tags) + defer responseTimeStat.RecordDuration()() + + resp, err := a.api.Status(ctx, channelID) + if err != nil { + tags["status"] = "false" + return nil, err + } + tags["status"] = strconv.FormatBool(resp.Success) + return resp, nil } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/channel.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/channel.go index 7f7042866c..8b45e8801f 100644 --- 
a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/channel.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/channel.go @@ -4,17 +4,52 @@ import ( "context" "fmt" - "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common" + "github.com/rudderlabs/rudder-go-kit/logger" + internalapi "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api" "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" "github.com/rudderlabs/rudder-server/warehouse/integrations/manager" whutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) +func (m *Manager) prepareChannelResponse( + ctx context.Context, + destinationID string, + destConf *destConfig, + tableName string, + eventSchema whutils.ModelTableSchema, +) (*model.ChannelResponse, error) { + channelResponse, err := m.createChannel(ctx, destinationID, destConf, tableName, eventSchema) + if err != nil { + return nil, fmt.Errorf("creating channel for table %s: %w", tableName, err) + } + + columnInfos := findNewColumns(eventSchema, channelResponse.SnowPipeSchema) + if len(columnInfos) > 0 { + m.logger.Infon("Adding columns", + logger.NewStringField("table", tableName), + logger.NewIntField("columns", int64(len(columnInfos))), + ) + + if err := m.addColumns(ctx, destConf.Namespace, tableName, columnInfos); err != nil { + return nil, fmt.Errorf("adding columns for table %s: %w", tableName, err) + } + + channelResponse, err = m.recreateChannel(ctx, destinationID, destConf, tableName, eventSchema, channelResponse.ChannelID) + if err != nil { + return nil, fmt.Errorf("recreating channel for table %s: %w", tableName, err) + } + } + return channelResponse, nil +} + +// createChannel creates a new channel for importing data to Snowpipe. +// If the channel already exists in the cache, it returns the cached response. +// Otherwise, it sends a request to create a new channel and handles potential errors. 
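+// The switch below drives recovery based on the Snowpipe client error codes: a missing schema
+// (internalapi.ErrSchemaDoesNotExistOrNotAuthorized) creates both the schema and the table before
+// retrying, a missing table (internalapi.ErrTableDoesNotExistOrNotAuthorized) only creates the
+// table, and any other failure is returned with the code and message reported by the Snowflake API.
+// Successful responses are cached per table name so later imports reuse the channel. Illustrative
+// call, mirroring prepareChannelResponse above:
+//
+//	resp, err := m.createChannel(ctx, destinationID, destConf, tableName, eventSchema)
+//	if err != nil {
+//		return nil, fmt.Errorf("creating channel for table %s: %w", tableName, err)
+//	}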
func (m *Manager) createChannel( ctx context.Context, - asyncDest *common.AsyncDestinationStruct, - destConf destConfig, + rudderIdentifier string, + destConf *destConfig, tableName string, eventSchema whutils.ModelTableSchema, ) (*model.ChannelResponse, error) { @@ -23,7 +58,7 @@ func (m *Manager) createChannel( } req := &model.CreateChannelRequest{ - RudderIdentifier: asyncDest.Destination.ID, + RudderIdentifier: rudderIdentifier, Partition: m.config.instanceID, AccountConfig: model.AccountConfig{ Account: destConf.Account, @@ -41,7 +76,7 @@ func (m *Manager) createChannel( resp, err := m.api.CreateChannel(ctx, req) if err != nil { - return nil, fmt.Errorf("creating channel: %v", err) + return nil, fmt.Errorf("creating channel: %w", err) } if resp.Success { m.channelCache.Store(tableName, resp) @@ -52,94 +87,113 @@ func (m *Manager) createChannel( case internalapi.ErrSchemaDoesNotExistOrNotAuthorized: resp, err = m.handleSchemaError(ctx, req, eventSchema) if err != nil { - return nil, fmt.Errorf("handling schema error: %v", err) + return nil, fmt.Errorf("creating channel for schema error: %w", err) } if !resp.Success { - return nil, fmt.Errorf("creating channel for schema error: %s", resp.Error) + return nil, fmt.Errorf("creating channel for schema error with code %s, message: %s and error: %s", resp.Code, resp.SnowflakeAPIMessage, resp.Error) } m.channelCache.Store(tableName, resp) return resp, nil case internalapi.ErrTableDoesNotExistOrNotAuthorized: resp, err = m.handleTableError(ctx, req, eventSchema) if err != nil { - return nil, fmt.Errorf("handling table error: %v", err) + return nil, fmt.Errorf("creating channel for table error: %w", err) } if !resp.Success { - return nil, fmt.Errorf("creating channel for table error: %s", resp.Error) + return nil, fmt.Errorf("creating channel for table error with code %s, message: %s and error: %s", resp.Code, resp.SnowflakeAPIMessage, resp.Error) } m.channelCache.Store(tableName, resp) return resp, nil default: - return nil, fmt.Errorf("creating channel: %v", err) + return nil, fmt.Errorf("creating channel with code %s, message: %s and error: %s", resp.Code, resp.SnowflakeAPIMessage, resp.Error) } } +// handleSchemaError handles errors related to missing schemas. +// It creates the necessary schema and table, then attempts to create the channel again. func (m *Manager) handleSchemaError( ctx context.Context, channelReq *model.CreateChannelRequest, eventSchema whutils.ModelTableSchema, ) (*model.ChannelResponse, error) { - m.stats.channelSchemaCreationErrorCount.Increment() + m.logger.Infon("Handling schema error", + logger.NewStringField("schema", channelReq.TableConfig.Schema), + logger.NewStringField("table", channelReq.TableConfig.Table), + ) snowflakeManager, err := m.createSnowflakeManager(ctx, channelReq.TableConfig.Schema) if err != nil { - return nil, fmt.Errorf("creating snowflake manager: %v", err) + return nil, fmt.Errorf("creating snowflake manager: %w", err) } defer func() { snowflakeManager.Cleanup(ctx) }() if err := snowflakeManager.CreateSchema(ctx); err != nil { - return nil, fmt.Errorf("creating schema: %v", err) + return nil, fmt.Errorf("creating schema: %w", err) } if err := snowflakeManager.CreateTable(ctx, channelReq.TableConfig.Table, eventSchema); err != nil { - return nil, fmt.Errorf("creating table: %v", err) + return nil, fmt.Errorf("creating table: %w", err) } return m.api.CreateChannel(ctx, channelReq) } +// handleTableError handles errors related to missing tables. 
+// It creates the necessary table and then attempts to create the channel again. func (m *Manager) handleTableError( ctx context.Context, channelReq *model.CreateChannelRequest, eventSchema whutils.ModelTableSchema, ) (*model.ChannelResponse, error) { - m.stats.channelTableCreationErrorCount.Increment() + m.logger.Infon("Handling table error", + logger.NewStringField("schema", channelReq.TableConfig.Schema), + logger.NewStringField("table", channelReq.TableConfig.Table), + ) snowflakeManager, err := m.createSnowflakeManager(ctx, channelReq.TableConfig.Schema) if err != nil { - return nil, fmt.Errorf("creating snowflake manager: %v", err) + return nil, fmt.Errorf("creating snowflake manager: %w", err) } defer func() { snowflakeManager.Cleanup(ctx) }() if err := snowflakeManager.CreateTable(ctx, channelReq.TableConfig.Table, eventSchema); err != nil { - return nil, fmt.Errorf("creating table: %v", err) + return nil, fmt.Errorf("creating table: %w", err) } return m.api.CreateChannel(ctx, channelReq) } +// recreateChannel deletes an existing channel and then creates a new one. +// It returns the new channel response or an error if the process fails. func (m *Manager) recreateChannel( ctx context.Context, - asyncDest *common.AsyncDestinationStruct, - destConf destConfig, + destinationID string, + destConf *destConfig, tableName string, eventSchema whutils.ModelTableSchema, - existingChannelResponse *model.ChannelResponse, + existingChannelID string, ) (*model.ChannelResponse, error) { - if err := m.deleteChannel(ctx, tableName, existingChannelResponse.ChannelID); err != nil { - return nil, fmt.Errorf("deleting channel: %v", err) + m.logger.Infon("Recreating channel", + logger.NewStringField("destinationID", destinationID), + logger.NewStringField("tableName", tableName), + ) + + if err := m.deleteChannel(ctx, tableName, existingChannelID); err != nil { + return nil, fmt.Errorf("deleting channel: %w", err) } - channelResponse, err := m.createChannel(ctx, asyncDest, destConf, tableName, eventSchema) + channelResponse, err := m.createChannel(ctx, destinationID, destConf, tableName, eventSchema) if err != nil { - return nil, fmt.Errorf("recreating channel: %v", err) + return nil, fmt.Errorf("recreating channel: %w", err) } return channelResponse, nil } -func (m *Manager) deleteChannel(ctx context.Context, tableName string, channelID string) error { +// deleteChannel removes a channel from the cache and deletes it from the Snowpipe. +// It returns an error if the deletion fails. 
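+// The cache entry is evicted before the remote delete, so even a failed delete does not leave a
+// stale channel in the cache; the next import for the table simply opens a fresh channel. Call
+// pattern as used by recreateChannel above:
+//
+//	if err := m.deleteChannel(ctx, tableName, existingChannelID); err != nil {
+//		return nil, fmt.Errorf("deleting channel: %w", err)
+//	}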
+func (m *Manager) deleteChannel(ctx context.Context, tableName, channelID string) error { m.channelCache.Delete(tableName) if err := m.api.DeleteChannel(ctx, channelID, true); err != nil { - return fmt.Errorf("deleting channel: %v", err) + return fmt.Errorf("deleting channel: %w", err) } return nil } @@ -154,13 +208,13 @@ func (m *Manager) createSnowflakeManager(ctx context.Context, namespace string) } modelWarehouse.Destination.Config["useKeyPairAuth"] = true // Since we are currently only supporting key pair auth - sf, err := manager.New(whutils.SNOWFLAKE, m.conf, m.logger, m.statsFactory) + sf, err := manager.New(whutils.SnowpipeStreaming, m.conf, m.logger, m.statsFactory) if err != nil { - return nil, fmt.Errorf("creating snowflake manager: %v", err) + return nil, fmt.Errorf("creating snowflake manager: %w", err) } err = sf.Setup(ctx, modelWarehouse, &whutils.NopUploader{}) if err != nil { - return nil, fmt.Errorf("setting up snowflake manager: %v", err) + return nil, fmt.Errorf("setting up snowflake manager: %w", err) } return sf, nil } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/columns.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/columns.go index a3173e8de1..fc6be3fb7d 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/columns.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/columns.go @@ -10,13 +10,13 @@ import ( func (m *Manager) addColumns(ctx context.Context, namespace, tableName string, columns []whutils.ColumnInfo) error { snowflakeManager, err := m.createSnowflakeManager(ctx, namespace) if err != nil { - return fmt.Errorf("creating snowflake manager: %v", err) + return fmt.Errorf("creating snowflake manager: %w", err) } defer func() { snowflakeManager.Cleanup(ctx) }() if err = snowflakeManager.AddColumns(ctx, tableName, columns); err != nil { - return fmt.Errorf("adding columns: %v", err) + return fmt.Errorf("adding columns: %w", err) } return nil } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/discards.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/discards.go index 551f98cec8..a7390acd3e 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/discards.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/discards.go @@ -5,101 +5,75 @@ import ( "fmt" "strconv" - obskit "github.com/rudderlabs/rudder-observability-kit/go/labels" "github.com/samber/lo" + obskit "github.com/rudderlabs/rudder-observability-kit/go/labels" + "github.com/rudderlabs/rudder-go-kit/logger" - "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common" "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" "github.com/rudderlabs/rudder-server/warehouse/slave" whutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) -func (m *Manager) loadDiscardsToSnowPipe( +// sendDiscardEVentsToSnowpipe uploads discarded records to the Snowpipe table. +// It creates a channel for the upload, adds any new columns needed, and inserts the discard data. 
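+// Note that the discards channel itself is created upstream and handed in through uploadInfo
+// (info.discardChannelResponse); this function builds the discard rows, issues the insert with the
+// latest job ID of the import as the offset, and, if the insert errors or reports failure, deletes
+// the channel so the next attempt starts from a clean one. On success it returns an importInfo of
+// the form:
+//
+//	&importInfo{
+//		ChannelID: info.discardChannelResponse.ChannelID,
+//		Offset:    offset,
+//		Table:     tableName,
+//		Count:     len(discardInfos),
+//	}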
+func (m *Manager) sendDiscardEVentsToSnowpipe( ctx context.Context, - asyncDest *common.AsyncDestinationStruct, - destConf destConfig, + info *uploadInfo, discardInfos []discardInfo, -) (*uploadInfo, error) { - tableName, eventSchema := discardsTable(), discardsSchema() +) (*importInfo, error) { + tableName := discardsTable() + offset := strconv.FormatInt(info.latestJobID, 10) log := m.logger.Withn( logger.NewStringField("table", tableName), logger.NewIntField("events", int64(len(discardInfos))), + logger.NewStringField("offset", offset), ) - log.Infon("Uploading data to table") - - channelResponse, err := m.createChannel(ctx, asyncDest, destConf, tableName, eventSchema) - if err != nil { - return nil, fmt.Errorf("creating channel: %v", err) - } - - columnInfos := findNewColumns(eventSchema, channelResponse.SnowPipeSchema()) - if len(columnInfos) > 0 { - if err := m.addColumns(ctx, destConf.Namespace, tableName, columnInfos); err != nil { - return nil, fmt.Errorf("adding columns: %v", err) - } - - channelResponse, err = m.recreateChannel(ctx, asyncDest, destConf, tableName, eventSchema, channelResponse) - if err != nil { - return nil, fmt.Errorf("recreating channel: %v", err) - } - } - - offset := strconv.FormatInt(m.now().Unix(), 10) insertReq := &model.InsertRequest{ - Rows: createRowsFromDiscardInfos(discardInfos), + Rows: discardRows(discardInfos), Offset: offset, } - insertRes, err := m.api.Insert(ctx, channelResponse.ChannelID, insertReq) - if err != nil { - if deleteErr := m.deleteChannel(ctx, tableName, channelResponse.ChannelID); deleteErr != nil { - m.logger.Warnn("Failed to delete channel", - logger.NewStringField("table", tableName), - obskit.Error(deleteErr), - ) + insertRes, err := m.api.Insert(ctx, info.discardChannelResponse.ChannelID, insertReq) + defer func() { + if err != nil || !insertRes.Success { + if deleteErr := m.deleteChannel(ctx, tableName, info.discardChannelResponse.ChannelID); deleteErr != nil { + log.Warnn("Failed to delete channel", obskit.Error(deleteErr)) + } } - return nil, fmt.Errorf("inserting data: %v", err) + }() + if err != nil { + return nil, fmt.Errorf("inserting data to discards: %v", err) } if !insertRes.Success { - if deleteErr := m.deleteChannel(ctx, tableName, channelResponse.ChannelID); deleteErr != nil { - m.logger.Warnn("Failed to delete channel", - logger.NewStringField("table", tableName), - obskit.Error(deleteErr), - ) - } return nil, errInsertingDataFailed } - m.logger.Infon("Successfully uploaded data to table", - logger.NewStringField("table", tableName), - logger.NewIntField("events", int64(len(discardInfos))), - ) - m.stats.discardCount.Count(len(discardInfos)) - idOffset := &uploadInfo{ - ChannelID: channelResponse.ChannelID, + imInfo := &importInfo{ + ChannelID: info.discardChannelResponse.ChannelID, Offset: offset, Table: tableName, + Count: len(discardInfos), } - return idOffset, nil + return imInfo, nil } func discardsTable() string { - return whutils.ToProviderCase(whutils.SNOWFLAKE, whutils.DiscardsTable) + return whutils.ToProviderCase(whutils.SnowpipeStreaming, whutils.DiscardsTable) } func discardsSchema() whutils.ModelTableSchema { return lo.MapEntries(whutils.DiscardsSchema, func(colName, colType string) (string, string) { - return whutils.ToProviderCase(whutils.SNOWFLAKE, colName), colType + return whutils.ToProviderCase(whutils.SnowpipeStreaming, colName), colType }) } -func createRowsFromDiscardInfos(discardInfos []discardInfo) []model.Row { +func discardRows(discardInfos []discardInfo) []model.Row { return 
lo.FilterMap(discardInfos, func(info discardInfo, _ int) (model.Row, bool) { - id, idExists := info.eventData[whutils.ToProviderCase(whutils.SNOWFLAKE, "id")] - receivedAt, receivedAtExists := info.eventData[whutils.ToProviderCase(whutils.SNOWFLAKE, "received_at")] + id, idExists := info.eventData[whutils.ToProviderCase(whutils.SnowpipeStreaming, "id")] + receivedAt, receivedAtExists := info.eventData[whutils.ToProviderCase(whutils.SnowpipeStreaming, "received_at")] if !idExists || !receivedAtExists { return nil, false @@ -118,7 +92,7 @@ func createRowsFromDiscardInfos(discardInfos []discardInfo) []model.Row { } func discardedRecords( - event event, + event *event, snowPipeSchema whutils.ModelTableSchema, tableName string, formattedTS string, diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api.go index 7d0a8aa7fb..bbb15388ca 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api.go @@ -3,6 +3,8 @@ package api import ( "io" "net/http" + + jsoniter "github.com/json-iterator/go" ) type API struct { @@ -14,6 +16,8 @@ type requestDoer interface { Do(*http.Request) (*http.Response, error) } +var json = jsoniter.ConfigCompatibleWithStandardLibrary + func New(clientURL string, requestDoer requestDoer) *API { return &API{ clientURL: clientURL, @@ -21,7 +25,7 @@ func New(clientURL string, requestDoer requestDoer) *API { } } -func mustReadAll(r io.Reader) []byte { +func mustRead(r io.Reader) []byte { data, _ := io.ReadAll(r) return data } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api_test.go index 70d363d8bc..b156415e11 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api_test.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api_test.go @@ -48,12 +48,12 @@ func (nopReadCloser) Close() error { func TestMustReadAll(t *testing.T) { t.Run("ReadAll", func(t *testing.T) { r := strings.NewReader("hello") - data := mustReadAll(r) + data := mustRead(r) require.Equal(t, []byte("hello"), data) }) t.Run("ReadAll error", func(t *testing.T) { r := iotest.ErrReader(errors.New("error")) - data := mustReadAll(r) + data := mustRead(r) require.Empty(t, data) }) } @@ -79,7 +79,7 @@ func TestAPI(t *testing.T) { ctx := context.Background() - namespace := testhelper.RandSchema(whutils.SNOWFLAKE) + namespace := testhelper.RandSchema() table := "TEST_TABLE" tableSchema := whutils.ModelTableSchema{ "ID": "string", "NAME": "string", "EMAIL": "string", "AGE": "int", "ACTIVE": "boolean", "DOB": "datetime", @@ -133,63 +133,8 @@ func TestAPI(t *testing.T) { require.NotEmpty(t, createChannelRes.ChannelID) require.True(t, createChannelRes.Valid) require.False(t, createChannelRes.Deleted) - require.EqualValues(t, map[string]map[string]interface{}{ - "ACTIVE": { - "byteLength": nil, - "length": nil, - "logicalType": "BOOLEAN", - "nullable": true, - "precision": nil, - "scale": nil, - "type": "BOOLEAN", - }, - "AGE": { - "byteLength": nil, - "length": nil, - "logicalType": "FIXED", - "nullable": true, - "precision": float64(38), - "scale": float64(0), - "type": "NUMBER(38,0)", - }, - "DOB": { - "byteLength": nil, - "length": nil, - "logicalType": "TIMESTAMP_TZ", - "nullable": true, - 
"precision": float64(0), - "scale": float64(9), - "type": "TIMESTAMP_TZ(9)", - }, - "EMAIL": { - "byteLength": 1.6777216e+07, - "length": 1.6777216e+07, - "logicalType": "TEXT", - "nullable": true, - "precision": nil, - "scale": nil, - "type": "VARCHAR(16777216)", - }, - "ID": { - "byteLength": 1.6777216e+07, - "length": 1.6777216e+07, - "logicalType": "TEXT", - "nullable": true, - "precision": nil, - "scale": nil, - "type": "VARCHAR(16777216)", - }, - "NAME": { - "byteLength": 1.6777216e+07, - "length": 1.6777216e+07, - "logicalType": "TEXT", - "nullable": true, - "precision": nil, - "scale": nil, - "type": "VARCHAR(16777216)", - }, - }, - createChannelRes.TableSchema, + require.EqualValues(t, whutils.ModelTableSchema{"ACTIVE": "boolean", "AGE": "int", "DOB": "datetime", "EMAIL": "string", "ID": "string", "NAME": "string"}, + createChannelRes.SnowPipeSchema, ) t.Log("Getting channel") @@ -248,7 +193,7 @@ func TestAPI(t *testing.T) { ctx := context.Background() - namespace := testhelper.RandSchema(whutils.SNOWFLAKE) + namespace := testhelper.RandSchema() table := "TEST_TABLE" tableSchema := whutils.ModelTableSchema{ "ID": "string", "NAME": "string", "EMAIL": "string", "AGE": "int", "ACTIVE": "boolean", "DOB": "datetime", diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/errorcodes.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/codes.go similarity index 100% rename from router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/errorcodes.go rename to router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/codes.go diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel.go index 6553df16f5..166de72f97 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel.go @@ -3,7 +3,6 @@ package api import ( "bytes" "context" - "encoding/json" "fmt" "net/http" @@ -18,8 +17,8 @@ func (a *API) CreateChannel(ctx context.Context, channelReq *model.CreateChannel return nil, fmt.Errorf("marshalling create channel request: %w", err) } - channelReqURL := a.clientURL + "/channels" - req, err := http.NewRequestWithContext(ctx, http.MethodPost, channelReqURL, bytes.NewBuffer(reqJSON)) + craeteChannelURL := a.clientURL + "/channels" + req, err := http.NewRequestWithContext(ctx, http.MethodPost, craeteChannelURL, bytes.NewBuffer(reqJSON)) if err != nil { return nil, fmt.Errorf("creating create channel request: %w", err) } @@ -32,7 +31,7 @@ func (a *API) CreateChannel(ctx context.Context, channelReq *model.CreateChannel defer func() { httputil.CloseResponse(resp) }() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("invalid status code for create channel: %d, body: %s", resp.StatusCode, string(mustReadAll(resp.Body))) + return nil, fmt.Errorf("invalid status code for create channel: %d, body: %s", resp.StatusCode, string(mustRead(resp.Body))) } var res model.ChannelResponse diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel_test.go index dc7975e6d6..98063f0c03 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel_test.go +++ 
b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel_test.go @@ -3,7 +3,6 @@ package api import ( "bytes" "context" - "encoding/json" "errors" "io" "net/http" @@ -13,6 +12,7 @@ import ( "github.com/stretchr/testify/require" "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) func TestCreateChannel(t *testing.T) { @@ -84,41 +84,13 @@ func TestCreateChannel(t *testing.T) { res, err := manager.CreateChannel(ctx, ccr) require.NoError(t, err) require.EqualValues(t, &model.ChannelResponse{ - Success: true, - ChannelID: "channelId", - ChannelName: "channelName", - ClientName: "clientName", - Valid: true, - Deleted: false, - TableSchema: map[string]map[string]interface{}{ - "EVENT": { - "byteLength": 1.6777216e+07, - "length": 1.6777216e+07, - "logicalType": "TEXT", - "nullable": true, - "precision": nil, - "scale": nil, - "type": "VARCHAR(16777216)", - }, - "ID": { - "byteLength": 1.6777216e+07, - "length": 1.6777216e+07, - "logicalType": "TEXT", - "nullable": true, - "precision": nil, - "scale": nil, - "type": "VARCHAR(16777216)", - }, - "TIMESTAMP": { - "byteLength": nil, - "length": nil, - "logicalType": "TIMESTAMP_TZ", - "nullable": true, - "precision": float64(0), - "scale": float64(9), - "type": "TIMESTAMP_TZ(9)", - }, - }, + Success: true, + ChannelID: "channelId", + ChannelName: "channelName", + ClientName: "clientName", + Valid: true, + Deleted: false, + SnowPipeSchema: whutils.ModelTableSchema{"EVENT": "string", "ID": "string", "TIMESTAMP": "datetime"}, }, res, ) diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/deletechannel.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/deletechannel.go index f95c1d14fe..99d241ff58 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/deletechannel.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/deletechannel.go @@ -28,7 +28,7 @@ func (a *API) DeleteChannel(ctx context.Context, channelID string, sync bool) er defer func() { httputil.CloseResponse(resp) }() if resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusAccepted { - return fmt.Errorf("invalid status code for delete channel: %d, body: %s", resp.StatusCode, string(mustReadAll(resp.Body))) + return fmt.Errorf("invalid status code for delete channel: %d, body: %s", resp.StatusCode, string(mustRead(resp.Body))) } return nil } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel.go index ba9210c209..aa135cc159 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel.go @@ -2,7 +2,6 @@ package api import ( "context" - "encoding/json" "fmt" "net/http" @@ -26,7 +25,7 @@ func (a *API) GetChannel(ctx context.Context, channelID string) (*model.ChannelR defer func() { httputil.CloseResponse(resp) }() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("invalid status code for get channel: %d, body: %s", resp.StatusCode, string(mustReadAll(resp.Body))) + return nil, fmt.Errorf("invalid status code for get channel: %d, body: %s", resp.StatusCode, string(mustRead(resp.Body))) } var res model.ChannelResponse diff --git 
a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel_test.go index 8dc2d654b4..bfaef2861e 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel_test.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel_test.go @@ -11,6 +11,7 @@ import ( "github.com/stretchr/testify/require" "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) func TestGetChannel(t *testing.T) { @@ -37,40 +38,12 @@ func TestGetChannel(t *testing.T) { res, err := manager.GetChannel(ctx, channelID) require.NoError(t, err) require.EqualValues(t, &model.ChannelResponse{ - ChannelID: "channelId", - ChannelName: "channelName", - ClientName: "clientName", - Valid: true, - Deleted: false, - TableSchema: map[string]map[string]interface{}{ - "EVENT": { - "byteLength": 1.6777216e+07, - "length": 1.6777216e+07, - "logicalType": "TEXT", - "nullable": true, - "precision": nil, - "scale": nil, - "type": "VARCHAR(16777216)", - }, - "ID": { - "byteLength": 1.6777216e+07, - "length": 1.6777216e+07, - "logicalType": "TEXT", - "nullable": true, - "precision": nil, - "scale": nil, - "type": "VARCHAR(16777216)", - }, - "TIMESTAMP": { - "byteLength": nil, - "length": nil, - "logicalType": "TIMESTAMP_TZ", - "nullable": true, - "precision": float64(0), - "scale": float64(9), - "type": "TIMESTAMP_TZ(9)", - }, - }, + ChannelID: "channelId", + ChannelName: "channelName", + ClientName: "clientName", + Valid: true, + Deleted: false, + SnowPipeSchema: whutils.ModelTableSchema{"EVENT": "string", "ID": "string", "TIMESTAMP": "datetime"}, }, res, ) diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert.go index 9607bc772a..b15b3424e6 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert.go @@ -3,7 +3,6 @@ package api import ( "bytes" "context" - "encoding/json" "fmt" "net/http" @@ -17,8 +16,8 @@ func (a *API) Insert(ctx context.Context, channelID string, insertRequest *model return nil, fmt.Errorf("marshalling insert request: %w", err) } - insertReqURL := a.clientURL + "/channels/" + channelID + "/insert" - req, err := http.NewRequestWithContext(ctx, http.MethodPost, insertReqURL, bytes.NewBuffer(reqJSON)) + insertURL := a.clientURL + "/channels/" + channelID + "/insert" + req, err := http.NewRequestWithContext(ctx, http.MethodPost, insertURL, bytes.NewBuffer(reqJSON)) if err != nil { return nil, fmt.Errorf("creating insert request: %w", err) } @@ -31,7 +30,7 @@ func (a *API) Insert(ctx context.Context, channelID string, insertRequest *model defer func() { httputil.CloseResponse(resp) }() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("invalid status code for insert: %d, body: %s", resp.StatusCode, string(mustReadAll(resp.Body))) + return nil, fmt.Errorf("invalid status code for insert: %d, body: %s", resp.StatusCode, string(mustRead(resp.Body))) } var res model.InsertResponse diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert_test.go 
b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert_test.go index 2da32d898e..1c10a0ef40 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert_test.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert_test.go @@ -3,7 +3,6 @@ package api import ( "bytes" "context" - "encoding/json" "errors" "io" "net/http" @@ -71,7 +70,8 @@ func TestInsert(t *testing.T) { RowIndex: 1, ExtraColNames: []string{"UNKNOWN"}, NullValueForNotNullColNames: nil, - Message: "The given row cannot be converted to the internal format: Extra columns: [UNKNOWN]. Columns not present in the table shouldn't be specified, rowIndex:1"}, + Message: "The given row cannot be converted to the internal format: Extra columns: [UNKNOWN]. Columns not present in the table shouldn't be specified, rowIndex:1", + }, }, Code: "ERR_SCHEMA_CONFLICT", }, diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/status.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/status.go index 47b496563b..8c65fa9f8e 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/status.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/status.go @@ -2,7 +2,6 @@ package api import ( "context" - "encoding/json" "fmt" "net/http" @@ -11,8 +10,8 @@ import ( ) func (a *API) Status(ctx context.Context, channelID string) (*model.StatusResponse, error) { - statusReqURL := a.clientURL + "/channels/" + channelID + "/status" - req, err := http.NewRequestWithContext(ctx, http.MethodGet, statusReqURL, nil) + statusURL := a.clientURL + "/channels/" + channelID + "/status" + req, err := http.NewRequestWithContext(ctx, http.MethodGet, statusURL, nil) if err != nil { return nil, fmt.Errorf("creating status request: %w", err) } @@ -25,7 +24,7 @@ func (a *API) Status(ctx context.Context, channelID string) (*model.StatusRespon defer func() { httputil.CloseResponse(resp) }() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("invalid status code for status: %d, body: %s", resp.StatusCode, string(mustReadAll(resp.Body))) + return nil, fmt.Errorf("invalid status code for status: %d, body: %s", resp.StatusCode, string(mustRead(resp.Body))) } var res model.StatusResponse diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model.go index e429d19bf5..81121ac7e1 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model.go @@ -3,12 +3,15 @@ package model import ( "regexp" + jsoniter "github.com/json-iterator/go" + "github.com/rudderlabs/rudder-server/warehouse/integrations/snowflake" whutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) var ( reType = regexp.MustCompile(`(.+?)\([^)]*\)`) + json = jsoniter.ConfigCompatibleWithStandardLibrary ) type ( @@ -31,20 +34,25 @@ type ( Table string `json:"table"` } + ColumnInfo struct { + Type *string `json:"type,omitempty"` + Scale *float64 `json:"scale,omitempty"` + } + ChannelResponse struct { - Success bool `json:"success"` - ChannelID string `json:"channelId"` - ChannelName string `json:"channelName"` - ClientName string `json:"clientName"` - Valid bool `json:"valid"` - Deleted bool `json:"deleted"` - TableSchema map[string]map[string]any 
`json:"tableSchema"` - Error string `json:"error"` - Code string `json:"code"` - SnowflakeSDKCode string `json:"snowflakeSDKCode"` - SnowflakeAPIHttpCode int64 `json:"snowflakeAPIHttpCode"` - SnowflakeAPIStatusCode int64 `json:"snowflakeAPIStatusCode"` - SnowflakeAPIMessage string `json:"snowflakeAPIMessage"` + Success bool `json:"success"` + ChannelID string `json:"channelId"` + ChannelName string `json:"channelName"` + ClientName string `json:"clientName"` + Valid bool `json:"valid"` + Deleted bool `json:"deleted"` + SnowPipeSchema whutils.ModelTableSchema `json:"-"` + Error string `json:"error"` + Code string `json:"code"` + SnowflakeSDKCode string `json:"snowflakeSDKCode"` + SnowflakeAPIHttpCode int64 `json:"snowflakeAPIHttpCode"` + SnowflakeAPIStatusCode int64 `json:"snowflakeAPIStatusCode"` + SnowflakeAPIMessage string `json:"snowflakeAPIMessage"` } InsertRequest struct { @@ -73,23 +81,41 @@ type ( } ) -func (c *ChannelResponse) SnowPipeSchema() whutils.ModelTableSchema { - warehouseSchema := make(whutils.ModelTableSchema) +func (c *ChannelResponse) UnmarshalJSON(data []byte) error { + type Alias ChannelResponse // Prevent recursion + temp := &struct { + TableSchema map[string]ColumnInfo `json:"tableSchema"` + *Alias + }{ + Alias: (*Alias)(c), + } + if err := json.Unmarshal(data, &temp); err != nil { + return err + } + c.SnowPipeSchema = calculateSnowPipeSchema(temp.TableSchema) + return nil +} - for column, info := range c.TableSchema { - dataType, isValidType := info["type"].(string) - if !isValidType { +func calculateSnowPipeSchema(tableSchema map[string]ColumnInfo) whutils.ModelTableSchema { + if len(tableSchema) == 0 { + return nil + } + warehouseSchema := make(whutils.ModelTableSchema) + for column, info := range tableSchema { + if info.Type == nil { continue } numericScale := int64(0) - if scale, scaleExists := info["scale"].(float64); scaleExists { - numericScale = int64(scale) + if info.Scale != nil { + numericScale = int64(*info.Scale) } - cleanedDataType := reType.ReplaceAllString(dataType, "$1") - - snowflakeType, _ := snowflake.CalculateDataType(cleanedDataType, numericScale) + cleanedDataType := reType.ReplaceAllString(*info.Type, "$1") + snowflakeType, ok := snowflake.CalculateDataType(cleanedDataType, numericScale) + if !ok { + continue + } warehouseSchema[column] = snowflakeType } return warehouseSchema diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model_test.go index 9c8696fc02..90f03b17b1 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model_test.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model_test.go @@ -3,6 +3,7 @@ package model import ( "testing" + "github.com/samber/lo" "github.com/stretchr/testify/require" whutils "github.com/rudderlabs/rudder-server/warehouse/utils" @@ -26,21 +27,22 @@ func TestTypeRegex(t *testing.T) { } } -func TestChannelResponse_SnowPipeSchema(t *testing.T) { +func TestChannelResponse_CalculateSnowPipeSchema(t *testing.T) { testCases := []struct { name string - tableSchema map[string]map[string]interface{} + tableSchema map[string]ColumnInfo expected whutils.ModelTableSchema }{ { name: "Valid types with scale", - tableSchema: map[string]map[string]interface{}{ - "column1": {"type": "VARCHAR(16777216)"}, - "column2": {"type": "NUMBER(2,0)", "scale": 2.0}, - "column3": {"type": "NUMBER(2,0)", "scale": 0.0}, - "column4": 
{"type": "NUMBER(2,0)", "scale": 0}, - "column5": {"type": "BOOLEAN"}, - "column6": {"type": "TIMESTAMP_TZ(9)", "scale": float64(9)}, + tableSchema: map[string]ColumnInfo{ + "column1": {Type: lo.ToPtr("VARCHAR(16777216)")}, + "column2": {Type: lo.ToPtr("NUMBER(2,0)"), Scale: lo.ToPtr(2.0)}, + "column3": {Type: lo.ToPtr("NUMBER(2,0)"), Scale: lo.ToPtr(0.0)}, + "column4": {Type: lo.ToPtr("NUMBER(2,0)")}, + "column5": {Type: lo.ToPtr("BOOLEAN")}, + "column6": {Type: lo.ToPtr("TIMESTAMP_TZ(9)"), Scale: lo.ToPtr(9.0)}, + "column7": {Type: lo.ToPtr("TIMESTAMP_TZ(9)"), Scale: lo.ToPtr(9.5)}, }, expected: whutils.ModelTableSchema{ "column1": "string", @@ -49,45 +51,91 @@ func TestChannelResponse_SnowPipeSchema(t *testing.T) { "column4": "int", "column5": "boolean", "column6": "datetime", + "column7": "datetime", }, }, { - name: "Invalid type field", - tableSchema: map[string]map[string]interface{}{ - "column1": {"type": 12345}, + name: "Unknown type", + tableSchema: map[string]ColumnInfo{ + "column1": {Type: lo.ToPtr("VARCHAR(16777216)")}, + "column2": {Type: lo.ToPtr("UNKNOWN")}, + }, + expected: whutils.ModelTableSchema{ + "column1": "string", }, - expected: whutils.ModelTableSchema{}, }, { name: "Missing scale for number", - tableSchema: map[string]map[string]interface{}{ - "column1": {"type": "NUMBER(2,0)"}, + tableSchema: map[string]ColumnInfo{ + "column1": {Type: lo.ToPtr("NUMBER(2,0)")}, }, expected: whutils.ModelTableSchema{ "column1": "int", }, }, + { + name: "Missing type", + tableSchema: map[string]ColumnInfo{ + "column1": {Scale: lo.ToPtr(2.0)}, + }, + expected: whutils.ModelTableSchema{}, + }, { name: "Empty table schema", - tableSchema: map[string]map[string]interface{}{}, - expected: whutils.ModelTableSchema{}, + tableSchema: map[string]ColumnInfo{}, + expected: nil, }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.expected, calculateSnowPipeSchema(tc.tableSchema)) + }) + } +} + +func TestChannelResponse_UnmarshalJSON(t *testing.T) { + testCases := []struct { + name string + response []byte + expectedResponse ChannelResponse + }{ { - name: "Type with regex cleaning", - tableSchema: map[string]map[string]interface{}{ - "column1": {"type": "VARCHAR(255)"}, + name: "Valid success response", + response: []byte(`{"success":true,"channelId":"channelId","channelName":"channelName","clientName":"clientName","valid":true,"deleted":false,"tableSchema":{"EVENT":{"type":"VARCHAR(16777216)","logicalType":"TEXT","precision":null,"scale":null,"byteLength":16777216,"length":16777216,"nullable":true},"ID":{"type":"VARCHAR(16777216)","logicalType":"TEXT","precision":null,"scale":null,"byteLength":16777216,"length":16777216,"nullable":true},"TIMESTAMP":{"type":"TIMESTAMP_TZ(9)","logicalType":"TIMESTAMP_TZ","precision":0,"scale":9,"byteLength":null,"length":null,"nullable":true}}}`), + expectedResponse: ChannelResponse{ + Success: true, + ChannelID: "channelId", + ChannelName: "channelName", + ClientName: "clientName", + Valid: true, + Deleted: false, + SnowPipeSchema: whutils.ModelTableSchema{ + "EVENT": "string", + "ID": "string", + "TIMESTAMP": "datetime", + }, }, - expected: whutils.ModelTableSchema{ - "column1": "string", + }, + { + name: "Valid failure response", + response: []byte(`{"success":false,"error":"Open channel request failed: HTTP Status: 400 ErrorBody: {\n \"status_code\" : 4,\n \"message\" : \"The supplied table does not exist or is not 
authorized.\"\n}.","code":"ERR_TABLE_DOES_NOT_EXIST_OR_NOT_AUTHORIZED","snowflakeSDKCode":"0007","snowflakeAPIHttpCode":400,"snowflakeAPIStatusCode":4,"snowflakeAPIMessage":"The supplied table does not exist or is not authorized."}`), + expectedResponse: ChannelResponse{ + Success: false, + Error: "Open channel request failed: HTTP Status: 400 ErrorBody: {\n \"status_code\" : 4,\n \"message\" : \"The supplied table does not exist or is not authorized.\"\n}.", Code: "ERR_TABLE_DOES_NOT_EXIST_OR_NOT_AUTHORIZED", + SnowflakeSDKCode: "0007", + SnowflakeAPIHttpCode: 400, + SnowflakeAPIStatusCode: 4, + SnowflakeAPIMessage: "The supplied table does not exist or is not authorized.", }, }, } - for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - c := &ChannelResponse{} - c.TableSchema = tc.tableSchema - require.Equal(t, tc.expected, c.SnowPipeSchema()) + var response ChannelResponse + err := response.UnmarshalJSON(tc.response) + require.NoError(t, err) + require.Equal(t, tc.expectedResponse, response) }) } } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/options.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/options.go index d7a1bedd15..5b3d82af5f 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/options.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/options.go @@ -1,7 +1,5 @@ package snowpipestreaming -type Opt func(*Manager) - func WithRequestDoer(requestDoer requestDoer) Opt { return func(s *Manager) { s.requestDoer = requestDoer diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/poll.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/poll.go index b34c911dc5..c1a20e90bb 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/poll.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/poll.go @@ -4,65 +4,90 @@ import ( "context" "fmt" "net/http" - "time" + + "go.uber.org/atomic" "github.com/rudderlabs/rudder-go-kit/stringify" obskit "github.com/rudderlabs/rudder-observability-kit/go/labels" - "golang.org/x/sync/errgroup" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common" ) +// Poll checks the status of multiple imports using the import ID from pollInput. +// It returns a PollStatusResponse indicating if any imports are still in progress or if any have failed. 
func (m *Manager) Poll(pollInput common.AsyncPoll) common.PollStatusResponse { - m.logger.Infon("Polling started", logger.NewStringField("importId", pollInput.ImportId)) + m.logger.Infon("Polling started") - var uploadInfos []uploadInfo - err := json.Unmarshal([]byte(pollInput.ImportId), &uploadInfos) + var infos []importInfo + err := json.Unmarshal([]byte(pollInput.ImportId), &infos) if err != nil { return common.PollStatusResponse{ InProgress: false, StatusCode: http.StatusBadRequest, Complete: true, HasFailed: true, - Error: fmt.Sprintf("failed to unmarshal import id: %v", err), + Error: fmt.Errorf("failed to unmarshal import id: %w", err).Error(), } } ctx, cancel := context.WithCancel(context.Background()) defer cancel() - g, ctx := errgroup.WithContext(ctx) - g.SetLimit(m.config.maxConcurrentPollWorkers.Load()) + anyoneInProgress := atomic.NewBool(false) + for i := range infos { + info := &infos[i] + + inProgress, err := m.pollForImportInfo(ctx, info) + if err != nil { + infos[i].Failed = true + infos[i].Reason = err.Error() - for i, info := range uploadInfos { - g.Go(func() error { - if err := m.pollUploadInfo(ctx, info); err != nil { - uploadInfos[i].Failed = true - uploadInfos[i].Reason = err.Error() - m.logger.Warnn("Failed to poll channel offset", + m.logger.Warnn("Failed to poll channel offset", + logger.NewStringField("channelId", info.ChannelID), + logger.NewStringField("offset", info.Offset), + logger.NewStringField("table", info.Table), + obskit.Error(err), + ) + + if deleteErr := m.deleteChannel(ctx, info.Table, info.ChannelID); deleteErr != nil { + m.logger.Warnn("Failed to delete channel", logger.NewStringField("channelId", info.ChannelID), - logger.NewStringField("offset", info.Offset), logger.NewStringField("table", info.Table), - obskit.Error(err), + obskit.Error(deleteErr), ) } - return nil - }) + continue + } + anyoneInProgress.Store(anyoneInProgress.Load() || inProgress) + } + if anyoneInProgress.Load() { + return common.PollStatusResponse{InProgress: true} + } + + var successJobsCount, failedJobsCount int + var failedExists bool + for _, info := range infos { + if info.Failed { + failedJobsCount += info.Count + failedExists = true + } else { + successJobsCount += info.Count + } } - _ = g.Wait() + m.stats.jobs.failed.Count(failedJobsCount) + m.stats.jobs.succeeded.Count(successJobsCount) - if err := g.Wait(); err != nil { + if failedExists { return common.PollStatusResponse{ InProgress: false, StatusCode: http.StatusOK, Complete: true, HasFailed: true, - FailedJobURLs: stringify.Any(uploadInfos), + FailedJobURLs: stringify.Any(infos), } } - return common.PollStatusResponse{ InProgress: false, StatusCode: http.StatusOK, @@ -72,32 +97,26 @@ func (m *Manager) Poll(pollInput common.AsyncPoll) common.PollStatusResponse { } } -func (m *Manager) pollUploadInfo(ctx context.Context, info uploadInfo) error { +func (m *Manager) pollForImportInfo(ctx context.Context, info *importInfo) (bool, error) { log := m.logger.Withn( logger.NewStringField("channelId", info.ChannelID), logger.NewStringField("offset", info.Offset), logger.NewStringField("table", info.Table), ) - log.Infon("Polling for channel") + log.Infon("Polling for import info") - for { - statusRes, err := m.api.Status(ctx, info.ChannelID) - if err != nil { - return fmt.Errorf("getting status: %v", err) - } - if !statusRes.Valid || !statusRes.Success { - return errInvalidStatusResponse - } - if statusRes.Offset == info.Offset { - log.Infon("Polling completed") - return nil - } - log.Infon("Polling in progress. 
Sleeping before next poll.", - logger.NewStringField("statusOffset", statusRes.Offset), - logger.NewBoolField("statusSuccess", statusRes.Success), - logger.NewBoolField("statusValid", statusRes.Valid), - logger.NewDurationField("pollFrequency", m.config.pollFrequency), - ) - time.Sleep(m.config.pollFrequency) + statusRes, err := m.api.Status(ctx, info.ChannelID) + if err != nil { + return false, fmt.Errorf("getting status: %w", err) + } + log.Infon("Polled import info", + logger.NewBoolField("success", statusRes.Success), + logger.NewStringField("polledOffset", statusRes.Offset), + logger.NewBoolField("valid", statusRes.Valid), + logger.NewBoolField("completed", statusRes.Offset == info.Offset), + ) + if !statusRes.Valid || !statusRes.Success { + return false, errInvalidStatusResponse } + return statusRes.Offset != info.Offset, nil } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming.go index f9b23c76a5..3f3426c3af 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming.go @@ -8,6 +8,7 @@ import ( "github.com/hashicorp/go-retryablehttp" jsoniter "github.com/json-iterator/go" + "github.com/samber/lo" "github.com/rudderlabs/rudder-go-kit/bytesize" "github.com/rudderlabs/rudder-go-kit/config" @@ -60,10 +61,7 @@ func New( m.config.client.retryMax = conf.GetInt("SnowpipeStreaming.Client.retryWaitMin", 5) m.config.clientURL = conf.GetString("SnowpipeStreaming.Client.URL", "http://localhost:9078") m.config.instanceID = conf.GetString("INSTANCE_ID", "1") - m.config.pollFrequency = conf.GetDuration("SnowpipeStreaming.pollFrequency", 300, time.Millisecond) m.config.maxBufferCapacity = conf.GetReloadableInt64Var(512*bytesize.KB, bytesize.B, "SnowpipeStreaming.maxBufferCapacity") - m.config.maxConcurrentPollWorkers = conf.GetReloadableIntVar(10, 1, "SnowpipeStreaming.maxConcurrentPollWorkers") - m.config.maxConcurrentUploadWorkers = conf.GetReloadableIntVar(8, 1, "SnowpipeStreaming.maxConcurrentUploadWorkers") tags := stats.Tags{ "module": "batch_router", @@ -71,19 +69,27 @@ func New( "destType": destination.DestinationDefinition.Name, "destinationId": destination.ID, } - m.stats.successJobCount = statsFactory.NewTaggedStat("snowpipestreaming_success_job_count", stats.CountType, tags) - m.stats.failedJobCount = statsFactory.NewTaggedStat("snowpipestreaming_failed_jobs_count", stats.CountType, tags) - m.stats.discardCount = statsFactory.NewTaggedStat("snowpipestreaming_discards_count", stats.CountType, tags) - m.stats.channelSchemaCreationErrorCount = statsFactory.NewTaggedStat("snowpipestreaming_create_channel_schema_error", stats.CountType, tags) - m.stats.channelTableCreationErrorCount = statsFactory.NewTaggedStat("snowpipestreaming_create_channel_table_error", stats.CountType, tags) + m.stats.jobs.succeeded = statsFactory.NewTaggedStat("snowpipe_streaming_jobs", stats.CountType, lo.Assign(tags, stats.Tags{ + "status": "succeeded", + })) + m.stats.jobs.failed = statsFactory.NewTaggedStat("snowpipe_streaming_jobs", stats.CountType, lo.Assign(tags, stats.Tags{ + "status": "failed", + })) + m.stats.jobs.aborted = statsFactory.NewTaggedStat("snowpipe_streaming_jobs", stats.CountType, lo.Assign(tags, stats.Tags{ + "status": "aborted", + })) + m.stats.jobs.discarded = statsFactory.NewTaggedStat("snowpipe_streaming_jobs", stats.CountType, lo.Assign(tags, 
stats.Tags{ + "status": "discarded", + })) if m.requestDoer == nil { m.requestDoer = m.retryableClient().StandardClient() } m.api = newApiAdapter( - snowpipeapi.New(m.config.clientURL, m.requestDoer), + m.logger, statsFactory, + snowpipeapi.New(m.config.clientURL, m.requestDoer), destination, ) return m diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming_test.go new file mode 100644 index 0000000000..d0d5ba4419 --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming_test.go @@ -0,0 +1,177 @@ +package snowpipestreaming + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func TestFindNewColumns(t *testing.T) { + tests := []struct { + name string + eventSchema whutils.ModelTableSchema + snowPipeSchema whutils.ModelTableSchema + expected []whutils.ColumnInfo + }{ + { + name: "new column with different data type in event schema", + eventSchema: whutils.ModelTableSchema{ + "new_column": "STRING", + "existing_column": "FLOAT", + }, + snowPipeSchema: whutils.ModelTableSchema{ + "existing_column": "INT", + }, + expected: []whutils.ColumnInfo{ + {Name: "new_column", Type: "STRING"}, + }, + }, + { + name: "new and existing columns with multiple data types", + eventSchema: whutils.ModelTableSchema{ + "new_column1": "STRING", + "new_column2": "BOOLEAN", + "existing_column": "INT", + }, + snowPipeSchema: whutils.ModelTableSchema{ + "existing_column": "INT", + "another_existing_column": "FLOAT", + }, + expected: []whutils.ColumnInfo{ + {Name: "new_column1", Type: "STRING"}, + {Name: "new_column2", Type: "BOOLEAN"}, + }, + }, + { + name: "all columns in event schema are new", + eventSchema: whutils.ModelTableSchema{ + "new_column1": "STRING", + "new_column2": "BOOLEAN", + "new_column3": "FLOAT", + }, + snowPipeSchema: whutils.ModelTableSchema{}, + expected: []whutils.ColumnInfo{ + {Name: "new_column1", Type: "STRING"}, + {Name: "new_column2", Type: "BOOLEAN"}, + {Name: "new_column3", Type: "FLOAT"}, + }, + }, + { + name: "case sensitivity check", + eventSchema: whutils.ModelTableSchema{ + "ColumnA": "STRING", + "columna": "BOOLEAN", + }, + snowPipeSchema: whutils.ModelTableSchema{ + "columna": "BOOLEAN", + }, + expected: []whutils.ColumnInfo{ + {Name: "ColumnA", Type: "STRING"}, + }, + }, + { + name: "all columns match with identical types", + eventSchema: whutils.ModelTableSchema{ + "existing_column1": "STRING", + "existing_column2": "FLOAT", + }, + snowPipeSchema: whutils.ModelTableSchema{ + "existing_column1": "STRING", + "existing_column2": "FLOAT", + }, + expected: []whutils.ColumnInfo{}, + }, + { + name: "event schema is empty, SnowPipe schema has columns", + eventSchema: whutils.ModelTableSchema{}, + snowPipeSchema: whutils.ModelTableSchema{ + "existing_column": "STRING", + }, + expected: []whutils.ColumnInfo{}, + }, + { + name: "SnowPipe schema is nil", + eventSchema: whutils.ModelTableSchema{ + "new_column": "STRING", + }, + snowPipeSchema: nil, + expected: []whutils.ColumnInfo{ + {Name: "new_column", Type: "STRING"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := findNewColumns(tt.eventSchema, tt.snowPipeSchema) + assert.ElementsMatch(t, tt.expected, result) + }) + } +} + +func TestDestConfig_Decode(t *testing.T) { + tests := []struct { + name string + 
input map[string]interface{} + expected destConfig + expectedErr bool + }{ + { + name: "Valid Input", + input: map[string]interface{}{ + "account": "test-account", + "warehouse": "test-warehouse", + "database": "test-database", + "user": "test-user", + "role": "test-role", + "privateKey": "test-key", + "privateKeyPassphrase": "test-passphrase", + "namespace": "test-namespace", + }, + expected: destConfig{ + Account: "test-account", + Warehouse: "test-warehouse", + Database: "test-database", + User: "test-user", + Role: "test-role", + PrivateKey: "test-key", + PrivateKeyPassphrase: "test-passphrase", + Namespace: "TEST_NAMESPACE", + }, + expectedErr: false, + }, + { + name: "Invalid Input", + input: map[string]interface{}{ + "account": 123, // Invalid type + }, + expected: destConfig{}, + expectedErr: true, + }, + { + name: "Empty Map", + input: map[string]interface{}{}, + expected: destConfig{ + Namespace: "STRINGEMPTY", + }, + expectedErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var config destConfig + err := config.Decode(tt.input) + + if tt.expectedErr { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Equal(t, tt.expected, config) + } + }) + } +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml index 0c9f5ea530..4c223f96b6 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml @@ -2,7 +2,7 @@ version: "3.9" services: rudder-snowpipe-clients: - image: "hub.dev-rudder.rudderlabs.com/dockerhub-proxy/rudderstack/rudder-snowpipe-clients:develop" + image: "rudderstack/rudder-snowpipe-clients:develop" ports: - "9078" healthcheck: diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testhelper/testhelper.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testhelper/testhelper.go index c97100f62d..6392fc540b 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testhelper/testhelper.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testhelper/testhelper.go @@ -11,10 +11,9 @@ import ( "testing" "time" - "github.com/rudderlabs/rudder-go-kit/testhelper/rand" "github.com/stretchr/testify/require" - whutils "github.com/rudderlabs/rudder-server/warehouse/utils" + "github.com/rudderlabs/rudder-go-kit/testhelper/rand" ) const ( @@ -40,20 +39,18 @@ func GetSnowPipeTestCredentials(key string) (*TestCredentials, error) { var credentials TestCredentials err := json.Unmarshal([]byte(cred), &credentials) if err != nil { - return nil, fmt.Errorf("unable to marshall %s to snowpipe test credentials: %v", key, err) + return nil, fmt.Errorf("unable to marshall %s to snowpipe test credentials: %w", key, err) } return &credentials, nil } -func RandSchema(provider string) string { +func RandSchema() string { hex := strings.ToLower(rand.String(12)) namespace := fmt.Sprintf("test_%s_%d", hex, time.Now().Unix()) - return whutils.ToProviderCase(provider, whutils.ToSafeNamespace(provider, - namespace, - )) + return strings.ToUpper(namespace) } -func DropSchema(t *testing.T, db *sql.DB, namespace string) { +func DropSchema(t testing.TB, db *sql.DB, namespace string) { t.Helper() t.Log("dropping schema", 
namespace) diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/types.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/types.go index 0a44550768..bd366ede3b 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/types.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/types.go @@ -6,6 +6,8 @@ import ( "sync" "time" + "github.com/mitchellh/mapstructure" + "github.com/rudderlabs/rudder-go-kit/config" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats" @@ -39,20 +41,18 @@ type ( retryMax int } - clientURL string - instanceID string - pollFrequency time.Duration - maxBufferCapacity config.ValueLoader[int64] - maxConcurrentPollWorkers config.ValueLoader[int] - maxConcurrentUploadWorkers config.ValueLoader[int] + clientURL string + instanceID string + maxBufferCapacity config.ValueLoader[int64] } stats struct { - successJobCount stats.Counter - failedJobCount stats.Counter - discardCount stats.Counter - channelSchemaCreationErrorCount stats.Counter - channelTableCreationErrorCount stats.Counter + jobs struct { + succeeded stats.Counter + failed stats.Counter + aborted stats.Counter + discarded stats.Counter + } } } @@ -84,12 +84,13 @@ type ( Namespace string `mapstructure:"namespace"` } - uploadInfo struct { + importInfo struct { ChannelID string `json:"channelId"` Offset string `json:"offset"` Table string `json:"table"` Failed bool `json:"failed"` Reason string `json:"reason"` + Count int `json:"count"` } discardInfo struct { @@ -100,16 +101,45 @@ type ( uuidTS string } + uploadInfo struct { + tableName string + events []*event + jobIDs []int64 + eventsSchema whutils.ModelTableSchema + discardChannelResponse *model.ChannelResponse + latestJobID int64 + } + api interface { CreateChannel(ctx context.Context, channelReq *model.CreateChannelRequest) (*model.ChannelResponse, error) DeleteChannel(ctx context.Context, channelID string, sync bool) error Insert(ctx context.Context, channelID string, insertRequest *model.InsertRequest) (*model.InsertResponse, error) Status(ctx context.Context, channelID string) (*model.StatusResponse, error) } + + apiAdapter struct { + logger logger.Logger + statsFactory stats.Stats + destination *backendconfig.DestinationT + api + } + + Opt func(*Manager) ) +func (d *destConfig) Decode(m map[string]interface{}) error { + if err := mapstructure.Decode(m, d); err != nil { + return err + } + d.Namespace = whutils.ToProviderCase( + whutils.SnowpipeStreaming, + whutils.ToSafeNamespace(whutils.SnowpipeStreaming, d.Namespace), + ) + return nil +} + func (e *event) setUUIDTimestamp(formattedTimestamp string) { - uuidTimestampColumn := whutils.ToProviderCase(whutils.SNOWFLAKE, "uuid_ts") + uuidTimestampColumn := whutils.ToProviderCase(whutils.SnowpipeStreaming, "uuid_ts") if _, columnExists := e.Message.Metadata.Columns[uuidTimestampColumn]; columnExists { e.Message.Data[uuidTimestampColumn] = formattedTimestamp } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/upload.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/upload.go index 4c7248e35d..6af3c8252c 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/upload.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/upload.go @@ -6,12 +6,10 @@ import ( stdjson "encoding/json" "fmt" "os" + "slices" "strconv" - "sync" - "github.com/mitchellh/mapstructure" "github.com/samber/lo" - "golang.org/x/sync/errgroup" 
"github.com/rudderlabs/rudder-go-kit/logger" obskit "github.com/rudderlabs/rudder-observability-kit/go/labels" @@ -22,45 +20,111 @@ import ( whutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) +// Upload processes an async destination upload to a Snowpipe streaming destination. +// It decodes the destination configuration, retrieves events from the specified file, +// and delegates to handleEvents to process the events. func (m *Manager) Upload(asyncDest *common.AsyncDestinationStruct) common.AsyncUploadOutput { m.logger.Infon("Uploading data to snowpipe streaming destination") var destConf destConfig - err := mapstructure.Decode(asyncDest.Destination.Config, &destConf) + err := destConf.Decode(asyncDest.Destination.Config) if err != nil { - return m.abortJobs(asyncDest, fmt.Errorf("failed to decode destination config: %v", err).Error()) + return m.abortJobs(asyncDest, fmt.Errorf("failed to decode destination config: %w", err).Error()) } events, err := m.eventsFromFile(asyncDest.FileName) if err != nil { - return m.abortJobs(asyncDest, fmt.Errorf("failed to read events from file: %v", err).Error()) + return m.abortJobs(asyncDest, fmt.Errorf("failed to read events from file: %w", err).Error()) } m.logger.Infon("Read events from file", logger.NewIntField("events", int64(len(events)))) - failedJobIDs, successJobIDs, uploadInfos := m.handleEvents(asyncDest, events, destConf) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + discardsChannel, err := m.prepareChannelResponse(ctx, asyncDest.Destination.ID, &destConf, discardsTable(), discardsSchema()) + if err != nil { + return m.abortJobs(asyncDest, fmt.Errorf("failed to prepare discards channel: %w", err).Error()) + } + m.logger.Infon("Prepared discards channel") + + groupedEvents := lo.GroupBy(events, func(event *event) string { + return event.Message.Metadata.Table + }) + uploadInfos := lo.MapToSlice(groupedEvents, func(tableName string, tableEvents []*event) *uploadInfo { + jobIDs := lo.Map(tableEvents, func(event *event, _ int) int64 { + return event.Metadata.JobID + }) + latestJobID := lo.MaxBy(tableEvents, func(a, b *event) bool { + return a.Metadata.JobID > b.Metadata.JobID + }) + return &uploadInfo{ + tableName: tableName, + events: tableEvents, + jobIDs: jobIDs, + eventsSchema: schemaFromEvents(tableEvents), + discardChannelResponse: discardsChannel, + latestJobID: latestJobID.Metadata.JobID, + } + }) + slices.SortFunc(uploadInfos, func(a, b *uploadInfo) int { + return int(a.latestJobID - b.latestJobID) + }) + + var ( + importingJobIDs, failedJobIDs []int64 + importInfos, discardImportInfos []*importInfo + ) + for _, info := range uploadInfos { + imInfo, discardImInfo, err := m.sendEventsToSnowpipe(ctx, asyncDest.Destination.ID, &destConf, info) + if err != nil { + m.logger.Warnn("Failed to send events to Snowpipe", + logger.NewStringField("table", info.tableName), + obskit.Error(err), + ) + + failedJobIDs = append(failedJobIDs, info.jobIDs...) + continue + } + + importingJobIDs = append(importingJobIDs, info.jobIDs...) 
+ importInfos = append(importInfos, imInfo) + + if discardImInfo != nil { + discardImportInfos = append(discardImportInfos, discardImInfo) + } + } + if len(failedJobIDs) > 0 { + m.stats.jobs.failed.Count(len(failedJobIDs)) + } + if len(discardImportInfos) > 0 { + discarded := 0 + for _, info := range discardImportInfos { + discarded += info.Count + } + m.stats.jobs.discarded.Count(discarded) + + importInfos = append(importInfos, discardImportInfos[len(discardImportInfos)-1]) + } var importParameters stdjson.RawMessage - if len(uploadInfos) > 0 { - importIDBytes, err := json.Marshal(uploadInfos) + if len(importInfos) > 0 { + importIDBytes, err := json.Marshal(importInfos) if err != nil { - return m.abortJobs(asyncDest, fmt.Errorf("failed to marshal import id: %v", err).Error()) + return m.abortJobs(asyncDest, fmt.Errorf("failed to marshal import id: %w", err).Error()) } importParameters, err = json.Marshal(common.ImportParameters{ ImportId: string(importIDBytes), }) if err != nil { - return m.abortJobs(asyncDest, fmt.Errorf("failed to marshal import parameters: %v", err).Error()) + return m.abortJobs(asyncDest, fmt.Errorf("failed to marshal import parameters: %w", err).Error()) } - } - m.logger.Infon("Uploaded data to snowpipe streaming destination") - - m.stats.failedJobCount.Count(len(failedJobIDs)) - m.stats.successJobCount.Count(len(successJobIDs)) + m.logger.Infon("Uploaded data to snowpipe streaming destination") + } return common.AsyncUploadOutput{ - ImportingJobIDs: successJobIDs, - ImportingCount: len(successJobIDs), + ImportingJobIDs: importingJobIDs, + ImportingCount: len(importingJobIDs), ImportingParameters: importParameters, FailedJobIDs: failedJobIDs, FailedCount: len(failedJobIDs), @@ -68,7 +132,7 @@ func (m *Manager) Upload(asyncDest *common.AsyncDestinationStruct) common.AsyncU } } -func (m *Manager) eventsFromFile(fileName string) ([]event, error) { +func (m *Manager) eventsFromFile(fileName string) ([]*event, error) { file, err := os.Open(fileName) if err != nil { return nil, fmt.Errorf("failed to open file %s: %w", fileName, err) @@ -77,173 +141,96 @@ func (m *Manager) eventsFromFile(fileName string) ([]event, error) { _ = file.Close() }() - var events []event + var events []*event + formattedTS := m.now().Format(misc.RFC3339Milli) scanner := bufio.NewScanner(file) scanner.Buffer(nil, int(m.config.maxBufferCapacity.Load())) for scanner.Scan() { var e event if err := json.Unmarshal(scanner.Bytes(), &e); err != nil { - return nil, fmt.Errorf("failed to unmarshal event: %v", err) + return nil, fmt.Errorf("failed to unmarshal event: %w", err) } + e.setUUIDTimestamp(formattedTS) - events = append(events, e) + events = append(events, &e) } if err := scanner.Err(); err != nil { - return nil, fmt.Errorf("error reading from file: %v", err) + return nil, fmt.Errorf("error reading from file: %w", err) } return events, nil } -func (m *Manager) handleEvents( - asyncDest *common.AsyncDestinationStruct, - events []event, - destConf destConfig, -) ( - failedJobIDs []int64, - successJobIDs []int64, - uploadInfos []*uploadInfo, -) { - var ( - discardInfos []discardInfo - mu sync.Mutex - ) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - g, gCtx := errgroup.WithContext(ctx) - g.SetLimit(m.config.maxConcurrentUploadWorkers.Load()) - - groupedEvents := lo.GroupBy(events, func(event event) string { - return event.Message.Metadata.Table - }) - for tableName, tableEvents := range groupedEvents { - g.Go(func() error { - jobIDs := lo.Map(tableEvents, func(event 
event, _ int) int64 { - return event.Metadata.JobID - }) - - uploadTableInfo, discardTableInfo, err := m.loadTableEventsToSnowPipe( - gCtx, asyncDest, destConf, tableName, tableEvents, - ) - - mu.Lock() - defer mu.Unlock() - - if err != nil { - m.logger.Warnn("Failed to upload events to table", - logger.NewStringField("table", tableName), - obskit.Error(err), - ) - - failedJobIDs = append(failedJobIDs, jobIDs...) - return nil - } - - successJobIDs = append(successJobIDs, jobIDs...) - uploadInfos = append(uploadInfos, uploadTableInfo) - discardInfos = append(discardInfos, discardTableInfo...) - return nil - }) - } - _ = g.Wait() - - if len(discardInfos) > 0 { - discardUploadInfo, err := m.loadDiscardsToSnowPipe(ctx, asyncDest, destConf, discardInfos) - if err != nil { - m.logger.Warnn("Failed to upload events to discards table", - logger.NewStringField("table", discardsTable()), - obskit.Error(err), - ) - } else { - uploadInfos = append(uploadInfos, discardUploadInfo) - } - } - return failedJobIDs, successJobIDs, uploadInfos -} - -func (m *Manager) loadTableEventsToSnowPipe( +func (m *Manager) sendEventsToSnowpipe( ctx context.Context, - asyncDest *common.AsyncDestinationStruct, - destConf destConfig, - tableName string, - tableEvents []event, -) (*uploadInfo, []discardInfo, error) { + destinationID string, + destConf *destConfig, + info *uploadInfo, +) (*importInfo, *importInfo, error) { + offset := strconv.FormatInt(info.latestJobID, 10) + log := m.logger.Withn( - logger.NewStringField("table", tableName), - logger.NewIntField("events", int64(len(tableEvents))), + logger.NewStringField("table", info.tableName), + logger.NewIntField("events", int64(len(info.events))), + logger.NewStringField("offset", offset), ) - log.Infon("Uploading data to table") - - eventSchema := schemaFromEvents(tableEvents) + log.Infon("Sending events to Snowpipe") - channelResponse, err := m.createChannel(ctx, asyncDest, destConf, tableName, eventSchema) + channelResponse, err := m.prepareChannelResponse(ctx, destinationID, destConf, info.tableName, info.eventsSchema) if err != nil { - return nil, nil, fmt.Errorf("creating channel: %v", err) - } - snowPipeSchema := channelResponse.SnowPipeSchema() - - columnInfos := findNewColumns(eventSchema, snowPipeSchema) - if len(columnInfos) > 0 { - if err := m.addColumns(ctx, destConf.Namespace, tableName, columnInfos); err != nil { - return nil, nil, fmt.Errorf("adding columns: %v", err) - } - - channelResponse, err = m.recreateChannel(ctx, asyncDest, destConf, tableName, eventSchema, channelResponse) - if err != nil { - return nil, nil, fmt.Errorf("recreating channel: %v", err) - } - snowPipeSchema = channelResponse.SnowPipeSchema() + return nil, nil, fmt.Errorf("creating channel %s: %w", info.tableName, err) } + m.logger.Infon("Prepared channel", logger.NewStringField("channelID", channelResponse.ChannelID)) formattedTS := m.now().Format(misc.RFC3339Milli) - for _, tableEvent := range tableEvents { - tableEvent.setUUIDTimestamp(formattedTS) - } - - discardInfos := lo.FlatMap(tableEvents, func(tableEvent event, _ int) []discardInfo { - return discardedRecords(tableEvent, snowPipeSchema, tableName, formattedTS) - }) - - oldestEvent := lo.MaxBy(tableEvents, func(a, b event) bool { - return a.Metadata.JobID > b.Metadata.JobID + discardInfos := lo.FlatMap(info.events, func(tableEvent *event, _ int) []discardInfo { + return discardedRecords(tableEvent, channelResponse.SnowPipeSchema, info.tableName, formattedTS) }) - offset := strconv.FormatInt(oldestEvent.Metadata.JobID, 10) 
insertReq := &model.InsertRequest{ - Rows: lo.Map(tableEvents, func(event event, _ int) model.Row { + Rows: lo.Map(info.events, func(event *event, _ int) model.Row { return event.Message.Data }), Offset: offset, } + insertRes, err := m.api.Insert(ctx, channelResponse.ChannelID, insertReq) - if err != nil { - if deleteErr := m.deleteChannel(ctx, tableName, channelResponse.ChannelID); deleteErr != nil { - log.Warnn("Failed to delete channel", obskit.Error(deleteErr)) + defer func() { + if err != nil || !insertRes.Success { + if deleteErr := m.deleteChannel(ctx, info.tableName, channelResponse.ChannelID); deleteErr != nil { + log.Warnn("Failed to delete channel", obskit.Error(deleteErr)) + } } - return nil, nil, fmt.Errorf("inserting data: %v", err) + }() + if err != nil { + return nil, nil, fmt.Errorf("inserting data %s: %w", info.tableName, err) } if !insertRes.Success { - if deleteErr := m.deleteChannel(ctx, tableName, channelResponse.ChannelID); deleteErr != nil { - log.Warnn("Failed to delete channel", obskit.Error(deleteErr)) - } return nil, nil, errInsertingDataFailed } - log.Infon("Successfully uploaded data to table") - info := &uploadInfo{ + var discardImInfo *importInfo + if len(discardInfos) > 0 { + discardImInfo, err = m.sendDiscardEVentsToSnowpipe(ctx, info, discardInfos) + if err != nil { + return nil, nil, fmt.Errorf("sending discard events to Snowpipe: %w", err) + } + } + log.Infon("Sent events to Snowpipe") + + imInfo := &importInfo{ ChannelID: channelResponse.ChannelID, Offset: offset, - Table: tableName, + Table: info.tableName, + Count: len(info.events), } - return info, discardInfos, nil + return imInfo, discardImInfo, nil } -// schemaFromEvents Iterate over events and merge their columns into the final map -// Keeping the first type first serve basis -func schemaFromEvents(events []event) whutils.ModelTableSchema { +// schemaFromEvents builds a schema by iterating over events and merging their columns +// using a first-encountered type basis for each column. +func schemaFromEvents(events []*event) whutils.ModelTableSchema { columnsMap := make(whutils.ModelTableSchema) for _, e := range events { for col, typ := range e.Message.Metadata.Columns { @@ -256,7 +243,7 @@ func schemaFromEvents(events []event) whutils.ModelTableSchema { } func (m *Manager) abortJobs(asyncDest *common.AsyncDestinationStruct, abortReason string) common.AsyncUploadOutput { - m.stats.failedJobCount.Count(len(asyncDest.ImportingJobIDs)) + m.stats.jobs.aborted.Count(len(asyncDest.ImportingJobIDs)) return common.AsyncUploadOutput{ AbortJobIDs: asyncDest.ImportingJobIDs, AbortCount: len(asyncDest.ImportingJobIDs), diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/uploadstats.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/uploadstats.go index c3f790d382..ce780d3e4d 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/uploadstats.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/uploadstats.go @@ -4,56 +4,53 @@ import ( "fmt" "net/http" - obskit "github.com/rudderlabs/rudder-observability-kit/go/labels" "github.com/tidwall/gjson" + obskit "github.com/rudderlabs/rudder-observability-kit/go/labels" + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common" ) +// GetUploadStats retrieves the upload status for Snowpipe streaming jobs. 
+// It parses the input to identify succeeded and failed tables, maps job IDs to their statuses, +// and returns a response with succeeded job IDs, failed job IDs, and failure reasons. func (m *Manager) GetUploadStats(input common.GetUploadStatsInput) common.GetUploadStatsResponse { - m.logger.Infon("Getting upload stats for snowpipe streaming destination") + m.logger.Infon("Getting import stats for snowpipe streaming destination") - var infos []uploadInfo + var infos []importInfo err := json.Unmarshal([]byte(input.FailedJobURLs), &infos) if err != nil { m.logger.Warnn("Failed to unmarshal failed job urls", obskit.Error(err)) return common.GetUploadStatsResponse{ - StatusCode: 500, - Error: fmt.Errorf("failed to unmarshal failed job urls: %v", err).Error(), + StatusCode: http.StatusBadRequest, + Error: fmt.Errorf("failed to unmarshal failed job urls: %w", err).Error(), } } - var ( - succeededTables map[string]uploadInfo - failedTables map[string]uploadInfo - ) - + succeededTables, failedTables := make(map[string]struct{}), make(map[string]importInfo) for _, info := range infos { if info.Failed { failedTables[info.Table] = info } else { - succeededTables[info.Table] = info + succeededTables[info.Table] = struct{}{} } } var ( succeededJobIDs []int64 failedJobIDs []int64 - failedJobReasons map[int64]string + failedJobReasons = make(map[int64]string) ) - for _, job := range input.ImportingList { tableName := gjson.GetBytes(job.EventPayload, "metadata.table").String() if _, ok := succeededTables[tableName]; ok { succeededJobIDs = append(succeededJobIDs, job.JobID) - continue } if info, ok := failedTables[tableName]; ok { failedJobIDs = append(failedJobIDs, job.JobID) failedJobReasons[job.JobID] = info.Reason } } - return common.GetUploadStatsResponse{ StatusCode: http.StatusOK, Metadata: common.EventStatMeta{ diff --git a/router/batchrouter/handle_async.go b/router/batchrouter/handle_async.go index 3571503a03..0a0157b959 100644 --- a/router/batchrouter/handle_async.go +++ b/router/batchrouter/handle_async.go @@ -25,6 +25,7 @@ import ( routerutils "github.com/rudderlabs/rudder-server/router/utils" "github.com/rudderlabs/rudder-server/services/rmetrics" "github.com/rudderlabs/rudder-server/utils/misc" + "github.com/rudderlabs/rudder-server/utils/timeutil" utilTypes "github.com/rudderlabs/rudder-server/utils/types" ) @@ -379,7 +380,7 @@ func (brt *Handle) asyncStructSetup(sourceID, destinationID string, attemptNums brt.asyncDestinationStruct[destinationID].FirstAttemptedAts = firstAttemptedAts brt.asyncDestinationStruct[destinationID].OriginalJobParameters = originalJobParameters brt.asyncDestinationStruct[destinationID].FileName = jsonPath - brt.asyncDestinationStruct[destinationID].CreatedAt = time.Now() + brt.asyncDestinationStruct[destinationID].CreatedAt = timeutil.Now() brt.asyncDestinationStruct[destinationID].SourceJobRunID = newJobRunID } diff --git a/router/batchrouter/handle_lifecycle.go b/router/batchrouter/handle_lifecycle.go index 676e5c4145..ce8394015a 100644 --- a/router/batchrouter/handle_lifecycle.go +++ b/router/batchrouter/handle_lifecycle.go @@ -211,9 +211,9 @@ func (brt *Handle) setupReloadableVars() { brt.jobsDBCommandTimeout = config.GetReloadableDurationVar(600, time.Second, "JobsDB.BatchRouter.CommandRequestTimeout", "JobsDB.CommandRequestTimeout") brt.jobdDBQueryRequestTimeout = config.GetReloadableDurationVar(600, time.Second, "JobsDB.BatchRouter.QueryRequestTimeout", "JobsDB.QueryRequestTimeout") brt.jobdDBMaxRetries = config.GetReloadableIntVar(2, 1, 
"JobsDB.BatchRouter.MaxRetries", "JobsDB.MaxRetries") - brt.minIdleSleep = config.GetReloadableDurationVar(2, time.Second, "BatchRouter.minIdleSleep") - brt.uploadFreq = config.GetReloadableDurationVar(30, time.Second, "BatchRouter.uploadFreqInS", "BatchRouter.uploadFreq") - brt.mainLoopFreq = config.GetReloadableDurationVar(30, time.Second, "BatchRouter.mainLoopFreq") + brt.minIdleSleep = config.GetReloadableDurationVar(2, time.Second, "BatchRouter."+brt.destType+".minIdleSleep", "BatchRouter.minIdleSleep") + brt.uploadFreq = config.GetReloadableDurationVar(30, time.Second, "BatchRouter."+brt.destType+".uploadFreqInS", "BatchRouter."+brt.destType+".uploadFreq", "BatchRouter.uploadFreqInS", "BatchRouter.uploadFreq") + brt.mainLoopFreq = config.GetReloadableDurationVar(30, time.Second, "BatchRouter."+brt.destType+".mainLoopFreq", "BatchRouter.mainLoopFreq") brt.warehouseServiceMaxRetryTime = config.GetReloadableDurationVar(3, time.Hour, "BatchRouter.warehouseServiceMaxRetryTime", "BatchRouter.warehouseServiceMaxRetryTimeinHr") brt.datePrefixOverride = config.GetReloadableStringVar("", "BatchRouter.datePrefixOverride") brt.customDatePrefix = config.GetReloadableStringVar("", "BatchRouter.customDatePrefix") diff --git a/warehouse/integrations/manager/manager.go b/warehouse/integrations/manager/manager.go index e55465e958..d550f45fe0 100644 --- a/warehouse/integrations/manager/manager.go +++ b/warehouse/integrations/manager/manager.go @@ -65,7 +65,7 @@ func New(destType string, conf *config.Config, logger logger.Logger, stats stats return redshift.New(conf, logger, stats), nil case warehouseutils.BQ: return bigquery.New(conf, logger), nil - case warehouseutils.SNOWFLAKE: + case warehouseutils.SNOWFLAKE, warehouseutils.SnowpipeStreaming: return snowflake.New(conf, logger, stats), nil case warehouseutils.POSTGRES: return postgres.New(conf, logger, stats), nil @@ -90,7 +90,7 @@ func NewWarehouseOperations(destType string, conf *config.Config, logger logger. 
return redshift.New(conf, logger, stats), nil case warehouseutils.BQ: return bigquery.New(conf, logger), nil - case warehouseutils.SNOWFLAKE: + case warehouseutils.SNOWFLAKE, warehouseutils.SnowpipeStreaming: return snowflake.New(conf, logger, stats), nil case warehouseutils.POSTGRES: return postgres.New(conf, logger, stats), nil diff --git a/warehouse/integrations/testdata/docker-compose.transformer.yml b/warehouse/integrations/testdata/docker-compose.transformer.yml new file mode 100644 index 0000000000..08899300e3 --- /dev/null +++ b/warehouse/integrations/testdata/docker-compose.transformer.yml @@ -0,0 +1,11 @@ +version: "3.9" + +services: + transformer: + image: "rudderstack/rudder-transformer:latest" + ports: + - "9090:9090" + healthcheck: + test: wget --no-verbose --tries=1 --spider http://0.0.0.0:9090/health || exit 1 + interval: 1s + retries: 25 diff --git a/warehouse/utils/reservedkeywords.go b/warehouse/utils/reservedkeywords.go index f8b9fc3daa..71f553f8d7 100644 --- a/warehouse/utils/reservedkeywords.go +++ b/warehouse/utils/reservedkeywords.go @@ -94,6 +94,99 @@ var ReservedKeywords = map[string]map[string]bool{ "WHERE": true, "WITH": true, }, + "SNOWPIPE_STREAMING": { + "ACCOUNT": true, + "ALL": true, + "ALTER": true, + "AND": true, + "ANY": true, + "AS": true, + "BETWEEN": true, + "BY": true, + "CASE": true, + "CAST": true, + "CHECK": true, + "COLUMN": true, + "CONNECT": true, + "CONNECTION": true, + "CONSTRAINT": true, + "CREATE": true, + "CROSS": true, + "CURRENT": true, + "CURRENT_DATE": true, + "CURRENT_TIME": true, + "CURRENT_TIMESTAMP": true, + "CURRENT_USER": true, + "DATABASE": true, + "DELETE": true, + "DISTINCT": true, + "DROP": true, + "ELSE": true, + "EXISTS": true, + "FALSE": true, + "FOLLOWING": true, + "FOR": true, + "FROM": true, + "FULL": true, + "GRANT": true, + "GROUP": true, + "GSCLUSTER": true, + "HAVING": true, + "ILIKE": true, + "IN": true, + "INCREMENT": true, + "INNER": true, + "INSERT": true, + "INTERSECT": true, + "INTO": true, + "IS": true, + "ISSUE": true, + "JOIN": true, + "LATERAL": true, + "LEFT": true, + "LIKE": true, + "LOCALTIME": true, + "LOCALTIMESTAMP": true, + "MINUS": true, + "NATURAL": true, + "NOT": true, + "NULL": true, + "OF": true, + "ON": true, + "OR": true, + "ORDER": true, + "ORGANIZATION": true, + "QUALIFY": true, + "REGEXP": true, + "REVOKE": true, + "RIGHT": true, + "RLIKE": true, + "ROW": true, + "ROWS": true, + "SAMPLE": true, + "SCHEMA": true, + "SELECT": true, + "SET": true, + "SOME": true, + "START": true, + "TABLE": true, + "TABLESAMPLE": true, + "THEN": true, + "TO": true, + "TRIGGER": true, + "TRUE": true, + "TRY_CAST": true, + "UNION": true, + "UNIQUE": true, + "UPDATE": true, + "USING": true, + "VALUES": true, + "VIEW": true, + "WHEN": true, + "WHENEVER": true, + "WHERE": true, + "WITH": true, + }, "RS": { "AES128": true, "AES256": true, diff --git a/warehouse/utils/uploader.go b/warehouse/utils/uploader.go index 50af3e044f..b35ddb0ad3 100644 --- a/warehouse/utils/uploader.go +++ b/warehouse/utils/uploader.go @@ -7,11 +7,6 @@ import ( "github.com/rudderlabs/rudder-server/warehouse/internal/model" ) -type ( - ModelWarehouse = model.Warehouse - ModelTableSchema = model.TableSchema -) - //go:generate mockgen -destination=../internal/mocks/utils/mock_uploader.go -package mock_uploader github.com/rudderlabs/rudder-server/warehouse/utils Uploader type Uploader interface { IsWarehouseSchemaEmpty() bool diff --git a/warehouse/utils/utils.go b/warehouse/utils/utils.go index e32ab9ef77..b556f8937b 100644 --- 
a/warehouse/utils/utils.go +++ b/warehouse/utils/utils.go @@ -38,17 +38,18 @@ import ( ) const ( - RS = "RS" - BQ = "BQ" - SNOWFLAKE = "SNOWFLAKE" - POSTGRES = "POSTGRES" - CLICKHOUSE = "CLICKHOUSE" - MSSQL = "MSSQL" - AzureSynapse = "AZURE_SYNAPSE" - DELTALAKE = "DELTALAKE" - S3Datalake = "S3_DATALAKE" - GCSDatalake = "GCS_DATALAKE" - AzureDatalake = "AZURE_DATALAKE" + RS = "RS" + BQ = "BQ" + SNOWFLAKE = "SNOWFLAKE" + SnowpipeStreaming = "SNOWPIPE_STREAMING" + POSTGRES = "POSTGRES" + CLICKHOUSE = "CLICKHOUSE" + MSSQL = "MSSQL" + AzureSynapse = "AZURE_SYNAPSE" + DELTALAKE = "DELTALAKE" + S3Datalake = "S3_DATALAKE" + GCSDatalake = "GCS_DATALAKE" + AzureDatalake = "AZURE_DATALAKE" ) const ( @@ -206,18 +207,6 @@ type ColumnInfo struct { Type string } -type Destination struct { - Source backendconfig.SourceT - Destination backendconfig.DestinationT -} - -type Schema model.Schema - -type KeyValue struct { - Key string - Value interface{} -} - type GetLoadFilesOptions struct { Table string StartID int64 @@ -230,6 +219,13 @@ type LoadFile struct { Metadata json.RawMessage } +type ( + ModelWarehouse = model.Warehouse + ModelTableSchema = model.TableSchema + ModelValidationRequest = model.ValidationRequest + ModelValidationResponse = model.ValidationResponse +) + func IDResolutionEnabled() bool { return enableIDResolution } @@ -523,7 +519,8 @@ ToProviderCase converts string provided to case generally accepted in the wareho e.g. columns are uppercase in SNOWFLAKE and lowercase etc. in REDSHIFT, BIGQUERY etc */ func ToProviderCase(provider, str string) string { - if strings.ToUpper(provider) == SNOWFLAKE { + upperCaseProvider := strings.ToUpper(provider) + if upperCaseProvider == SNOWFLAKE || upperCaseProvider == SnowpipeStreaming { str = strings.ToUpper(str) } return str
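
The ChannelResponse.UnmarshalJSON added in internal/model/model.go relies on the alias-type pattern to avoid infinite recursion while still deriving a simplified schema from the raw tableSchema payload. The following is a minimal, self-contained sketch of that pattern under illustrative names (channelResponse, columnInfo are stand-ins, not the package's actual exports), not the PR's implementation.

package main

import (
	"encoding/json"
	"fmt"
)

type columnInfo struct {
	Type  *string  `json:"type,omitempty"`
	Scale *float64 `json:"scale,omitempty"`
}

type channelResponse struct {
	ChannelID string            `json:"channelId"`
	Schema    map[string]string `json:"-"` // derived from tableSchema, never decoded directly
}

// UnmarshalJSON decodes the wire format once via a local alias type (which has
// no UnmarshalJSON method, so the standard decoder runs without recursion) and
// then folds the raw tableSchema block into a simplified column->type map.
func (c *channelResponse) UnmarshalJSON(data []byte) error {
	type alias channelResponse
	aux := struct {
		TableSchema map[string]columnInfo `json:"tableSchema"`
		*alias
	}{alias: (*alias)(c)}
	if err := json.Unmarshal(data, &aux); err != nil {
		return err
	}
	c.Schema = make(map[string]string, len(aux.TableSchema))
	for col, info := range aux.TableSchema {
		if info.Type == nil {
			continue // columns without a type are skipped, mirroring calculateSnowPipeSchema
		}
		c.Schema[col] = *info.Type
	}
	return nil
}

func main() {
	var res channelResponse
	payload := []byte(`{"channelId":"ch1","tableSchema":{"ID":{"type":"VARCHAR(16777216)"}}}`)
	if err := json.Unmarshal(payload, &res); err != nil {
		panic(err)
	}
	fmt.Println(res.ChannelID, res.Schema) // ch1 map[ID:VARCHAR(16777216)]
}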
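
Upload and Poll communicate through the import ID: Upload marshals one importInfo per table (offset = highest job ID sent), and Poll unmarshals it and compares each channel's committed offset against that value. A rough sketch of that round trip, with the Snowpipe status call stubbed out (committedOffset is a placeholder, not the PR's API):

package main

import (
	"encoding/json"
	"fmt"
)

// importInfo mirrors the fields round-tripped through the import ID (see
// types.go in the diff); Count lets Poll attribute job counts to the
// succeeded/failed stats without re-reading the staging file.
type importInfo struct {
	ChannelID string `json:"channelId"`
	Offset    string `json:"offset"`
	Table     string `json:"table"`
	Failed    bool   `json:"failed"`
	Reason    string `json:"reason"`
	Count     int    `json:"count"`
}

// committedOffset stands in for api.Status; in the real flow it returns the
// latest offset the Snowpipe channel has durably committed.
func committedOffset(channelID string) string { return "42" }

func main() {
	// Upload side: one entry per table, offset = highest job ID sent.
	infos := []importInfo{{ChannelID: "ch-tracks", Offset: "42", Table: "TRACKS", Count: 10}}
	importID, _ := json.Marshal(infos)

	// Poll side: decode the import ID and compare offsets per channel.
	var polled []importInfo
	_ = json.Unmarshal(importID, &polled)
	inProgress := false
	for _, info := range polled {
		if committedOffset(info.ChannelID) != info.Offset {
			inProgress = true
		}
	}
	fmt.Println("in progress:", inProgress) // false once the committed offset matches
}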
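
schemaFromEvents and findNewColumns together decide which columns must be added before inserting: the first type seen for a column wins when merging event schemas, and any event column absent from the channel's schema (exact, case-sensitive match) is treated as new. A reduced sketch using plain maps, assuming only the behaviour visible in the tests above:

package main

import "fmt"

type columnDef struct {
	Name string
	Type string
}

// mergeSchemas keeps the first type encountered for each column across events.
func mergeSchemas(eventColumns []map[string]string) map[string]string {
	merged := make(map[string]string)
	for _, cols := range eventColumns {
		for name, typ := range cols {
			if _, ok := merged[name]; !ok {
				merged[name] = typ
			}
		}
	}
	return merged
}

// newColumns reports every event column missing from the destination schema.
func newColumns(eventSchema, destSchema map[string]string) []columnDef {
	out := make([]columnDef, 0)
	for name, typ := range eventSchema {
		if _, ok := destSchema[name]; !ok {
			out = append(out, columnDef{Name: name, Type: typ})
		}
	}
	return out
}

func main() {
	eventSchema := mergeSchemas([]map[string]string{
		{"id": "string", "received_at": "datetime"},
		{"id": "int", "price": "float"}, // "id" keeps the first type seen: string
	})
	destSchema := map[string]string{"id": "string"}
	fmt.Println(newColumns(eventSchema, destSchema)) // received_at and price are reported as new
}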