From 958a6b2a1660b49e3ea4c53453196573b7d5526c Mon Sep 17 00:00:00 2001 From: achettyiitr Date: Mon, 28 Oct 2024 07:12:18 +0530 Subject: [PATCH] chore: some more tests --- .github/workflows/tests.yaml | 2 +- .../snowpipestreaming_test.go | 2225 ++++++++--------- ...docker-compose.rudder-snowpipe-clients.yml | 2 +- .../docker-compose.rudder-transformer.yml | 2 +- processor/transformer/transformer.go | 3 + .../snowpipestreaming/apiadapter.go | 125 +- .../snowpipestreaming/channel.go | 112 +- .../snowpipestreaming/columns.go | 4 +- .../snowpipestreaming/discards.go | 86 +- .../snowpipestreaming/internal/api/api.go | 6 +- .../internal/api/api_test.go | 67 +- .../internal/api/{errorcodes.go => codes.go} | 0 .../internal/api/createchannel.go | 7 +- .../internal/api/createchannel_test.go | 44 +- .../internal/api/deletechannel.go | 2 +- .../internal/api/getchannel.go | 3 +- .../internal/api/getchannel_test.go | 41 +- .../snowpipestreaming/internal/api/insert.go | 7 +- .../internal/api/insert_test.go | 4 +- .../snowpipestreaming/internal/api/status.go | 7 +- .../snowpipestreaming/internal/model/model.go | 72 +- .../internal/model/model_test.go | 100 +- .../snowpipestreaming/options.go | 2 - .../snowpipestreaming/poll.go | 105 +- .../snowpipestreaming/snowpipestreaming.go | 24 +- .../snowpipestreaming_test.go | 177 ++ ...docker-compose.rudder-snowpipe-clients.yml | 2 +- .../testhelper/testhelper.go | 13 +- .../snowpipestreaming/types.go | 56 +- .../snowpipestreaming/upload.go | 277 +- .../snowpipestreaming/uploadstats.go | 27 +- router/batchrouter/handle_async.go | 3 +- router/batchrouter/handle_lifecycle.go | 6 +- warehouse/integrations/manager/manager.go | 4 +- .../testdata/docker-compose.transformer.yml | 11 + warehouse/utils/reservedkeywords.go | 93 + warehouse/utils/uploader.go | 5 - warehouse/utils/utils.go | 45 +- 38 files changed, 2038 insertions(+), 1733 deletions(-) rename router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/{errorcodes.go => codes.go} (100%) create mode 100644 router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming_test.go create mode 100644 warehouse/integrations/testdata/docker-compose.transformer.yml diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 2a441cfe29..e269066f03 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -122,7 +122,7 @@ jobs: go-version-file: 'go.mod' - run: go version - run: go mod download # Not required, used to segregate module download vs test times - - run: make test exclude="/rudder-server/(jobsdb|integration_test|processor|regulation-worker|router|services|suppression-backup-service|warehouse)" + - run: FORCE_RUN_INTEGRATION_TESTS=true make test exclude="/rudder-server/(jobsdb|integration_test|processor|regulation-worker|router|services|suppression-backup-service|warehouse)" - name: Upload coverage report uses: actions/upload-artifact@v4 with: diff --git a/integration_test/snowpipestreaming/snowpipestreaming_test.go b/integration_test/snowpipestreaming/snowpipestreaming_test.go index 4f1939f714..de97454e1f 100644 --- a/integration_test/snowpipestreaming/snowpipestreaming_test.go +++ b/integration_test/snowpipestreaming/snowpipestreaming_test.go @@ -4,84 +4,46 @@ import ( "bytes" "context" "database/sql" - "encoding/json" - "errors" "fmt" "io" "net/http" + "net/http/httptest" "os" "path" "strconv" - "strings" "testing" "time" + "github.com/google/uuid" + "github.com/iancoleman/strcase" "github.com/ory/dockertest/v3" - promClient 
"github.com/prometheus/client_model/go" - "github.com/rudderlabs/rudder-go-kit/stats/testhelper" "github.com/samber/lo" "github.com/stretchr/testify/require" - "golang.org/x/sync/errgroup" "github.com/rudderlabs/compose-test/compose" "github.com/rudderlabs/compose-test/testcompose" + "github.com/rudderlabs/rudder-go-kit/bytesize" "github.com/rudderlabs/rudder-go-kit/config" kithttputil "github.com/rudderlabs/rudder-go-kit/httputil" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats" kithelper "github.com/rudderlabs/rudder-go-kit/testhelper" "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/postgres" - "github.com/rudderlabs/rudder-go-kit/testhelper/rand" + backendconfig "github.com/rudderlabs/rudder-server/backend-config" + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testhelper" "github.com/rudderlabs/rudder-server/runner" "github.com/rudderlabs/rudder-server/testhelper/backendconfigtest" "github.com/rudderlabs/rudder-server/testhelper/health" - "github.com/rudderlabs/rudder-server/utils/httputil" "github.com/rudderlabs/rudder-server/utils/timeutil" "github.com/rudderlabs/rudder-server/warehouse/integrations/snowflake" whth "github.com/rudderlabs/rudder-server/warehouse/integrations/testhelper" whutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) -const ( - testKeyPairUnencrypted = "SNOWPIPE_STREAMING_KEYPAIR_UNENCRYPTED_INTEGRATION_TEST_CREDENTIALS" -) - -type testCredentials struct { - Account string `json:"account"` - User string `json:"user"` - Role string `json:"role"` - Database string `json:"database"` - Warehouse string `json:"warehouse"` - PrivateKey string `json:"privateKey"` - PrivateKeyPassphrase string `json:"privateKeyPassphrase"` -} - -func getSnowpipeTestCredentials(key string) (*testCredentials, error) { - cred, exists := os.LookupEnv(key) - if !exists { - return nil, errors.New("snowpipe test credentials not found") - } - - var credentials testCredentials - err := json.Unmarshal([]byte(cred), &credentials) - if err != nil { - return nil, fmt.Errorf("unable to marshall %s to snowpipe test credentials: %v", key, err) - } - return &credentials, nil -} - -func randSchema(provider string) string { // nolint:unparam - hex := strings.ToLower(rand.String(12)) - namespace := fmt.Sprintf("test_%s_%d", hex, time.Now().Unix()) - return whutils.ToProviderCase(provider, whutils.ToSafeNamespace(provider, - namespace, - )) -} - func TestSnowPipeStreaming(t *testing.T) { for _, key := range []string{ - testKeyPairUnencrypted, + testhelper.TestKeyPairUnencrypted, } { if _, exists := os.LookupEnv(key); !exists { if os.Getenv("FORCE_RUN_INTEGRATION_TESTS") == "true" { @@ -97,62 +59,24 @@ func TestSnowPipeStreaming(t *testing.T) { transformerURL := fmt.Sprintf("http://localhost:%d", c.Port("transformer", 9090)) snowPipeClientsURL := fmt.Sprintf("http://localhost:%d", c.Port("rudder-snowpipe-clients", 9078)) - keyPairUnEncryptedCredentials, err := getSnowpipeTestCredentials(testKeyPairUnencrypted) + credentials, err := testhelper.GetSnowPipeTestCredentials(testhelper.TestKeyPairUnencrypted) require.NoError(t, err) t.Run("namespace and table already exists", func(t *testing.T) { - config.Reset() - defer config.Reset() - - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) - - gwPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - namespace := randSchema(whutils.SNOWFLAKE) - - 
destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). - Build() - defer bcServer.Close() + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -160,129 +84,60 @@ func TestSnowPipeStreaming(t *testing.T) { Destination: destination, } + t.Log("Creating schema and tables") sm := snowflake.New(config.New(), logger.NOP, stats.NOP) - require.NoError(t, err) require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) require.NoError(t, sm.CreateSchema(ctx)) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) - require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ - "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "string", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": 
"string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) - require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ - "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_REQUEST_IP": "string", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) eventFormat := func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, strconv.Itoa(index+1), - "identify", ) } - - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) - - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": 
"TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) - ts := timeutil.Now().Format("2006-01-02") - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - }, - usersRecords, - ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) - 
require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - }, - identifiesRecords, - ) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, productReviewedRecords(source, destination), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecords(source, destination), tracksRecordsFromDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) - t.Run("namespace does not exists", func(t *testing.T) { - config.Reset() - defer config.Reset() - - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) - - gwPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - namespace := randSchema(whutils.SNOWFLAKE) - - destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). 
- WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). - Build() - defer bcServer.Close() + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -294,118 +149,49 @@ func TestSnowPipeStreaming(t *testing.T) { require.NoError(t, err) require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) eventFormat := func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. 
It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, strconv.Itoa(index+1), - "identify", ) } - - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) - - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - 
usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) - ts := timeutil.Now().Format("2006-01-02") - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - }, - usersRecords, - ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - }, - identifiesRecords, - ) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, productReviewedRecords(source, destination), 
produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecords(source, destination), tracksRecordsFromDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) - t.Run("table does not exists", func(t *testing.T) { - config.Reset() - defer config.Reset() - - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) - - gwPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - namespace := randSchema(whutils.SNOWFLAKE) - - destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). 
- Build() - defer bcServer.Close() + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -413,123 +199,55 @@ func TestSnowPipeStreaming(t *testing.T) { Destination: destination, } + t.Log("Creating schema") sm := snowflake.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, err) require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) require.NoError(t, sm.CreateSchema(ctx)) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) eventFormat := func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. 
It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, strconv.Itoa(index+1), - "identify", ) } - - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) - - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - 
usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) - ts := timeutil.Now().Format("2006-01-02") - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - }, - usersRecords, - ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - }, - identifiesRecords, - ) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, productReviewedRecords(source, destination), 
produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecords(source, destination), tracksRecordsFromDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) - t.Run("events with different schema", func(t *testing.T) { - config.Reset() - defer config.Reset() - - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) - - gwPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - namespace := randSchema(whutils.SNOWFLAKE) - - destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). 
- Build() - defer bcServer.Close() + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -537,129 +255,70 @@ func TestSnowPipeStreaming(t *testing.T) { Destination: destination, } + t.Log("Creating schema and tables") sm := snowflake.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, err) require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) require.NoError(t, sm.CreateSchema(ctx)) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) - require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ - "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "string", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) - require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ - "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_REQUEST_IP": "string", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", 
"USER_ID": "string", "UUID_TS": "datetime", })) eventFormat := func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}, "additional_column_%[1]s": "%[1]s"},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. It works but the material feels flimsy.","additional_column_%[1]s": "%[1]s"}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, strconv.Itoa(index+1), - "identify", ) } - - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) - - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_ADDITIONAL_COLUMN_1": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_2": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_3": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_4": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_5": "TEXT", "CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": {"CONTEXT_ADDITIONAL_COLUMN_1": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_2": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_3": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_4": "TEXT", "CONTEXT_ADDITIONAL_COLUMN_5": "TEXT", "CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "PRODUCT_REVIEWED": {"ADDITIONAL_COLUMN_1": "TEXT", "ADDITIONAL_COLUMN_2": "TEXT", 
"ADDITIONAL_COLUMN_3": "TEXT", "ADDITIONAL_COLUMN_4": "TEXT", "ADDITIONAL_COLUMN_5": "TEXT", "CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD'), CONTEXT_ADDITIONAL_COLUMN_1, CONTEXT_ADDITIONAL_COLUMN_2, CONTEXT_ADDITIONAL_COLUMN_3, CONTEXT_ADDITIONAL_COLUMN_4, CONTEXT_ADDITIONAL_COLUMN_5 FROM %q.%q;`, namespace, "USERS")) ts := timeutil.Now().Format("2006-01-02") + eventName := "Product Reviewed" + tableName := strcase.ToSnake(eventName) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD'), ADDITIONAL_COLUMN_1, ADDITIONAL_COLUMN_2, ADDITIONAL_COLUMN_3, ADDITIONAL_COLUMN_4, ADDITIONAL_COLUMN_5 FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts, "1", "", "", "", ""}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts, "", "2", "", "", ""}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts, "", "", "3", "", ""}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts, "", "", "", "4", ""}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", 
"2020-02-02T00:23:09Z", ts, "", "", "", "", "5"}, - }, - usersRecords, - ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD'), CONTEXT_ADDITIONAL_COLUMN_1, CONTEXT_ADDITIONAL_COLUMN_2, CONTEXT_ADDITIONAL_COLUMN_3, CONTEXT_ADDITIONAL_COLUMN_4, CONTEXT_ADDITIONAL_COLUMN_5 FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts, "1", "", "", "", ""}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts, "", "2", "", "", ""}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts, "", "", "3", "", ""}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts, "", "", "", "4", ""}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts, "", "", "", "", "5"}, - }, - identifiesRecords, - ) + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "1", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts, "1", "", "", "", ""}, + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "2", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts, "", "2", "", "", ""}, + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "3", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts, "", "", "3", "", ""}, + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "4", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts, "", "", "", "4", ""}, + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "5", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. 
It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts, "", "", "", "", "5"}, + }, produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecords(source, destination), tracksRecordsFromDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) - t.Run("discards", func(t *testing.T) { - config.Reset() - defer config.Reset() - - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) - - gwPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - namespace := randSchema(whutils.SNOWFLAKE) - - destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). - Build() - defer bcServer.Close() + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -667,165 +326,64 @@ func TestSnowPipeStreaming(t *testing.T) { Destination: destination, } + t.Log("Creating schema and tables. 
Discards is not created") + t.Log("CONTEXT_IP, CONTEXT_REQUEST_IP are of type int") sm := snowflake.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, err) require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) require.NoError(t, sm.CreateSchema(ctx)) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) - require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ - "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "int", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "int", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "int", "CONTEXT_REQUEST_IP": "int", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) - require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ - "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "int", "CONTEXT_REQUEST_IP": "int", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "int", "CONTEXT_REQUEST_IP": "int", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) eventFormat := func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. 
It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, strconv.Itoa(index+1), - "identify", ) } - - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) - - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": 
{"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) - ts := timeutil.Now().Format("2006-01-02") - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - }, - usersRecords, - ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - }, - identifiesRecords, - ) - discardsRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) - require.ElementsMatch(t, [][]string{ - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", 
"incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - }, - discardsRecords, - ) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, productReviewedRecordsForDiscards(source, destination), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, 
sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecordsForDiscards(source, destination), tracksRecordsFromDB) + discardsRecordsInDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) + require.ElementsMatch(t, discardsRecords(), discardsRecordsInDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) - t.Run("discards migration for reason", func(t *testing.T) { - config.Reset() - defer config.Reset() - - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) - - gwPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - namespace := randSchema(whutils.SNOWFLAKE) - - destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). - Build() - defer bcServer.Close() + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -833,168 +391,67 @@ func TestSnowPipeStreaming(t *testing.T) { Destination: destination, } + t.Log("Creating schema and tables. 
Discards is created without reason") + t.Log("CONTEXT_IP, CONTEXT_REQUEST_IP are of type int") sm := snowflake.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, err) require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) require.NoError(t, sm.CreateSchema(ctx)) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) - require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ - "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "int", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "int", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "int", "CONTEXT_REQUEST_IP": "int", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) - require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ - "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "int", "CONTEXT_REQUEST_IP": "int", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "int", "CONTEXT_REQUEST_IP": "int", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) require.NoError(t, sm.CreateTable(ctx, "RUDDER_DISCARDS", whutils.ModelTableSchema{ "COLUMN_NAME": "string", "COLUMN_VALUE": "string", "RECEIVED_AT": "datetime", "ROW_ID": "string", "TABLE_NAME": "string", "UUID_TS": "datetime", })) eventFormat := func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. 
It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, strconv.Itoa(index+1), - "identify", ) } - - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) - - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": 
{"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) - ts := timeutil.Now().Format("2006-01-02") - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - }, - usersRecords, - ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - }, - identifiesRecords, - ) - discardsRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) - require.ElementsMatch(t, [][]string{ - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", 
"incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - }, - discardsRecords, - ) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, productReviewedRecordsForDiscards(source, destination), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, 
sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecordsForDiscards(source, destination), tracksRecordsFromDB) + discardsRecordsInDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) + require.ElementsMatch(t, discardsRecords(), discardsRecordsInDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) - t.Run("discards migrated", func(t *testing.T) { - config.Reset() - defer config.Reset() - - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) - - gwPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - namespace := randSchema(whutils.SNOWFLAKE) - - destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). - Build() - defer bcServer.Close() + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -1002,177 +459,76 @@ func TestSnowPipeStreaming(t *testing.T) { Destination: destination, } + t.Log("Creating schema and tables. 
Discards is created with reason") + t.Log("CONTEXT_IP, CONTEXT_REQUEST_IP are of type int") sm := snowflake.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, err) require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) require.NoError(t, sm.CreateSchema(ctx)) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) - require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ - "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "int", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "int", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "int", "CONTEXT_REQUEST_IP": "int", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) - require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ - "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "int", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "int", "CONTEXT_REQUEST_IP": "int", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "int", "CONTEXT_REQUEST_IP": "int", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) require.NoError(t, sm.CreateTable(ctx, "RUDDER_DISCARDS", whutils.ModelTableSchema{ "COLUMN_NAME": "string", "COLUMN_VALUE": "string", "RECEIVED_AT": "datetime", "ROW_ID": "string", "TABLE_NAME": "string", "UUID_TS": "datetime", "REASON": "string", })) eventFormat := func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. 
It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, strconv.Itoa(index+1), - "identify", ) } - - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) - - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": 
{"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) - ts := timeutil.Now().Format("2006-01-02") - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - }, - usersRecords, - ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "", "http", "", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - }, - identifiesRecords, - ) - discardsRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) - require.ElementsMatch(t, [][]string{ - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", 
"incompatible schema conversion from int to string", ts, "1", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "IDENTIFIES", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "IDENTIFIES", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "1", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "3", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "4", "USERS", ts}, - {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - {"CONTEXT_PASSED_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "USERS", ts}, - }, - discardsRecords, - ) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, productReviewedRecordsForDiscards(source, destination), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, 
sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecordsForDiscards(source, destination), tracksRecordsFromDB) + discardsRecordsInDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) + require.ElementsMatch(t, discardsRecords(), discardsRecordsInDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) - t.Run("don't re-create channel on loading twice when successful", func(t *testing.T) { - config.Reset() - defer config.Reset() - - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) - - gwPort, err := kithelper.GetFreePort() - require.NoError(t, err) prometheusPort, err := kithelper.GetFreePort() require.NoError(t, err) - namespace := randSchema(whutils.SNOWFLAKE) - - destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). 
- Build() - defer bcServer.Close() + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { + done := make(chan error) + go func() { + defer close(done) config.Set("enableStats", true) config.Set("RuntimeStats.enabled", false) config.Set("OpenTelemetry.enabled", true) config.Set("OpenTelemetry.metrics.prometheus.enabled", true) config.Set("OpenTelemetry.metrics.prometheus.port", strconv.Itoa(prometheusPort)) config.Set("OpenTelemetry.metrics.exportInterval", "10ms") + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -1180,172 +536,67 @@ func TestSnowPipeStreaming(t *testing.T) { Destination: destination, } + t.Log("Creating schema and tables") sm := snowflake.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, err) require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) require.NoError(t, sm.CreateSchema(ctx)) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) - require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ - "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "string", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) - require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ - "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_REQUEST_IP": "string", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", 
"CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", })) + t.Log("Sending 5 events") eventFormat := func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, strconv.Itoa(index+1), - "identify", ) } - - t.Log("Sending 5 events") - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) - - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) t.Log("Sending 5 events again") - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) - - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 10 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 20 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") - - metrics := getPrometheusMetrics(t, prometheusPort) - require.Equal(t, 1, len(metrics["snowpipestreaming_create_channel_count"].GetMetric())) - require.Equal(t, float64(2), metrics["snowpipestreaming_create_channel_count"].GetMetric()[0].Counter.GetValue()) + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) + 
requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 20) schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) require.Equal(t, map[string]map[string]string{ - "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, - "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"}, + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) - ts := timeutil.Now().Format("2006-01-02") - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, 
"2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - }, - usersRecords, - ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", 
ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - }, - identifiesRecords, - ) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, append(productReviewedRecords(source, destination), productReviewedRecords(source, destination)...), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, append(tracksRecords(source, destination), tracksRecords(source, destination)...), tracksRecordsFromDB) cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) - - t.Run("many tables", func(t *testing.T) {}) - - t.Run("schema modified after channel creation (datatype changed)", func(t *testing.T) { - config.Reset() - defer config.Reset() - - pool, err := dockertest.NewPool("") - require.NoError(t, err) - postgresContainer, err := postgres.Setup(pool, t) - require.NoError(t, err) - - gwPort, err := kithelper.GetFreePort() - require.NoError(t, err) - prometheusPort, err := kithelper.GetFreePort() - require.NoError(t, err) - - namespace := randSchema(whutils.SNOWFLAKE) - - destination := backendconfigtest. - NewDestinationBuilder("SNOWPIPE_STREAMING"). - WithID("destination1"). - WithConfigOption("account", keyPairUnEncryptedCredentials.Account). - WithConfigOption("warehouse", keyPairUnEncryptedCredentials.Warehouse). - WithConfigOption("database", keyPairUnEncryptedCredentials.Database). - WithConfigOption("role", keyPairUnEncryptedCredentials.Role). - WithConfigOption("user", keyPairUnEncryptedCredentials.User). - WithConfigOption("useKeyPairAuth", true). - WithConfigOption("privateKey", keyPairUnEncryptedCredentials.PrivateKey). - WithConfigOption("privateKeyPassphrase", keyPairUnEncryptedCredentials.PrivateKeyPassphrase). - WithConfigOption("namespace", namespace). - WithRevisionID("destination1"). - Build() - source := backendconfigtest.NewSourceBuilder(). - WithID("source1"). - WithWriteKey("writekey1"). - WithConnection(destination). - Build() - bcServer := backendconfigtest.NewBuilder(). - WithWorkspaceConfig( - backendconfigtest.NewConfigBuilder(). - WithSource(source). - Build()). 
- Build() - defer bcServer.Close() + t.Run("many tables", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - wg, ctx := errgroup.WithContext(ctx) - wg.Go(func() error { - config.Set("enableStats", true) - config.Set("RuntimeStats.enabled", false) - config.Set("OpenTelemetry.enabled", true) - config.Set("OpenTelemetry.metrics.prometheus.enabled", true) - config.Set("OpenTelemetry.metrics.prometheus.port", strconv.Itoa(prometheusPort)) - config.Set("OpenTelemetry.metrics.exportInterval", "10ms") + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() - err := runRudderServer(ctx, gwPort, postgresContainer, bcServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) - if err != nil { - t.Logf("rudder-server exited with error: %v", err) - } - return err - }) - url := fmt.Sprintf("http://localhost:%d", gwPort) + url := fmt.Sprintf("http://localhost:%d", gatewayPort) health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) warehouse := whutils.ModelWarehouse{ @@ -1353,142 +604,733 @@ func TestSnowPipeStreaming(t *testing.T) { Destination: destination, } + t.Log("Creating schema and tables") sm := snowflake.New(config.New(), logger.NOP, stats.NOP) - require.NoError(t, err) require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) t.Cleanup(func() { sm.Cleanup(ctx) }) require.NoError(t, sm.CreateSchema(ctx)) - t.Cleanup(func() { dropSchema(t, sm.DB.DB, namespace) }) - require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ - "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "string", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", - })) - require.NoError(t, sm.CreateTable(ctx, "USERS", whutils.ModelTableSchema{ - "CONTEXT_DESTINATION_ID": "string", "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_PASSED_IP": "string", "CONTEXT_REQUEST_IP": "string", "RECEIVED_AT": "datetime", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "ID": "string", "UUID_TS": "datetime", "ORIGINAL_TIMESTAMP": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", - })) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) - eventFormat := func(index int) string { - return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, - strconv.Itoa(index+1), - "identify", - ) + for i := 0; i < 10; i++ { + eventFormat := func(int) string { + return fmt.Sprintf(`{"batch":[{"type":"track","userId":"%[1]s","event":"Product Reviewed %[1]s","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK 
for the price. It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + strconv.Itoa(i+1), + ) + } + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) } - t.Log("Sending 5 events") - err = sendEvents(5, eventFormat, "writekey1", url) - require.NoError(t, err) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5*10) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 2*5*10) - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 5 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", jobsCount) - return jobsCount == 10 - }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router") - - t.Log("Schema modified") - _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.USERS DROP COLUMN CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_REQUEST_IP;", namespace)) + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + expectedSchema := lo.SliceToMap( + lo.RepeatBy(10, func(index int) string { + return "PRODUCT_REVIEWED_" + strconv.Itoa(index+1) + }), + func(tableName string) (string, map[string]string) { + return tableName, map[string]string{"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"} + }, + ) + expectedSchema = lo.Assign(expectedSchema, map[string]map[string]string{ + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + }) + require.Equal(t, expectedSchema, convertRecordsToSchema(schema)) + + for i := 0; i < 10; i++ { + productIDIndex := i + 1 + userID := strconv.Itoa(productIDIndex) + eventName := "Product Reviewed " + strconv.Itoa(productIDIndex) + tableName := strcase.ToSnake(eventName) + recordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, 
EVENT, EVENT_TEXT, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED_"+strconv.Itoa(productIDIndex))) + ts := timeutil.Now().Format("2006-01-02") + + expectedProductReviewedRecords := [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", source.ID, source.SourceDefinition.Name, tableName, eventName, "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", userID, ts}, + {destination.ID, "SNOWPIPE_STREAMING", source.ID, source.SourceDefinition.Name, tableName, eventName, "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", userID, ts}, + {destination.ID, "SNOWPIPE_STREAMING", source.ID, source.SourceDefinition.Name, tableName, eventName, "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", userID, ts}, + {destination.ID, "SNOWPIPE_STREAMING", source.ID, source.SourceDefinition.Name, tableName, eventName, "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", userID, ts}, + {destination.ID, "SNOWPIPE_STREAMING", source.ID, source.SourceDefinition.Name, tableName, eventName, "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", userID, ts}, + } + require.Equal(t, expectedProductReviewedRecords, recordsFromDB) + } + + trackRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + expectedTrackRecords := lo.RepeatBy(50, func(index int) []string { + productIDIndex := index/5 + 1 + userID := strconv.Itoa(productIDIndex) + eventName := "Product Reviewed " + strconv.Itoa(productIDIndex) + tableName := strcase.ToSnake(eventName) + ts := timeutil.Now().Format("2006-01-02") + + return []string{destination.ID, "SNOWPIPE_STREAMING", source.ID, source.SourceDefinition.Name, tableName, eventName, "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", userID, ts} + }) + require.ElementsMatch(t, expectedTrackRecords, trackRecordsFromDB) + + cancel() + require.NoError(t, <-done) + }) + t.Run("schema modified after channel creation (schema deleted)", func(t *testing.T) { + prometheusPort, err := kithelper.GetFreePort() require.NoError(t, err) - _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.IDENTIFIES DROP COLUMN CONTEXT_IP, CONTEXT_PASSED_IP, CONTEXT_REQUEST_IP;", namespace)) + + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + done := make(chan error) + go func() { + defer close(done) + config.Set("enableStats", true) + 
config.Set("RuntimeStats.enabled", false) + config.Set("OpenTelemetry.enabled", true) + config.Set("OpenTelemetry.metrics.prometheus.enabled", true) + config.Set("OpenTelemetry.metrics.prometheus.port", strconv.Itoa(prometheusPort)) + config.Set("OpenTelemetry.metrics.exportInterval", "10ms") + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, err) - _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.USERS ADD COLUMN CONTEXT_IP NUMBER, CONTEXT_PASSED_IP NUMBER, CONTEXT_REQUEST_IP NUMBER;", namespace)) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + + t.Log("Sending 5 events") + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. 
It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`,
+ strconv.Itoa(index+1),
+ )
+ }
+ require.NoError(t, sendEvents(5, eventFormat, "writekey1", url))
+ requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5)
+ requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10)
+
+ t.Log("Schema modified, Dropping schema")
+ testhelper.DropSchema(t, sm.DB.DB, namespace)
+
+ t.Log("Sending 5 events again")
+ require.NoError(t, sendEvents(5, eventFormat, "writekey1", url))
+ requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10)
+ requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "aborted", 10)
+
+ t.Log("Sending 5 events again, should succeed")
+ require.NoError(t, sendEvents(5, eventFormat, "writekey1", url))
+ requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 15)
+ requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 20)
+
+ schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace))
+ require.Equal(t, map[string]map[string]string{
+ "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"},
+ "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"},
+ },
+ convertRecordsToSchema(schema),
+ )
+
+ produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED"))
+ require.ElementsMatch(t, productReviewedRecords(source, destination), produceReviewedRecordsFromDB)
+ tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS"))
+ require.ElementsMatch(t, tracksRecords(source, destination), tracksRecordsFromDB)
+
+ cancel()
+ require.NoError(t, <-done)
+ })
+ t.Run("schema modified after channel creation (table deleted)", func(t *testing.T) {
+ prometheusPort, err := kithelper.GetFreePort()
 require.NoError(t, err)
- _, err =
sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.IDENTIFIES ADD COLUMN CONTEXT_IP NUMBER, CONTEXT_PASSED_IP NUMBER, CONTEXT_REQUEST_IP NUMBER;", namespace)) + + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + done := make(chan error) + go func() { + defer close(done) + config.Set("enableStats", true) + config.Set("RuntimeStats.enabled", false) + config.Set("OpenTelemetry.enabled", true) + config.Set("OpenTelemetry.metrics.prometheus.enabled", true) + config.Set("OpenTelemetry.metrics.prometheus.port", strconv.Itoa(prometheusPort)) + config.Set("OpenTelemetry.metrics.exportInterval", "10ms") + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + + t.Log("Sending 5 events") + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. 
It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`,
+ strconv.Itoa(index+1),
+ )
+ }
+ require.NoError(t, sendEvents(5, eventFormat, "writekey1", url))
+ requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5)
+ requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10)
+
+ t.Log("Schema modified, Dropping table")
+ _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("DROP TABLE %q.%q;", namespace, "TRACKS"))
 require.NoError(t, err)
 t.Log("Sending 5 events again")
- err = sendEvents(5, eventFormat, "writekey1", url)
+ require.NoError(t, sendEvents(5, eventFormat, "writekey1", url))
+ requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10)
+ requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 15)
+ requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "aborted", 5)
+
+ t.Log("Sending 5 events again, should succeed")
+ require.NoError(t, sendEvents(5, eventFormat, "writekey1", url))
+ requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 15)
+ requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 25)
+
+ schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace))
+ require.Equal(t, map[string]map[string]string{
+ "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"},
+ "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"},
+ "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"},
+ },
+ convertRecordsToSchema(schema),
+ )
+
+ produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED"))
+ require.ElementsMatch(t, append(productReviewedRecords(source, destination), append(productReviewedRecords(source, destination), productReviewedRecords(source, destination)...)...), produceReviewedRecordsFromDB)
+ tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE,
EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecords(source, destination), tracksRecordsFromDB) + + cancel() + require.NoError(t, <-done) + }) + t.Run("schema modified after channel creation (columns deleted)", func(t *testing.T) { + prometheusPort, err := kithelper.GetFreePort() + require.NoError(t, err) + + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + done := make(chan error) + go func() { + defer close(done) + config.Set("enableStats", true) + config.Set("RuntimeStats.enabled", false) + config.Set("OpenTelemetry.enabled", true) + config.Set("OpenTelemetry.metrics.prometheus.enabled", true) + config.Set("OpenTelemetry.metrics.prometheus.port", strconv.Itoa(prometheusPort)) + config.Set("OpenTelemetry.metrics.exportInterval", "10ms") + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("gw processedJobCount: %d", jobsCount) - return jobsCount == 10 - }, 20*time.Second, 1*time.Second, "all gw events should be successfully processed") - require.Eventually(t, func() bool { - var jobsCount int - require.NoError(t, postgresContainer.DB.QueryRow("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = 'succeeded'").Scan(&jobsCount)) - t.Logf("batch_rt succeeded: %d", 
jobsCount)
- return jobsCount == 20
- }, 200*time.Second, 1*time.Second, "all events should be aborted in batch router")
-
- metrics := getPrometheusMetrics(t, prometheusPort)
- require.Equal(t, 1, len(metrics["snowpipestreaming_create_channel_count"].GetMetric()))
- require.Equal(t, float64(2), metrics["snowpipestreaming_create_channel_count"].GetMetric()[0].Counter.GetValue())
+ t.Log("Sending 5 events")
+ eventFormat := func(index int) string {
+ return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`,
+ strconv.Itoa(index+1),
+ )
+ }
+ require.NoError(t, sendEvents(5, eventFormat, "writekey1", url))
+ requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5)
+ requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10)
+
+ t.Log("Schema modified, Dropping columns for TRACKS table")
+ _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.TRACKS DROP COLUMN CONTEXT_IP, CONTEXT_REQUEST_IP;", namespace))
+ require.NoError(t, err)
+
+ t.Log("Sending 5 events again")
+ require.NoError(t, sendEvents(5, eventFormat, "writekey1", url))
+ requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10)
+ requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 15)
+ requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "aborted", 5)
+
+ t.Log("Sending 5 events again, should succeed")
+ require.NoError(t, sendEvents(5, eventFormat, "writekey1", url))
+ requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 15)
+ requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 25)
 schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace))
 require.Equal(t, map[string]map[string]string{
- "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"},
- "USERS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "UUID_TS": "TIMESTAMP_TZ"},
+ "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS":
"TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, convertRecordsToSchema(schema), ) - usersRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "USERS")) ts := timeutil.Now().Format("2006-01-02") - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", ts}, + eventName := "Product Reviewed" + tableName := strcase.ToSnake(eventName) + recordsBeforeDeletion := [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, 
source.SourceDefinition.Name, tableName, eventName, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + } + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, append(tracksRecords(source, destination), recordsBeforeDeletion...), tracksRecordsFromDB) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, append(productReviewedRecords(source, destination), append(productReviewedRecords(source, destination), productReviewedRecords(source, destination)...)...), produceReviewedRecordsFromDB) + + cancel() + require.NoError(t, <-done) + }) + t.Run("schema modified after channel creation (datatype changed for all tables)", func(t *testing.T) { + prometheusPort, err := kithelper.GetFreePort() + require.NoError(t, err) + + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + done := make(chan error) + go func() { + defer close(done) + config.Set("enableStats", true) + config.Set("RuntimeStats.enabled", false) + config.Set("OpenTelemetry.enabled", true) + config.Set("OpenTelemetry.metrics.prometheus.enabled", true) + config.Set("OpenTelemetry.metrics.prometheus.port", strconv.Itoa(prometheusPort)) + config.Set("OpenTelemetry.metrics.exportInterval", "10ms") + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + 
require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{
+ "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime",
+ }))
+ require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{
+ "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime",
+ }))
+ require.NoError(t, sm.CreateTable(ctx, "RUDDER_DISCARDS", whutils.ModelTableSchema{
+ "COLUMN_NAME": "string", "COLUMN_VALUE": "string", "RECEIVED_AT": "datetime", "ROW_ID": "string", "TABLE_NAME": "string", "UUID_TS": "datetime", "REASON": "string",
+ }))
+
+ t.Log("Sending 5 events")
+ eventFormat := func(index int) string {
+ return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`,
+ strconv.Itoa(index+1),
+ )
+ }
+ require.NoError(t, sendEvents(5, eventFormat, "writekey1", url))
+ requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5)
+ requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10)
+
+ t.Log("Schema modified, CONTEXT_IP, CONTEXT_REQUEST_IP are of type int")
+ _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.TRACKS DROP COLUMN CONTEXT_IP, CONTEXT_REQUEST_IP;", namespace))
+ require.NoError(t, err)
+ _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.PRODUCT_REVIEWED DROP COLUMN CONTEXT_IP, CONTEXT_REQUEST_IP;", namespace))
+ require.NoError(t, err)
+ _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.TRACKS ADD COLUMN CONTEXT_IP NUMBER, CONTEXT_REQUEST_IP NUMBER;", namespace))
+ require.NoError(t, err)
+ _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.PRODUCT_REVIEWED ADD COLUMN CONTEXT_IP NUMBER, CONTEXT_REQUEST_IP NUMBER;", namespace))
+ require.NoError(t, err)
+
+ t.Log("Sending 5 events again")
+ require.NoError(t, sendEvents(5, eventFormat, "writekey1", url))
+ requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10)
+ requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "aborted", 10)
+
+ t.Log("Sending 5 events again, should succeed")
+ require.NoError(t, sendEvents(5, eventFormat, "writekey1", url))
+ requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 15)
+ requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 20)
+
+ schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace))
+
require.Equal(t, map[string]map[string]string{ + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, - usersRecords, + convertRecordsToSchema(schema), ) - identifiesRecords := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) - require.ElementsMatch(t, [][]string{ - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, - {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, - 
{destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, append(productReviewedRecordsForDiscards(source, destination), productReviewedRecordsForDiscards(source, destination)...), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, append(tracksRecordsForDiscards(source, destination), tracksRecordsForDiscards(source, destination)...), tracksRecordsFromDB) + discardsRecordsInDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) + require.ElementsMatch(t, discardsRecords(), discardsRecordsInDB) + + cancel() + require.NoError(t, <-done) + }) + t.Run("schema modified after channel creation (datatype changed for partial tables)", func(t *testing.T) { + prometheusPort, err := kithelper.GetFreePort() + require.NoError(t, err) + + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + done := make(chan error) + go func() { + defer close(done) + config.Set("enableStats", true) + config.Set("RuntimeStats.enabled", false) + config.Set("OpenTelemetry.enabled", true) + config.Set("OpenTelemetry.metrics.prometheus.enabled", true) + config.Set("OpenTelemetry.metrics.prometheus.port", strconv.Itoa(prometheusPort)) + config.Set("OpenTelemetry.metrics.exportInterval", "10ms") + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, err) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "PRODUCT_REVIEWED", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", 
"CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "PRODUCT_ID": "string", "RATING": "int", "RECEIVED_AT": "datetime", "REVIEW_BODY": "string", "REVIEW_ID": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + require.NoError(t, sm.CreateTable(ctx, "TRACKS", whutils.ModelTableSchema{ + "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_IP": "string", "CONTEXT_REQUEST_IP": "string", "CONTEXT_SOURCE_ID": "string", "CONTEXT_SOURCE_TYPE": "string", "EVENT": "string", "EVENT_TEXT": "string", "ID": "string", "ORIGINAL_TIMESTAMP": "datetime", "RECEIVED_AT": "datetime", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "USER_ID": "string", "UUID_TS": "datetime", + })) + require.NoError(t, sm.CreateTable(ctx, "RUDDER_DISCARDS", whutils.ModelTableSchema{ + "COLUMN_NAME": "string", "COLUMN_VALUE": "string", "RECEIVED_AT": "datetime", "ROW_ID": "string", "TABLE_NAME": "string", "UUID_TS": "datetime", "REASON": "string", + })) + + t.Log("Sending 5 events") + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. It works but the material feels flimsy."}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + strconv.Itoa(index+1), + ) + } + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) + + t.Log("Schema modified, CONTEXT_IP, CONTEXT_REQUEST_IP are of type int") + _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.TRACKS DROP COLUMN CONTEXT_IP, CONTEXT_REQUEST_IP;", namespace)) + require.NoError(t, err) + _, err = sm.DB.DB.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.TRACKS ADD COLUMN CONTEXT_IP NUMBER, CONTEXT_REQUEST_IP NUMBER;", namespace)) + require.NoError(t, err) + + t.Log("Sending 5 events again") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 15) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "aborted", 5) + + t.Log("Sending 5 events again, should succeeded") + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 15) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 25) + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", 
"TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "NUMBER", "CONTEXT_REQUEST_IP": "NUMBER", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) + + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.ElementsMatch(t, append(productReviewedRecords(source, destination), append(productReviewedRecords(source, destination), productReviewedRecords(source, destination)...)...), produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, append(tracksRecordsForDiscards(source, destination), tracksRecordsForDiscards(source, destination)...), tracksRecordsFromDB) + discardsRecordsInDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT COLUMN_NAME, COLUMN_VALUE, REASON, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), ROW_ID, TABLE_NAME, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "RUDDER_DISCARDS")) + require.ElementsMatch(t, discardTracksRecords(), discardsRecordsInDB) + + cancel() + require.NoError(t, <-done) + }) + t.Run("JSON columns", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) + destination.Config["jsonPaths"] = "track.properties.jsonInfo" + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"type":"track","messageId":"%[1]s","userId":"%[1]s","event":"Product 
Reviewed","properties":{"review_id":"86ac1cd43","product_id":"9578257311","rating":3,"review_body":"OK for the price. It works but the material feels flimsy.", "jsonInfo": {"id":123,"name":"Test User","email":"testuser@example.com","isActive":true,"createdAt":"2023-10-01T12:34:56Z","profile":{"age":30,"address":{"street":"123 Test St","city":"Testville","zip":"12345"},"interests":["coding","reading","gaming"]}}}, "timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + strconv.Itoa(index+1), + ) + } + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 10) + + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "PRODUCT_REVIEWED": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "PRODUCT_ID": "TEXT", "RATING": "NUMBER", "RECEIVED_AT": "TIMESTAMP_TZ", "REVIEW_BODY": "TEXT", "REVIEW_ID": "TEXT", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ", "JSON_INFO": "VARIANT"}, + "TRACKS": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "EVENT": "TEXT", "EVENT_TEXT": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, }, - identifiesRecords, + convertRecordsToSchema(schema), ) + ts := timeutil.Now().Format("2006-01-02") + eventName := "Product Reviewed" + tableName := strcase.ToSnake(eventName) + produceReviewedRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, PRODUCT_ID, RATING, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), REVIEW_BODY, REVIEW_ID, SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD'), JSON_INFO FROM %q.%q;`, namespace, "PRODUCT_REVIEWED")) + require.Equal(t, [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "1", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. 
It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts, "{\n \"createdAt\": \"2023-10-01T12:34:56Z\",\n \"email\": \"testuser@example.com\",\n \"id\": 123,\n \"isActive\": true,\n \"name\": \"Test User\",\n \"profile\": {\n \"address\": {\n \"city\": \"Testville\",\n \"street\": \"123 Test St\",\n \"zip\": \"12345\"\n },\n \"age\": 30,\n \"interests\": [\n \"coding\",\n \"reading\",\n \"gaming\"\n ]\n }\n}"}, + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "2", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts, "{\n \"createdAt\": \"2023-10-01T12:34:56Z\",\n \"email\": \"testuser@example.com\",\n \"id\": 123,\n \"isActive\": true,\n \"name\": \"Test User\",\n \"profile\": {\n \"address\": {\n \"city\": \"Testville\",\n \"street\": \"123 Test St\",\n \"zip\": \"12345\"\n },\n \"age\": 30,\n \"interests\": [\n \"coding\",\n \"reading\",\n \"gaming\"\n ]\n }\n}"}, + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "3", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts, "{\n \"createdAt\": \"2023-10-01T12:34:56Z\",\n \"email\": \"testuser@example.com\",\n \"id\": 123,\n \"isActive\": true,\n \"name\": \"Test User\",\n \"profile\": {\n \"address\": {\n \"city\": \"Testville\",\n \"street\": \"123 Test St\",\n \"zip\": \"12345\"\n },\n \"age\": 30,\n \"interests\": [\n \"coding\",\n \"reading\",\n \"gaming\"\n ]\n }\n}"}, + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "4", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts, "{\n \"createdAt\": \"2023-10-01T12:34:56Z\",\n \"email\": \"testuser@example.com\",\n \"id\": 123,\n \"isActive\": true,\n \"name\": \"Test User\",\n \"profile\": {\n \"address\": {\n \"city\": \"Testville\",\n \"street\": \"123 Test St\",\n \"zip\": \"12345\"\n },\n \"age\": 30,\n \"interests\": [\n \"coding\",\n \"reading\",\n \"gaming\"\n ]\n }\n}"}, + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "5", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. 
It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts, "{\n \"createdAt\": \"2023-10-01T12:34:56Z\",\n \"email\": \"testuser@example.com\",\n \"id\": 123,\n \"isActive\": true,\n \"name\": \"Test User\",\n \"profile\": {\n \"address\": {\n \"city\": \"Testville\",\n \"street\": \"123 Test St\",\n \"zip\": \"12345\"\n },\n \"age\": 30,\n \"interests\": [\n \"coding\",\n \"reading\",\n \"gaming\"\n ]\n }\n}"}, + }, produceReviewedRecordsFromDB) + tracksRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_REQUEST_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, EVENT, EVENT_TEXT, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "TRACKS")) + require.ElementsMatch(t, tracksRecords(source, destination), tracksRecordsFromDB) + cancel() - _ = wg.Wait() + require.NoError(t, <-done) }) + t.Run("identify event should not contain users", func(t *testing.T) { + postgresContainer, gatewayPort := initializeTestEnvironment(t) + namespace := testhelper.RandSchema() + backendConfigServer, source, destination := setupBackendConfigTestServer(t, credentials, namespace) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + done := make(chan error) + go func() { + defer close(done) + done <- runRudderServer(ctx, gatewayPort, postgresContainer, backendConfigServer.URL, transformerURL, snowPipeClientsURL, t.TempDir()) + }() + + url := fmt.Sprintf("http://localhost:%d", gatewayPort) + health.WaitUntilReady(ctx, t, url+"/health", 60*time.Second, 10*time.Millisecond, t.Name()) + + warehouse := whutils.ModelWarehouse{ + Namespace: namespace, + Destination: destination, + } + + t.Log("Creating schema and tables") + sm := snowflake.New(config.New(), logger.NOP, stats.NOP) + require.NoError(t, sm.Setup(ctx, warehouse, &whutils.NopUploader{})) + t.Cleanup(func() { sm.Cleanup(ctx) }) + require.NoError(t, sm.CreateSchema(ctx)) + t.Cleanup(func() { testhelper.DropSchema(t, sm.DB.DB, namespace) }) + require.NoError(t, sm.CreateTable(ctx, "IDENTIFIES", whutils.ModelTableSchema{ + "CONTEXT_IP": "string", "CONTEXT_LIBRARY_NAME": "string", "CONTEXT_SOURCE_TYPE": "string", "ORIGINAL_TIMESTAMP": "datetime", "UUID_TS": "datetime", "CONTEXT_DESTINATION_ID": "string", "CONTEXT_DESTINATION_TYPE": "string", "CONTEXT_PASSED_IP": "string", "SENT_AT": "datetime", "TIMESTAMP": "datetime", "CONTEXT_SOURCE_ID": "string", "CONTEXT_REQUEST_IP": "string", "ID": "string", "RECEIVED_AT": "datetime", "USER_ID": "string", + })) - t.Run("schema modified after channel creation (table deleted)", func(t *testing.T) {}) + eventFormat := func(index int) string { + return fmt.Sprintf(`{"batch":[{"messageId": "%[1]s", "userId":"%[1]s","type":"%[2]s","context":{"ip":"14.5.67.21","library":{"name":"http"}},"timestamp":"2020-02-02T00:23:09.544Z","sentAt":"2020-02-02T00:23:09.544Z","originalTimestamp":"2020-02-02T00:23:09.544Z","receivedAt":"2020-02-02T00:23:09.544Z"}]}`, + strconv.Itoa(index+1), + "identify", + ) + } + require.NoError(t, sendEvents(5, eventFormat, "writekey1", url)) + requireGatewayJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) + requireBatchRouterJobsCount(t, ctx, postgresContainer.DB, "succeeded", 5) - t.Run("schema modified after channel creation (schema deleted)", func(t *testing.T) {}) + schema := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, 
fmt.Sprintf(`SELECT table_name, column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = '%s';`, namespace)) + require.Equal(t, map[string]map[string]string{ + "IDENTIFIES": {"CONTEXT_DESTINATION_ID": "TEXT", "CONTEXT_DESTINATION_TYPE": "TEXT", "CONTEXT_IP": "TEXT", "CONTEXT_LIBRARY_NAME": "TEXT", "CONTEXT_PASSED_IP": "TEXT", "CONTEXT_REQUEST_IP": "TEXT", "CONTEXT_SOURCE_ID": "TEXT", "CONTEXT_SOURCE_TYPE": "TEXT", "ID": "TEXT", "ORIGINAL_TIMESTAMP": "TIMESTAMP_TZ", "RECEIVED_AT": "TIMESTAMP_TZ", "SENT_AT": "TIMESTAMP_TZ", "TIMESTAMP": "TIMESTAMP_TZ", "USER_ID": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + "RUDDER_DISCARDS": {"COLUMN_NAME": "TEXT", "COLUMN_VALUE": "TEXT", "REASON": "TEXT", "RECEIVED_AT": "TIMESTAMP_TZ", "ROW_ID": "TEXT", "TABLE_NAME": "TEXT", "UUID_TS": "TIMESTAMP_TZ"}, + }, + convertRecordsToSchema(schema), + ) + + identifiesRecordsFromDB := whth.RetrieveRecordsFromWarehouse(t, sm.DB.DB, fmt.Sprintf(`SELECT CONTEXT_DESTINATION_ID, CONTEXT_DESTINATION_TYPE, CONTEXT_IP, CONTEXT_LIBRARY_NAME, CONTEXT_PASSED_IP, CONTEXT_SOURCE_ID, CONTEXT_SOURCE_TYPE, ID, ORIGINAL_TIMESTAMP, TO_CHAR(RECEIVED_AT, 'YYYY-MM-DD'), SENT_AT, TIMESTAMP, USER_ID, TO_CHAR(UUID_TS, 'YYYY-MM-DD') FROM %q.%q;`, namespace, "IDENTIFIES")) + require.ElementsMatch(t, identifiesRecords(source, destination), identifiesRecordsFromDB) - t.Run("schema modified after channel creation (columns deleted)", func(t *testing.T) {}) + cancel() + require.NoError(t, <-done) + }) } -func runRudderServer(ctx context.Context, port int, postgresContainer *postgres.Resource, cbURL, transformerURL, snowpipeClientsURL, tmpDir string) (err error) { +func initializeTestEnvironment(t testing.TB) (*postgres.Resource, int) { + t.Helper() + + config.Reset() + t.Cleanup(config.Reset) + + pool, err := dockertest.NewPool("") + require.NoError(t, err) + postgresContainer, err := postgres.Setup(pool, t) + require.NoError(t, err) + + gatewayPort, err := kithelper.GetFreePort() + require.NoError(t, err) + + return postgresContainer, gatewayPort +} + +func setupBackendConfigTestServer( + t testing.TB, + credentials *testhelper.TestCredentials, + namespace string, +) ( + *httptest.Server, + backendconfig.SourceT, + backendconfig.DestinationT, +) { + t.Helper() + + destination := backendconfigtest. + NewDestinationBuilder("SNOWPIPE_STREAMING"). + WithID("destination1"). + WithConfigOption("account", credentials.Account). + WithConfigOption("warehouse", credentials.Warehouse). + WithConfigOption("database", credentials.Database). + WithConfigOption("role", credentials.Role). + WithConfigOption("user", credentials.User). + WithConfigOption("useKeyPairAuth", true). + WithConfigOption("privateKey", credentials.PrivateKey). + WithConfigOption("privateKeyPassphrase", credentials.PrivateKeyPassphrase). + WithConfigOption("namespace", namespace). + WithRevisionID("destination1"). + Build() + + source := backendconfigtest.NewSourceBuilder(). + WithID("source1"). + WithWriteKey("writekey1"). + WithConnection(destination). + Build() + + backendConfigServer := backendconfigtest.NewBuilder(). + WithWorkspaceConfig( + backendconfigtest.NewConfigBuilder(). + WithSource(source). + Build()). 
+ Build() + t.Cleanup(backendConfigServer.Close) + return backendConfigServer, source, destination +} + +func runRudderServer( + ctx context.Context, + port int, + postgresContainer *postgres.Resource, + cbURL, transformerURL, snowpipeClientsURL, + tmpDir string, +) (err error) { + config.Set("INSTANCE_ID", "1") config.Set("CONFIG_BACKEND_URL", cbURL) config.Set("WORKSPACE_TOKEN", "token") + config.Set("DEST_TRANSFORM_URL", transformerURL) config.Set("DB.host", postgresContainer.Host) config.Set("DB.port", postgresContainer.Port) config.Set("DB.user", postgresContainer.User) config.Set("DB.name", postgresContainer.Database) config.Set("DB.password", postgresContainer.Password) - config.Set("DEST_TRANSFORM_URL", transformerURL) config.Set("SnowpipeStreaming.Client.URL", snowpipeClientsURL) - config.Set("BatchRouter.pollStatusLoopSleep", "1s") - config.Set("BatchRouter.asyncUploadTimeout", "1s") - config.Set("BatchRouter.asyncUploadWorkerTimeout", "1s") - config.Set("BatchRouter.mainLoopFreq", "1s") - config.Set("BatchRouter.uploadFreq", "1s") + config.Set("BatchRouter.SNOWPIPE_STREAMING.mainLoopFreq", "1s") // default 30s + config.Set("BatchRouter.SNOWPIPE_STREAMING.uploadFreq", "1s") // default 30s + config.Set("BatchRouter.SNOWPIPE_STREAMING.minIdleSleep", "1s") // default 2s + config.Set("BatchRouter.SNOWPIPE_STREAMING.maxEventsInABatch", 10000) // default 10000 + config.Set("BatchRouter.SNOWPIPE_STREAMING.maxPayloadSizeInBytes", 512*bytesize.KB) // default 10kb + config.Set("BatchRouter.SNOWPIPE_STREAMING.asyncUploadWorkerTimeout", "1s") // default 10s + config.Set("BatchRouter.SNOWPIPE_STREAMING.asyncUploadTimeout", "1s") // default 30m + config.Set("BatchRouter.SNOWPIPE_STREAMING.pollStatusLoopSleep", "1s") // default 10s config.Set("BatchRouter.isolationMode", "none") - config.Set("Warehouse.mode", "off") config.Set("DestinationDebugger.disableEventDeliveryStatusUploads", true) config.Set("SourceDebugger.disableEventUploads", true) @@ -1497,8 +1339,6 @@ func runRudderServer(ctx context.Context, port int, postgresContainer *postgres. config.Set("JobsDB.migrateDSLoopSleepDuration", "60m") config.Set("archival.Enabled", false) config.Set("Reporting.syncer.enabled", false) - config.Set("BatchRouter.mainLoopFreq", "1s") - config.Set("BatchRouter.uploadFreq", "1s") config.Set("Gateway.webPort", strconv.Itoa(port)) config.Set("RUDDER_TMPDIR", os.TempDir()) config.Set("recovery.storagePath", path.Join(tmpDir, "/recovery_data.json")) @@ -1511,16 +1351,20 @@ func runRudderServer(ctx context.Context, port int, postgresContainer *postgres. 
err = fmt.Errorf("panicked: %v", r) } }() - r := runner.New(runner.ReleaseInfo{EnterpriseToken: "TOKEN"}) - c := r.Run(ctx, - []string{"proc-isolation-test-rudder-server"}) + r := runner.New(runner.ReleaseInfo{EnterpriseToken: "TOKEN", Version: uuid.NewString()}) + c := r.Run(ctx, []string{"snowpipe-streaming-rudder-server"}) if c != 0 { err = fmt.Errorf("rudder-server exited with a non-0 exit code: %d", c) } return } -func sendEvents(num int, eventFormat func(index int) string, writeKey, url string) error { // nolint:unparam +// nolint:unparam +func sendEvents( + num int, + eventFormat func(index int) string, + writeKey, url string, +) error { for i := 0; i < num; i++ { payload := []byte(eventFormat(i)) req, err := http.NewRequest(http.MethodPost, url+"/v1/batch", bytes.NewReader(payload)) @@ -1541,21 +1385,59 @@ func sendEvents(num int, eventFormat func(index int) string, writeKey, url strin return nil } -func dropSchema(t *testing.T, db *sql.DB, namespace string) { +// nolint:unparam +func requireGatewayJobsCount( + t testing.TB, + ctx context.Context, + db *sql.DB, + status string, + expectedCount int, +) { t.Helper() - t.Log("dropping schema", namespace) + t.Log("Verifying gateway jobs count") + query := fmt.Sprintf("SELECT count(*) FROM unionjobsdbmetadata('gw',1) WHERE job_state = '%s'", status) + count := 0 require.Eventually(t, func() bool { - _, err := db.ExecContext(context.Background(), fmt.Sprintf(`DROP SCHEMA %q CASCADE;`, namespace)) + err := db.QueryRowContext(ctx, query).Scan(&count) if err != nil { - t.Logf("error deleting schema %q: %v", namespace, err) + t.Log("Error while querying for jobs count: ", err) return false } - return true + t.Logf("require gateway count: %d, expected: %d", count, expectedCount) + return count == expectedCount }, - time.Minute, - time.Second, + 20*time.Second, + 1*time.Second, + ) +} + +// nolint:unparam +func requireBatchRouterJobsCount( + t testing.TB, + ctx context.Context, + db *sql.DB, + status string, + expectedCount int, +) { + t.Helper() + t.Log("Verifying batch router jobs count") + + query := fmt.Sprintf("SELECT count(*) FROM unionjobsdbmetadata('batch_rt',1) WHERE job_state = '%s'", status) + count := 0 + require.Eventually(t, + func() bool { + err := db.QueryRowContext(ctx, query).Scan(&count) + if err != nil { + t.Log("Error while querying for jobs count: ", err) + return false + } + t.Logf("require batch router count: %d, expected: %d", count, expectedCount) + return count == expectedCount + }, + 200*time.Second, + 1*time.Second, ) } @@ -1569,37 +1451,116 @@ func convertRecordsToSchema(input [][]string) map[string]map[string]string { }) } -func getPrometheusMetrics(t *testing.T, prometheusPort int, requiredMetrics ...string) map[string]*promClient.MetricFamily { - t.Helper() +func tracksRecords( + source backendconfig.SourceT, + destination backendconfig.DestinationT, +) [][]string { + ts := timeutil.Now().Format("2006-01-02") + eventName := "Product Reviewed" + tableName := strcase.ToSnake(eventName) + return [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, 
source.SourceDefinition.Name, tableName, eventName, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + } +} - buf := make([]byte, 0) - url := fmt.Sprintf("http://localhost:%d/metrics", prometheusPort) +func tracksRecordsForDiscards( + source backendconfig.SourceT, + destination backendconfig.DestinationT, +) [][]string { + ts := timeutil.Now().Format("2006-01-02") + eventName := "Product Reviewed" + tableName := strcase.ToSnake(eventName) + return [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + } +} - require.Eventuallyf(t, func() bool { - resp, err := http.Get(url) - if err != nil { - t.Logf("Failed to fetch metrics: %v", err) - return false - } - defer httputil.CloseResponse(resp) +func productReviewedRecords( + source backendconfig.SourceT, + destination backendconfig.DestinationT, +) [][]string { + ts := timeutil.Now().Format("2006-01-02") + eventName := "Product Reviewed" + tableName := strcase.ToSnake(eventName) + return [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "1", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "2", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "3", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "4", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. 
It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "[::1]", "[::1]", source.ID, source.SourceDefinition.Name, tableName, eventName, "5", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + } +} - buf, err = io.ReadAll(resp.Body) - if err != nil { - t.Logf("Failed to read response body: %v", err) - return false - } +func productReviewedRecordsForDiscards( + source backendconfig.SourceT, + destination backendconfig.DestinationT, +) [][]string { + ts := timeutil.Now().Format("2006-01-02") + eventName := "Product Reviewed" + tableName := strcase.ToSnake(eventName) + return [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "1", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "2", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "3", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "4", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "", "", source.ID, source.SourceDefinition.Name, tableName, eventName, "5", "2020-02-02T00:23:09Z", "9578257311", "3", ts, "OK for the price. 
It works but the material feels flimsy.", "86ac1cd43", "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + } +} - bufString := string(buf) - for _, metric := range requiredMetrics { - if !strings.Contains(bufString, metric) { - return false - } - } - return true - }, time.Minute, 100*time.Millisecond, "Cannot find metrics in time: %s", buf) +func identifiesRecords( + source backendconfig.SourceT, + destination backendconfig.DestinationT, +) [][]string { + ts := timeutil.Now().Format("2006-01-02") + return [][]string{ + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "1", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "1", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "2", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "2", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "3", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "3", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "4", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "4", ts}, + {destination.ID, "SNOWPIPE_STREAMING", "14.5.67.21", "http", "14.5.67.21", source.ID, source.SourceDefinition.Name, "5", "2020-02-02T00:23:09Z", ts, "2020-02-02T00:23:09Z", "2020-02-02T00:23:09Z", "5", ts}, + } +} - metrics, err := testhelper.ParsePrometheusMetrics(bytes.NewBuffer(buf)) - require.NoError(t, err) +func discardsRecords() [][]string { + return append(discardProductReviewedRecords(), discardTracksRecords()...) 
+} - return metrics +func discardProductReviewedRecords() [][]string { + ts := timeutil.Now().Format("2006-01-02") + return [][]string{ + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "1", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "PRODUCT_REVIEWED", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "PRODUCT_REVIEWED", ts}, + } +} + +func discardTracksRecords() [][]string { + ts := timeutil.Now().Format("2006-01-02") + return [][]string{ + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "3", "TRACKS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "3", "TRACKS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "4", "TRACKS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "4", "TRACKS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "5", "TRACKS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "5", "TRACKS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "1", "TRACKS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "1", "TRACKS", ts}, + {"CONTEXT_REQUEST_IP", "", "incompatible schema conversion from int to string", ts, "2", "TRACKS", ts}, + {"CONTEXT_IP", "", "incompatible schema conversion from int to string", ts, "2", "TRACKS", ts}, + } } diff --git a/integration_test/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml b/integration_test/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml index 0c9f5ea530..4c223f96b6 100644 --- a/integration_test/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml +++ b/integration_test/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml @@ -2,7 +2,7 @@ version: "3.9" services: rudder-snowpipe-clients: - image: "hub.dev-rudder.rudderlabs.com/dockerhub-proxy/rudderstack/rudder-snowpipe-clients:develop" + image: "rudderstack/rudder-snowpipe-clients:develop" ports: - "9078" healthcheck: diff --git a/integration_test/snowpipestreaming/testdata/docker-compose.rudder-transformer.yml b/integration_test/snowpipestreaming/testdata/docker-compose.rudder-transformer.yml index 3141df23b2..f822bf374b 100644 --- a/integration_test/snowpipestreaming/testdata/docker-compose.rudder-transformer.yml +++ b/integration_test/snowpipestreaming/testdata/docker-compose.rudder-transformer.yml @@ -2,7 +2,7 @@ version: "3.9" services: transformer: - image: 
"hub.dev-rudder.rudderlabs.com/dockerhub-proxy/rudderstack/develop-rudder-transformer:latest" + image: "rudderstack/develop-rudder-transformer:fix.snowpipe-streaming-users" ports: - "9090:9090" healthcheck: diff --git a/processor/transformer/transformer.go b/processor/transformer/transformer.go index 45164410ef..2368576d1e 100644 --- a/processor/transformer/transformer.go +++ b/processor/transformer/transformer.go @@ -537,6 +537,9 @@ func (trans *handle) destTransformURL(destType string) string { return destinationEndPoint + "?" + whSchemaVersionQueryParam } } + if destType == warehouseutils.SnowpipeStreaming { + return destinationEndPoint + "?" + fmt.Sprintf("whSchemaVersion=%s&whIDResolve=%v", trans.conf.GetString("Warehouse.schemaVersion", "v1"), warehouseutils.IDResolutionEnabled()) + } return destinationEndPoint } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/apiadapter.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/apiadapter.go index 4fd53b975d..68b2331d80 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/apiadapter.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/apiadapter.go @@ -2,70 +2,111 @@ package snowpipestreaming import ( "context" + "strconv" + "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" ) -type apiAdapter struct { - stats struct { - createChannelCount stats.Counter - deleteChannelCount stats.Counter - insertCount stats.Counter - statusCount stats.Counter - createChannelResponseTime stats.Timer - deleteChannelResponseTime stats.Timer - insertResponseTime stats.Timer - statusResponseTime stats.Timer +func newApiAdapter( + logger logger.Logger, + statsFactory stats.Stats, + api api, + destination *backendconfig.DestinationT, +) api { + return &apiAdapter{ + logger: logger, + statsFactory: statsFactory, + destination: destination, + api: api, } - - api } -func newApiAdapter(api api, statsFactory stats.Stats, destination *backendconfig.DestinationT) *apiAdapter { - adapter := &apiAdapter{} - adapter.api = api - - tags := stats.Tags{ +func (a *apiAdapter) defaultTags() stats.Tags { + return stats.Tags{ "module": "batch_router", - "workspaceId": destination.WorkspaceID, - "destType": destination.DestinationDefinition.Name, - "destinationId": destination.ID, + "workspaceId": a.destination.WorkspaceID, + "destType": a.destination.DestinationDefinition.Name, + "destinationId": a.destination.ID, } - adapter.stats.createChannelCount = statsFactory.NewTaggedStat("snowpipestreaming_create_channel_count", stats.CountType, tags) - adapter.stats.deleteChannelCount = statsFactory.NewTaggedStat("snowpipestreaming_delete_channel_count", stats.CountType, tags) - adapter.stats.insertCount = statsFactory.NewTaggedStat("snowpipestreaming_insert_count", stats.CountType, tags) - adapter.stats.statusCount = statsFactory.NewTaggedStat("snowpipestreaming_status_count", stats.CountType, tags) - adapter.stats.createChannelResponseTime = statsFactory.NewTaggedStat("snowpipestreaming_create_channel_response_time", stats.TimerType, tags) - adapter.stats.deleteChannelResponseTime = statsFactory.NewTaggedStat("snowpipestreaming_delete_channel_response_time", stats.TimerType, tags) - adapter.stats.insertResponseTime = statsFactory.NewTaggedStat("snowpipestreaming_insert_response_time", 
stats.TimerType, tags) - adapter.stats.statusResponseTime = statsFactory.NewTaggedStat("snowpipestreaming_status_response_time", stats.TimerType, tags) - - return adapter } func (a *apiAdapter) CreateChannel(ctx context.Context, req *model.CreateChannelRequest) (*model.ChannelResponse, error) { - defer a.stats.createChannelCount.Increment() - defer a.stats.createChannelResponseTime.RecordDuration()() - return a.api.CreateChannel(ctx, req) + a.logger.Infon("Creating channel", + logger.NewStringField("rudderIdentifier", req.RudderIdentifier), + logger.NewStringField("partition", req.Partition), + logger.NewStringField("database", req.TableConfig.Database), + logger.NewStringField("namespace", req.TableConfig.Schema), + logger.NewStringField("table", req.TableConfig.Table), + ) + tags := a.defaultTags() + tags["api"] = "create_channel" + + responseTimeStat := a.statsFactory.NewTaggedStat("snowpipe_streaming_api_response_time", stats.TimerType, tags) + defer responseTimeStat.RecordDuration()() + + resp, err := a.api.CreateChannel(ctx, req) + if err != nil { + tags["status"] = "false" + return nil, err + } + tags["status"] = strconv.FormatBool(resp.Success) + tags["code"] = resp.Code + return resp, nil } func (a *apiAdapter) DeleteChannel(ctx context.Context, channelID string, sync bool) error { - defer a.stats.deleteChannelCount.Increment() - defer a.stats.deleteChannelResponseTime.RecordDuration()() - return a.api.DeleteChannel(ctx, channelID, sync) + a.logger.Infon("Deleting channel", + logger.NewStringField("channelId", channelID), + logger.NewBoolField("sync", sync), + ) + tags := a.defaultTags() + tags["api"] = "delete_channel" + + responseTimeStat := a.statsFactory.NewTaggedStat("snowpipe_streaming_api_response_time", stats.TimerType, tags) + defer responseTimeStat.RecordDuration()() + + err := a.api.DeleteChannel(ctx, channelID, sync) + if err != nil { + tags["status"] = "false" + return err + } + tags["status"] = "true" + return nil } func (a *apiAdapter) Insert(ctx context.Context, channelID string, insertRequest *model.InsertRequest) (*model.InsertResponse, error) { - defer a.stats.insertCount.Increment() - defer a.stats.insertResponseTime.RecordDuration()() - return a.api.Insert(ctx, channelID, insertRequest) + tags := a.defaultTags() + tags["api"] = "insert" + + responseTimeStat := a.statsFactory.NewTaggedStat("snowpipe_streaming_api_response_time", stats.TimerType, tags) + defer responseTimeStat.RecordDuration()() + + resp, err := a.api.Insert(ctx, channelID, insertRequest) + if err != nil { + tags["status"] = "false" + return nil, err + } + tags["status"] = strconv.FormatBool(resp.Success) + tags["code"] = resp.Code + return resp, nil } func (a *apiAdapter) Status(ctx context.Context, channelID string) (*model.StatusResponse, error) { - defer a.stats.statusCount.Increment() - defer a.stats.statusResponseTime.RecordDuration()() - return a.api.Status(ctx, channelID) + tags := a.defaultTags() + tags["api"] = "status" + + responseTimeStat := a.statsFactory.NewTaggedStat("snowpipe_streaming_api_response_time", stats.TimerType, tags) + defer responseTimeStat.RecordDuration()() + + resp, err := a.api.Status(ctx, channelID) + if err != nil { + tags["status"] = "false" + return nil, err + } + tags["status"] = strconv.FormatBool(resp.Success) + return resp, nil } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/channel.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/channel.go index 7f7042866c..8b45e8801f 100644 --- 
a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/channel.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/channel.go @@ -4,17 +4,52 @@ import ( "context" "fmt" - "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common" + "github.com/rudderlabs/rudder-go-kit/logger" + internalapi "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api" "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" "github.com/rudderlabs/rudder-server/warehouse/integrations/manager" whutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) +func (m *Manager) prepareChannelResponse( + ctx context.Context, + destinationID string, + destConf *destConfig, + tableName string, + eventSchema whutils.ModelTableSchema, +) (*model.ChannelResponse, error) { + channelResponse, err := m.createChannel(ctx, destinationID, destConf, tableName, eventSchema) + if err != nil { + return nil, fmt.Errorf("creating channel for table %s: %w", tableName, err) + } + + columnInfos := findNewColumns(eventSchema, channelResponse.SnowPipeSchema) + if len(columnInfos) > 0 { + m.logger.Infon("Adding columns", + logger.NewStringField("table", tableName), + logger.NewIntField("columns", int64(len(columnInfos))), + ) + + if err := m.addColumns(ctx, destConf.Namespace, tableName, columnInfos); err != nil { + return nil, fmt.Errorf("adding columns for table %s: %w", tableName, err) + } + + channelResponse, err = m.recreateChannel(ctx, destinationID, destConf, tableName, eventSchema, channelResponse.ChannelID) + if err != nil { + return nil, fmt.Errorf("recreating channel for table %s: %w", tableName, err) + } + } + return channelResponse, nil +} + +// createChannel creates a new channel for importing data to Snowpipe. +// If the channel already exists in the cache, it returns the cached response. +// Otherwise, it sends a request to create a new channel and handles potential errors. 
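+// A rough call sequence, mirroring prepareChannelResponse above (sketch only; identifiers are the ones used there):
+//
+//	resp, err := m.createChannel(ctx, destinationID, destConf, tableName, eventSchema)
+//	if err != nil { /* handle error */ }
+//	newColumns := findNewColumns(eventSchema, resp.SnowPipeSchema)
+//	// if newColumns is non-empty, add them and recreate the channel so Snowpipe picks up the new schema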
func (m *Manager) createChannel( ctx context.Context, - asyncDest *common.AsyncDestinationStruct, - destConf destConfig, + rudderIdentifier string, + destConf *destConfig, tableName string, eventSchema whutils.ModelTableSchema, ) (*model.ChannelResponse, error) { @@ -23,7 +58,7 @@ func (m *Manager) createChannel( } req := &model.CreateChannelRequest{ - RudderIdentifier: asyncDest.Destination.ID, + RudderIdentifier: rudderIdentifier, Partition: m.config.instanceID, AccountConfig: model.AccountConfig{ Account: destConf.Account, @@ -41,7 +76,7 @@ func (m *Manager) createChannel( resp, err := m.api.CreateChannel(ctx, req) if err != nil { - return nil, fmt.Errorf("creating channel: %v", err) + return nil, fmt.Errorf("creating channel: %w", err) } if resp.Success { m.channelCache.Store(tableName, resp) @@ -52,94 +87,113 @@ func (m *Manager) createChannel( case internalapi.ErrSchemaDoesNotExistOrNotAuthorized: resp, err = m.handleSchemaError(ctx, req, eventSchema) if err != nil { - return nil, fmt.Errorf("handling schema error: %v", err) + return nil, fmt.Errorf("creating channel for schema error: %w", err) } if !resp.Success { - return nil, fmt.Errorf("creating channel for schema error: %s", resp.Error) + return nil, fmt.Errorf("creating channel for schema error with code %s, message: %s and error: %s", resp.Code, resp.SnowflakeAPIMessage, resp.Error) } m.channelCache.Store(tableName, resp) return resp, nil case internalapi.ErrTableDoesNotExistOrNotAuthorized: resp, err = m.handleTableError(ctx, req, eventSchema) if err != nil { - return nil, fmt.Errorf("handling table error: %v", err) + return nil, fmt.Errorf("creating channel for table error: %w", err) } if !resp.Success { - return nil, fmt.Errorf("creating channel for table error: %s", resp.Error) + return nil, fmt.Errorf("creating channel for table error with code %s, message: %s and error: %s", resp.Code, resp.SnowflakeAPIMessage, resp.Error) } m.channelCache.Store(tableName, resp) return resp, nil default: - return nil, fmt.Errorf("creating channel: %v", err) + return nil, fmt.Errorf("creating channel with code %s, message: %s and error: %s", resp.Code, resp.SnowflakeAPIMessage, resp.Error) } } +// handleSchemaError handles errors related to missing schemas. +// It creates the necessary schema and table, then attempts to create the channel again. func (m *Manager) handleSchemaError( ctx context.Context, channelReq *model.CreateChannelRequest, eventSchema whutils.ModelTableSchema, ) (*model.ChannelResponse, error) { - m.stats.channelSchemaCreationErrorCount.Increment() + m.logger.Infon("Handling schema error", + logger.NewStringField("schema", channelReq.TableConfig.Schema), + logger.NewStringField("table", channelReq.TableConfig.Table), + ) snowflakeManager, err := m.createSnowflakeManager(ctx, channelReq.TableConfig.Schema) if err != nil { - return nil, fmt.Errorf("creating snowflake manager: %v", err) + return nil, fmt.Errorf("creating snowflake manager: %w", err) } defer func() { snowflakeManager.Cleanup(ctx) }() if err := snowflakeManager.CreateSchema(ctx); err != nil { - return nil, fmt.Errorf("creating schema: %v", err) + return nil, fmt.Errorf("creating schema: %w", err) } if err := snowflakeManager.CreateTable(ctx, channelReq.TableConfig.Table, eventSchema); err != nil { - return nil, fmt.Errorf("creating table: %v", err) + return nil, fmt.Errorf("creating table: %w", err) } return m.api.CreateChannel(ctx, channelReq) } +// handleTableError handles errors related to missing tables. 
+// It creates the necessary table and then attempts to create the channel again. func (m *Manager) handleTableError( ctx context.Context, channelReq *model.CreateChannelRequest, eventSchema whutils.ModelTableSchema, ) (*model.ChannelResponse, error) { - m.stats.channelTableCreationErrorCount.Increment() + m.logger.Infon("Handling table error", + logger.NewStringField("schema", channelReq.TableConfig.Schema), + logger.NewStringField("table", channelReq.TableConfig.Table), + ) snowflakeManager, err := m.createSnowflakeManager(ctx, channelReq.TableConfig.Schema) if err != nil { - return nil, fmt.Errorf("creating snowflake manager: %v", err) + return nil, fmt.Errorf("creating snowflake manager: %w", err) } defer func() { snowflakeManager.Cleanup(ctx) }() if err := snowflakeManager.CreateTable(ctx, channelReq.TableConfig.Table, eventSchema); err != nil { - return nil, fmt.Errorf("creating table: %v", err) + return nil, fmt.Errorf("creating table: %w", err) } return m.api.CreateChannel(ctx, channelReq) } +// recreateChannel deletes an existing channel and then creates a new one. +// It returns the new channel response or an error if the process fails. func (m *Manager) recreateChannel( ctx context.Context, - asyncDest *common.AsyncDestinationStruct, - destConf destConfig, + destinationID string, + destConf *destConfig, tableName string, eventSchema whutils.ModelTableSchema, - existingChannelResponse *model.ChannelResponse, + existingChannelID string, ) (*model.ChannelResponse, error) { - if err := m.deleteChannel(ctx, tableName, existingChannelResponse.ChannelID); err != nil { - return nil, fmt.Errorf("deleting channel: %v", err) + m.logger.Infon("Recreating channel", + logger.NewStringField("destinationID", destinationID), + logger.NewStringField("tableName", tableName), + ) + + if err := m.deleteChannel(ctx, tableName, existingChannelID); err != nil { + return nil, fmt.Errorf("deleting channel: %w", err) } - channelResponse, err := m.createChannel(ctx, asyncDest, destConf, tableName, eventSchema) + channelResponse, err := m.createChannel(ctx, destinationID, destConf, tableName, eventSchema) if err != nil { - return nil, fmt.Errorf("recreating channel: %v", err) + return nil, fmt.Errorf("recreating channel: %w", err) } return channelResponse, nil } -func (m *Manager) deleteChannel(ctx context.Context, tableName string, channelID string) error { +// deleteChannel removes a channel from the cache and deletes it from the Snowpipe. +// It returns an error if the deletion fails. 
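+// Note that the cache entry is evicted before the remote call, so a cached channel is dropped even
+// when the Snowpipe delete itself fails.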
+func (m *Manager) deleteChannel(ctx context.Context, tableName, channelID string) error { m.channelCache.Delete(tableName) if err := m.api.DeleteChannel(ctx, channelID, true); err != nil { - return fmt.Errorf("deleting channel: %v", err) + return fmt.Errorf("deleting channel: %w", err) } return nil } @@ -154,13 +208,13 @@ func (m *Manager) createSnowflakeManager(ctx context.Context, namespace string) } modelWarehouse.Destination.Config["useKeyPairAuth"] = true // Since we are currently only supporting key pair auth - sf, err := manager.New(whutils.SNOWFLAKE, m.conf, m.logger, m.statsFactory) + sf, err := manager.New(whutils.SnowpipeStreaming, m.conf, m.logger, m.statsFactory) if err != nil { - return nil, fmt.Errorf("creating snowflake manager: %v", err) + return nil, fmt.Errorf("creating snowflake manager: %w", err) } err = sf.Setup(ctx, modelWarehouse, &whutils.NopUploader{}) if err != nil { - return nil, fmt.Errorf("setting up snowflake manager: %v", err) + return nil, fmt.Errorf("setting up snowflake manager: %w", err) } return sf, nil } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/columns.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/columns.go index a3173e8de1..fc6be3fb7d 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/columns.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/columns.go @@ -10,13 +10,13 @@ func (m *Manager) addColumns(ctx context.Context, namespace, tableName string, columns []whutils.ColumnInfo) error { snowflakeManager, err := m.createSnowflakeManager(ctx, namespace) if err != nil { - return fmt.Errorf("creating snowflake manager: %v", err) + return fmt.Errorf("creating snowflake manager: %w", err) } defer func() { snowflakeManager.Cleanup(ctx) }() if err = snowflakeManager.AddColumns(ctx, tableName, columns); err != nil { - return fmt.Errorf("adding columns: %v", err) + return fmt.Errorf("adding columns: %w", err) } return nil } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/discards.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/discards.go index 551f98cec8..a7390acd3e 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/discards.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/discards.go @@ -5,101 +5,75 @@ import ( "fmt" "strconv" - obskit "github.com/rudderlabs/rudder-observability-kit/go/labels" "github.com/samber/lo" + obskit "github.com/rudderlabs/rudder-observability-kit/go/labels" + "github.com/rudderlabs/rudder-go-kit/logger" - "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common" "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" "github.com/rudderlabs/rudder-server/warehouse/slave" whutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) -func (m *Manager) loadDiscardsToSnowPipe( +// sendDiscardEVentsToSnowpipe inserts discarded records into the discards table using the channel already prepared by the caller. +// On failure it deletes the channel and returns an error.
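+// The target table comes from discardsTable() (whutils.DiscardsTable normalized for Snowflake, i.e. RUDDER_DISCARDS),
+// and the insert offset is derived from info.latestJobID.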
+func (m *Manager) sendDiscardEVentsToSnowpipe( ctx context.Context, - asyncDest *common.AsyncDestinationStruct, - destConf destConfig, + info *uploadInfo, discardInfos []discardInfo, -) (*uploadInfo, error) { - tableName, eventSchema := discardsTable(), discardsSchema() +) (*importInfo, error) { + tableName := discardsTable() + offset := strconv.FormatInt(info.latestJobID, 10) log := m.logger.Withn( logger.NewStringField("table", tableName), logger.NewIntField("events", int64(len(discardInfos))), + logger.NewStringField("offset", offset), ) - log.Infon("Uploading data to table") - - channelResponse, err := m.createChannel(ctx, asyncDest, destConf, tableName, eventSchema) - if err != nil { - return nil, fmt.Errorf("creating channel: %v", err) - } - - columnInfos := findNewColumns(eventSchema, channelResponse.SnowPipeSchema()) - if len(columnInfos) > 0 { - if err := m.addColumns(ctx, destConf.Namespace, tableName, columnInfos); err != nil { - return nil, fmt.Errorf("adding columns: %v", err) - } - - channelResponse, err = m.recreateChannel(ctx, asyncDest, destConf, tableName, eventSchema, channelResponse) - if err != nil { - return nil, fmt.Errorf("recreating channel: %v", err) - } - } - - offset := strconv.FormatInt(m.now().Unix(), 10) insertReq := &model.InsertRequest{ - Rows: createRowsFromDiscardInfos(discardInfos), + Rows: discardRows(discardInfos), Offset: offset, } - insertRes, err := m.api.Insert(ctx, channelResponse.ChannelID, insertReq) - if err != nil { - if deleteErr := m.deleteChannel(ctx, tableName, channelResponse.ChannelID); deleteErr != nil { - m.logger.Warnn("Failed to delete channel", - logger.NewStringField("table", tableName), - obskit.Error(deleteErr), - ) + insertRes, err := m.api.Insert(ctx, info.discardChannelResponse.ChannelID, insertReq) + defer func() { + if err != nil || !insertRes.Success { + if deleteErr := m.deleteChannel(ctx, tableName, info.discardChannelResponse.ChannelID); deleteErr != nil { + log.Warnn("Failed to delete channel", obskit.Error(deleteErr)) + } } - return nil, fmt.Errorf("inserting data: %v", err) + }() + if err != nil { + return nil, fmt.Errorf("inserting data to discards: %v", err) } if !insertRes.Success { - if deleteErr := m.deleteChannel(ctx, tableName, channelResponse.ChannelID); deleteErr != nil { - m.logger.Warnn("Failed to delete channel", - logger.NewStringField("table", tableName), - obskit.Error(deleteErr), - ) - } return nil, errInsertingDataFailed } - m.logger.Infon("Successfully uploaded data to table", - logger.NewStringField("table", tableName), - logger.NewIntField("events", int64(len(discardInfos))), - ) - m.stats.discardCount.Count(len(discardInfos)) - idOffset := &uploadInfo{ - ChannelID: channelResponse.ChannelID, + imInfo := &importInfo{ + ChannelID: info.discardChannelResponse.ChannelID, Offset: offset, Table: tableName, + Count: len(discardInfos), } - return idOffset, nil + return imInfo, nil } func discardsTable() string { - return whutils.ToProviderCase(whutils.SNOWFLAKE, whutils.DiscardsTable) + return whutils.ToProviderCase(whutils.SnowpipeStreaming, whutils.DiscardsTable) } func discardsSchema() whutils.ModelTableSchema { return lo.MapEntries(whutils.DiscardsSchema, func(colName, colType string) (string, string) { - return whutils.ToProviderCase(whutils.SNOWFLAKE, colName), colType + return whutils.ToProviderCase(whutils.SnowpipeStreaming, colName), colType }) } -func createRowsFromDiscardInfos(discardInfos []discardInfo) []model.Row { +func discardRows(discardInfos []discardInfo) []model.Row { return 
lo.FilterMap(discardInfos, func(info discardInfo, _ int) (model.Row, bool) { - id, idExists := info.eventData[whutils.ToProviderCase(whutils.SNOWFLAKE, "id")] - receivedAt, receivedAtExists := info.eventData[whutils.ToProviderCase(whutils.SNOWFLAKE, "received_at")] + id, idExists := info.eventData[whutils.ToProviderCase(whutils.SnowpipeStreaming, "id")] + receivedAt, receivedAtExists := info.eventData[whutils.ToProviderCase(whutils.SnowpipeStreaming, "received_at")] if !idExists || !receivedAtExists { return nil, false @@ -118,7 +92,7 @@ func createRowsFromDiscardInfos(discardInfos []discardInfo) []model.Row { } func discardedRecords( - event event, + event *event, snowPipeSchema whutils.ModelTableSchema, tableName string, formattedTS string, diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api.go index 7d0a8aa7fb..bbb15388ca 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api.go @@ -3,6 +3,8 @@ package api import ( "io" "net/http" + + jsoniter "github.com/json-iterator/go" ) type API struct { @@ -14,6 +16,8 @@ type requestDoer interface { Do(*http.Request) (*http.Response, error) } +var json = jsoniter.ConfigCompatibleWithStandardLibrary + func New(clientURL string, requestDoer requestDoer) *API { return &API{ clientURL: clientURL, @@ -21,7 +25,7 @@ func New(clientURL string, requestDoer requestDoer) *API { } } -func mustReadAll(r io.Reader) []byte { +func mustRead(r io.Reader) []byte { data, _ := io.ReadAll(r) return data } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api_test.go index 70d363d8bc..b156415e11 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api_test.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/api_test.go @@ -48,12 +48,12 @@ func (nopReadCloser) Close() error { func TestMustReadAll(t *testing.T) { t.Run("ReadAll", func(t *testing.T) { r := strings.NewReader("hello") - data := mustReadAll(r) + data := mustRead(r) require.Equal(t, []byte("hello"), data) }) t.Run("ReadAll error", func(t *testing.T) { r := iotest.ErrReader(errors.New("error")) - data := mustReadAll(r) + data := mustRead(r) require.Empty(t, data) }) } @@ -79,7 +79,7 @@ func TestAPI(t *testing.T) { ctx := context.Background() - namespace := testhelper.RandSchema(whutils.SNOWFLAKE) + namespace := testhelper.RandSchema() table := "TEST_TABLE" tableSchema := whutils.ModelTableSchema{ "ID": "string", "NAME": "string", "EMAIL": "string", "AGE": "int", "ACTIVE": "boolean", "DOB": "datetime", @@ -133,63 +133,8 @@ func TestAPI(t *testing.T) { require.NotEmpty(t, createChannelRes.ChannelID) require.True(t, createChannelRes.Valid) require.False(t, createChannelRes.Deleted) - require.EqualValues(t, map[string]map[string]interface{}{ - "ACTIVE": { - "byteLength": nil, - "length": nil, - "logicalType": "BOOLEAN", - "nullable": true, - "precision": nil, - "scale": nil, - "type": "BOOLEAN", - }, - "AGE": { - "byteLength": nil, - "length": nil, - "logicalType": "FIXED", - "nullable": true, - "precision": float64(38), - "scale": float64(0), - "type": "NUMBER(38,0)", - }, - "DOB": { - "byteLength": nil, - "length": nil, - "logicalType": "TIMESTAMP_TZ", - "nullable": true, - 
"precision": float64(0), - "scale": float64(9), - "type": "TIMESTAMP_TZ(9)", - }, - "EMAIL": { - "byteLength": 1.6777216e+07, - "length": 1.6777216e+07, - "logicalType": "TEXT", - "nullable": true, - "precision": nil, - "scale": nil, - "type": "VARCHAR(16777216)", - }, - "ID": { - "byteLength": 1.6777216e+07, - "length": 1.6777216e+07, - "logicalType": "TEXT", - "nullable": true, - "precision": nil, - "scale": nil, - "type": "VARCHAR(16777216)", - }, - "NAME": { - "byteLength": 1.6777216e+07, - "length": 1.6777216e+07, - "logicalType": "TEXT", - "nullable": true, - "precision": nil, - "scale": nil, - "type": "VARCHAR(16777216)", - }, - }, - createChannelRes.TableSchema, + require.EqualValues(t, whutils.ModelTableSchema{"ACTIVE": "boolean", "AGE": "int", "DOB": "datetime", "EMAIL": "string", "ID": "string", "NAME": "string"}, + createChannelRes.SnowPipeSchema, ) t.Log("Getting channel") @@ -248,7 +193,7 @@ func TestAPI(t *testing.T) { ctx := context.Background() - namespace := testhelper.RandSchema(whutils.SNOWFLAKE) + namespace := testhelper.RandSchema() table := "TEST_TABLE" tableSchema := whutils.ModelTableSchema{ "ID": "string", "NAME": "string", "EMAIL": "string", "AGE": "int", "ACTIVE": "boolean", "DOB": "datetime", diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/errorcodes.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/codes.go similarity index 100% rename from router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/errorcodes.go rename to router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/codes.go diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel.go index 6553df16f5..166de72f97 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel.go @@ -3,7 +3,6 @@ package api import ( "bytes" "context" - "encoding/json" "fmt" "net/http" @@ -18,8 +17,8 @@ func (a *API) CreateChannel(ctx context.Context, channelReq *model.CreateChannel return nil, fmt.Errorf("marshalling create channel request: %w", err) } - channelReqURL := a.clientURL + "/channels" - req, err := http.NewRequestWithContext(ctx, http.MethodPost, channelReqURL, bytes.NewBuffer(reqJSON)) + craeteChannelURL := a.clientURL + "/channels" + req, err := http.NewRequestWithContext(ctx, http.MethodPost, craeteChannelURL, bytes.NewBuffer(reqJSON)) if err != nil { return nil, fmt.Errorf("creating create channel request: %w", err) } @@ -32,7 +31,7 @@ func (a *API) CreateChannel(ctx context.Context, channelReq *model.CreateChannel defer func() { httputil.CloseResponse(resp) }() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("invalid status code for create channel: %d, body: %s", resp.StatusCode, string(mustReadAll(resp.Body))) + return nil, fmt.Errorf("invalid status code for create channel: %d, body: %s", resp.StatusCode, string(mustRead(resp.Body))) } var res model.ChannelResponse diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel_test.go index dc7975e6d6..98063f0c03 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel_test.go +++ 
b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/createchannel_test.go @@ -3,7 +3,6 @@ package api import ( "bytes" "context" - "encoding/json" "errors" "io" "net/http" @@ -13,6 +12,7 @@ import ( "github.com/stretchr/testify/require" "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) func TestCreateChannel(t *testing.T) { @@ -84,41 +84,13 @@ func TestCreateChannel(t *testing.T) { res, err := manager.CreateChannel(ctx, ccr) require.NoError(t, err) require.EqualValues(t, &model.ChannelResponse{ - Success: true, - ChannelID: "channelId", - ChannelName: "channelName", - ClientName: "clientName", - Valid: true, - Deleted: false, - TableSchema: map[string]map[string]interface{}{ - "EVENT": { - "byteLength": 1.6777216e+07, - "length": 1.6777216e+07, - "logicalType": "TEXT", - "nullable": true, - "precision": nil, - "scale": nil, - "type": "VARCHAR(16777216)", - }, - "ID": { - "byteLength": 1.6777216e+07, - "length": 1.6777216e+07, - "logicalType": "TEXT", - "nullable": true, - "precision": nil, - "scale": nil, - "type": "VARCHAR(16777216)", - }, - "TIMESTAMP": { - "byteLength": nil, - "length": nil, - "logicalType": "TIMESTAMP_TZ", - "nullable": true, - "precision": float64(0), - "scale": float64(9), - "type": "TIMESTAMP_TZ(9)", - }, - }, + Success: true, + ChannelID: "channelId", + ChannelName: "channelName", + ClientName: "clientName", + Valid: true, + Deleted: false, + SnowPipeSchema: whutils.ModelTableSchema{"EVENT": "string", "ID": "string", "TIMESTAMP": "datetime"}, }, res, ) diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/deletechannel.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/deletechannel.go index f95c1d14fe..99d241ff58 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/deletechannel.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/deletechannel.go @@ -28,7 +28,7 @@ func (a *API) DeleteChannel(ctx context.Context, channelID string, sync bool) er defer func() { httputil.CloseResponse(resp) }() if resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusAccepted { - return fmt.Errorf("invalid status code for delete channel: %d, body: %s", resp.StatusCode, string(mustReadAll(resp.Body))) + return fmt.Errorf("invalid status code for delete channel: %d, body: %s", resp.StatusCode, string(mustRead(resp.Body))) } return nil } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel.go index ba9210c209..aa135cc159 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel.go @@ -2,7 +2,6 @@ package api import ( "context" - "encoding/json" "fmt" "net/http" @@ -26,7 +25,7 @@ func (a *API) GetChannel(ctx context.Context, channelID string) (*model.ChannelR defer func() { httputil.CloseResponse(resp) }() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("invalid status code for get channel: %d, body: %s", resp.StatusCode, string(mustReadAll(resp.Body))) + return nil, fmt.Errorf("invalid status code for get channel: %d, body: %s", resp.StatusCode, string(mustRead(resp.Body))) } var res model.ChannelResponse diff --git 
a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel_test.go index 8dc2d654b4..bfaef2861e 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel_test.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/getchannel_test.go @@ -11,6 +11,7 @@ import ( "github.com/stretchr/testify/require" "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) func TestGetChannel(t *testing.T) { @@ -37,40 +38,12 @@ func TestGetChannel(t *testing.T) { res, err := manager.GetChannel(ctx, channelID) require.NoError(t, err) require.EqualValues(t, &model.ChannelResponse{ - ChannelID: "channelId", - ChannelName: "channelName", - ClientName: "clientName", - Valid: true, - Deleted: false, - TableSchema: map[string]map[string]interface{}{ - "EVENT": { - "byteLength": 1.6777216e+07, - "length": 1.6777216e+07, - "logicalType": "TEXT", - "nullable": true, - "precision": nil, - "scale": nil, - "type": "VARCHAR(16777216)", - }, - "ID": { - "byteLength": 1.6777216e+07, - "length": 1.6777216e+07, - "logicalType": "TEXT", - "nullable": true, - "precision": nil, - "scale": nil, - "type": "VARCHAR(16777216)", - }, - "TIMESTAMP": { - "byteLength": nil, - "length": nil, - "logicalType": "TIMESTAMP_TZ", - "nullable": true, - "precision": float64(0), - "scale": float64(9), - "type": "TIMESTAMP_TZ(9)", - }, - }, + ChannelID: "channelId", + ChannelName: "channelName", + ClientName: "clientName", + Valid: true, + Deleted: false, + SnowPipeSchema: whutils.ModelTableSchema{"EVENT": "string", "ID": "string", "TIMESTAMP": "datetime"}, }, res, ) diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert.go index 9607bc772a..b15b3424e6 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert.go @@ -3,7 +3,6 @@ package api import ( "bytes" "context" - "encoding/json" "fmt" "net/http" @@ -17,8 +16,8 @@ func (a *API) Insert(ctx context.Context, channelID string, insertRequest *model return nil, fmt.Errorf("marshalling insert request: %w", err) } - insertReqURL := a.clientURL + "/channels/" + channelID + "/insert" - req, err := http.NewRequestWithContext(ctx, http.MethodPost, insertReqURL, bytes.NewBuffer(reqJSON)) + insertURL := a.clientURL + "/channels/" + channelID + "/insert" + req, err := http.NewRequestWithContext(ctx, http.MethodPost, insertURL, bytes.NewBuffer(reqJSON)) if err != nil { return nil, fmt.Errorf("creating insert request: %w", err) } @@ -31,7 +30,7 @@ func (a *API) Insert(ctx context.Context, channelID string, insertRequest *model defer func() { httputil.CloseResponse(resp) }() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("invalid status code for insert: %d, body: %s", resp.StatusCode, string(mustReadAll(resp.Body))) + return nil, fmt.Errorf("invalid status code for insert: %d, body: %s", resp.StatusCode, string(mustRead(resp.Body))) } var res model.InsertResponse diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert_test.go 
b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert_test.go index 2da32d898e..1c10a0ef40 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert_test.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/insert_test.go @@ -3,7 +3,6 @@ package api import ( "bytes" "context" - "encoding/json" "errors" "io" "net/http" @@ -71,7 +70,8 @@ func TestInsert(t *testing.T) { RowIndex: 1, ExtraColNames: []string{"UNKNOWN"}, NullValueForNotNullColNames: nil, - Message: "The given row cannot be converted to the internal format: Extra columns: [UNKNOWN]. Columns not present in the table shouldn't be specified, rowIndex:1"}, + Message: "The given row cannot be converted to the internal format: Extra columns: [UNKNOWN]. Columns not present in the table shouldn't be specified, rowIndex:1", + }, }, Code: "ERR_SCHEMA_CONFLICT", }, diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/status.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/status.go index 47b496563b..8c65fa9f8e 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/status.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/api/status.go @@ -2,7 +2,6 @@ package api import ( "context" - "encoding/json" "fmt" "net/http" @@ -11,8 +10,8 @@ import ( ) func (a *API) Status(ctx context.Context, channelID string) (*model.StatusResponse, error) { - statusReqURL := a.clientURL + "/channels/" + channelID + "/status" - req, err := http.NewRequestWithContext(ctx, http.MethodGet, statusReqURL, nil) + statusURL := a.clientURL + "/channels/" + channelID + "/status" + req, err := http.NewRequestWithContext(ctx, http.MethodGet, statusURL, nil) if err != nil { return nil, fmt.Errorf("creating status request: %w", err) } @@ -25,7 +24,7 @@ func (a *API) Status(ctx context.Context, channelID string) (*model.StatusRespon defer func() { httputil.CloseResponse(resp) }() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("invalid status code for status: %d, body: %s", resp.StatusCode, string(mustReadAll(resp.Body))) + return nil, fmt.Errorf("invalid status code for status: %d, body: %s", resp.StatusCode, string(mustRead(resp.Body))) } var res model.StatusResponse diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model.go index e429d19bf5..81121ac7e1 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model.go @@ -3,12 +3,15 @@ package model import ( "regexp" + jsoniter "github.com/json-iterator/go" + "github.com/rudderlabs/rudder-server/warehouse/integrations/snowflake" whutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) var ( reType = regexp.MustCompile(`(.+?)\([^)]*\)`) + json = jsoniter.ConfigCompatibleWithStandardLibrary ) type ( @@ -31,20 +34,25 @@ type ( Table string `json:"table"` } + ColumnInfo struct { + Type *string `json:"type,omitempty"` + Scale *float64 `json:"scale,omitempty"` + } + ChannelResponse struct { - Success bool `json:"success"` - ChannelID string `json:"channelId"` - ChannelName string `json:"channelName"` - ClientName string `json:"clientName"` - Valid bool `json:"valid"` - Deleted bool `json:"deleted"` - TableSchema map[string]map[string]any 
`json:"tableSchema"` - Error string `json:"error"` - Code string `json:"code"` - SnowflakeSDKCode string `json:"snowflakeSDKCode"` - SnowflakeAPIHttpCode int64 `json:"snowflakeAPIHttpCode"` - SnowflakeAPIStatusCode int64 `json:"snowflakeAPIStatusCode"` - SnowflakeAPIMessage string `json:"snowflakeAPIMessage"` + Success bool `json:"success"` + ChannelID string `json:"channelId"` + ChannelName string `json:"channelName"` + ClientName string `json:"clientName"` + Valid bool `json:"valid"` + Deleted bool `json:"deleted"` + SnowPipeSchema whutils.ModelTableSchema `json:"-"` + Error string `json:"error"` + Code string `json:"code"` + SnowflakeSDKCode string `json:"snowflakeSDKCode"` + SnowflakeAPIHttpCode int64 `json:"snowflakeAPIHttpCode"` + SnowflakeAPIStatusCode int64 `json:"snowflakeAPIStatusCode"` + SnowflakeAPIMessage string `json:"snowflakeAPIMessage"` } InsertRequest struct { @@ -73,23 +81,41 @@ type ( } ) -func (c *ChannelResponse) SnowPipeSchema() whutils.ModelTableSchema { - warehouseSchema := make(whutils.ModelTableSchema) +func (c *ChannelResponse) UnmarshalJSON(data []byte) error { + type Alias ChannelResponse // Prevent recursion + temp := &struct { + TableSchema map[string]ColumnInfo `json:"tableSchema"` + *Alias + }{ + Alias: (*Alias)(c), + } + if err := json.Unmarshal(data, &temp); err != nil { + return err + } + c.SnowPipeSchema = calculateSnowPipeSchema(temp.TableSchema) + return nil +} - for column, info := range c.TableSchema { - dataType, isValidType := info["type"].(string) - if !isValidType { +func calculateSnowPipeSchema(tableSchema map[string]ColumnInfo) whutils.ModelTableSchema { + if len(tableSchema) == 0 { + return nil + } + warehouseSchema := make(whutils.ModelTableSchema) + for column, info := range tableSchema { + if info.Type == nil { continue } numericScale := int64(0) - if scale, scaleExists := info["scale"].(float64); scaleExists { - numericScale = int64(scale) + if info.Scale != nil { + numericScale = int64(*info.Scale) } - cleanedDataType := reType.ReplaceAllString(dataType, "$1") - - snowflakeType, _ := snowflake.CalculateDataType(cleanedDataType, numericScale) + cleanedDataType := reType.ReplaceAllString(*info.Type, "$1") + snowflakeType, ok := snowflake.CalculateDataType(cleanedDataType, numericScale) + if !ok { + continue + } warehouseSchema[column] = snowflakeType } return warehouseSchema diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model_test.go index 9c8696fc02..90f03b17b1 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model_test.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/internal/model/model_test.go @@ -3,6 +3,7 @@ package model import ( "testing" + "github.com/samber/lo" "github.com/stretchr/testify/require" whutils "github.com/rudderlabs/rudder-server/warehouse/utils" @@ -26,21 +27,22 @@ func TestTypeRegex(t *testing.T) { } } -func TestChannelResponse_SnowPipeSchema(t *testing.T) { +func TestChannelResponse_CalculateSnowPipeSchema(t *testing.T) { testCases := []struct { name string - tableSchema map[string]map[string]interface{} + tableSchema map[string]ColumnInfo expected whutils.ModelTableSchema }{ { name: "Valid types with scale", - tableSchema: map[string]map[string]interface{}{ - "column1": {"type": "VARCHAR(16777216)"}, - "column2": {"type": "NUMBER(2,0)", "scale": 2.0}, - "column3": {"type": "NUMBER(2,0)", "scale": 0.0}, - "column4": 
{"type": "NUMBER(2,0)", "scale": 0}, - "column5": {"type": "BOOLEAN"}, - "column6": {"type": "TIMESTAMP_TZ(9)", "scale": float64(9)}, + tableSchema: map[string]ColumnInfo{ + "column1": {Type: lo.ToPtr("VARCHAR(16777216)")}, + "column2": {Type: lo.ToPtr("NUMBER(2,0)"), Scale: lo.ToPtr(2.0)}, + "column3": {Type: lo.ToPtr("NUMBER(2,0)"), Scale: lo.ToPtr(0.0)}, + "column4": {Type: lo.ToPtr("NUMBER(2,0)")}, + "column5": {Type: lo.ToPtr("BOOLEAN")}, + "column6": {Type: lo.ToPtr("TIMESTAMP_TZ(9)"), Scale: lo.ToPtr(9.0)}, + "column7": {Type: lo.ToPtr("TIMESTAMP_TZ(9)"), Scale: lo.ToPtr(9.5)}, }, expected: whutils.ModelTableSchema{ "column1": "string", @@ -49,45 +51,91 @@ func TestChannelResponse_SnowPipeSchema(t *testing.T) { "column4": "int", "column5": "boolean", "column6": "datetime", + "column7": "datetime", }, }, { - name: "Invalid type field", - tableSchema: map[string]map[string]interface{}{ - "column1": {"type": 12345}, + name: "Unknown type", + tableSchema: map[string]ColumnInfo{ + "column1": {Type: lo.ToPtr("VARCHAR(16777216)")}, + "column2": {Type: lo.ToPtr("UNKNOWN")}, + }, + expected: whutils.ModelTableSchema{ + "column1": "string", }, - expected: whutils.ModelTableSchema{}, }, { name: "Missing scale for number", - tableSchema: map[string]map[string]interface{}{ - "column1": {"type": "NUMBER(2,0)"}, + tableSchema: map[string]ColumnInfo{ + "column1": {Type: lo.ToPtr("NUMBER(2,0)")}, }, expected: whutils.ModelTableSchema{ "column1": "int", }, }, + { + name: "Missing type", + tableSchema: map[string]ColumnInfo{ + "column1": {Scale: lo.ToPtr(2.0)}, + }, + expected: whutils.ModelTableSchema{}, + }, { name: "Empty table schema", - tableSchema: map[string]map[string]interface{}{}, - expected: whutils.ModelTableSchema{}, + tableSchema: map[string]ColumnInfo{}, + expected: nil, }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.expected, calculateSnowPipeSchema(tc.tableSchema)) + }) + } +} + +func TestChannelResponse_UnmarshalJSON(t *testing.T) { + testCases := []struct { + name string + response []byte + expectedResponse ChannelResponse + }{ { - name: "Type with regex cleaning", - tableSchema: map[string]map[string]interface{}{ - "column1": {"type": "VARCHAR(255)"}, + name: "Valid success response", + response: []byte(`{"success":true,"channelId":"channelId","channelName":"channelName","clientName":"clientName","valid":true,"deleted":false,"tableSchema":{"EVENT":{"type":"VARCHAR(16777216)","logicalType":"TEXT","precision":null,"scale":null,"byteLength":16777216,"length":16777216,"nullable":true},"ID":{"type":"VARCHAR(16777216)","logicalType":"TEXT","precision":null,"scale":null,"byteLength":16777216,"length":16777216,"nullable":true},"TIMESTAMP":{"type":"TIMESTAMP_TZ(9)","logicalType":"TIMESTAMP_TZ","precision":0,"scale":9,"byteLength":null,"length":null,"nullable":true}}}`), + expectedResponse: ChannelResponse{ + Success: true, + ChannelID: "channelId", + ChannelName: "channelName", + ClientName: "clientName", + Valid: true, + Deleted: false, + SnowPipeSchema: whutils.ModelTableSchema{ + "EVENT": "string", + "ID": "string", + "TIMESTAMP": "datetime", + }, }, - expected: whutils.ModelTableSchema{ - "column1": "string", + }, + { + name: "Valid failure response", + response: []byte(`{"success":false,"error":"Open channel request failed: HTTP Status: 400 ErrorBody: {\n \"status_code\" : 4,\n \"message\" : \"The supplied table does not exist or is not 
authorized.\"\n}.","code":"ERR_TABLE_DOES_NOT_EXIST_OR_NOT_AUTHORIZED","snowflakeSDKCode":"0007","snowflakeAPIHttpCode":400,"snowflakeAPIStatusCode":4,"snowflakeAPIMessage":"The supplied table does not exist or is not authorized."}`), + expectedResponse: ChannelResponse{ + Success: false, + Error: "Open channel request failed: HTTP Status: 400 ErrorBody: {\n \"status_code\" : 4,\n \"message\" : \"The supplied table does not exist or is not authorized.\"\n}.", Code: "ERR_TABLE_DOES_NOT_EXIST_OR_NOT_AUTHORIZED", + SnowflakeSDKCode: "0007", + SnowflakeAPIHttpCode: 400, + SnowflakeAPIStatusCode: 4, + SnowflakeAPIMessage: "The supplied table does not exist or is not authorized.", }, }, } - for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - c := &ChannelResponse{} - c.TableSchema = tc.tableSchema - require.Equal(t, tc.expected, c.SnowPipeSchema()) + var response ChannelResponse + err := response.UnmarshalJSON(tc.response) + require.NoError(t, err) + require.Equal(t, tc.expectedResponse, response) }) } } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/options.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/options.go index d7a1bedd15..5b3d82af5f 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/options.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/options.go @@ -1,7 +1,5 @@ package snowpipestreaming -type Opt func(*Manager) - func WithRequestDoer(requestDoer requestDoer) Opt { return func(s *Manager) { s.requestDoer = requestDoer diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/poll.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/poll.go index b34c911dc5..c1a20e90bb 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/poll.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/poll.go @@ -4,65 +4,90 @@ import ( "context" "fmt" "net/http" - "time" + + "go.uber.org/atomic" "github.com/rudderlabs/rudder-go-kit/stringify" obskit "github.com/rudderlabs/rudder-observability-kit/go/labels" - "golang.org/x/sync/errgroup" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common" ) +// Poll checks the status of multiple imports using the import ID from pollInput. +// It returns a PollStatusResponse indicating if any imports are still in progress or if any have failed. 
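+// The importId is the JSON array of importInfo entries emitted by Upload, roughly of the form
+// (illustrative values): [{"channelId":"...","offset":"4210","table":"TRACKS","failed":false,"count":120}].
+// An import is treated as complete once the channel's committed offset reaches the stored offset.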
func (m *Manager) Poll(pollInput common.AsyncPoll) common.PollStatusResponse { - m.logger.Infon("Polling started", logger.NewStringField("importId", pollInput.ImportId)) + m.logger.Infon("Polling started") - var uploadInfos []uploadInfo - err := json.Unmarshal([]byte(pollInput.ImportId), &uploadInfos) + var infos []importInfo + err := json.Unmarshal([]byte(pollInput.ImportId), &infos) if err != nil { return common.PollStatusResponse{ InProgress: false, StatusCode: http.StatusBadRequest, Complete: true, HasFailed: true, - Error: fmt.Sprintf("failed to unmarshal import id: %v", err), + Error: fmt.Errorf("failed to unmarshal import id: %w", err).Error(), } } ctx, cancel := context.WithCancel(context.Background()) defer cancel() - g, ctx := errgroup.WithContext(ctx) - g.SetLimit(m.config.maxConcurrentPollWorkers.Load()) + anyoneInProgress := atomic.NewBool(false) + for i := range infos { + info := &infos[i] + + inProgress, err := m.pollForImportInfo(ctx, info) + if err != nil { + infos[i].Failed = true + infos[i].Reason = err.Error() - for i, info := range uploadInfos { - g.Go(func() error { - if err := m.pollUploadInfo(ctx, info); err != nil { - uploadInfos[i].Failed = true - uploadInfos[i].Reason = err.Error() - m.logger.Warnn("Failed to poll channel offset", + m.logger.Warnn("Failed to poll channel offset", + logger.NewStringField("channelId", info.ChannelID), + logger.NewStringField("offset", info.Offset), + logger.NewStringField("table", info.Table), + obskit.Error(err), + ) + + if deleteErr := m.deleteChannel(ctx, info.Table, info.ChannelID); deleteErr != nil { + m.logger.Warnn("Failed to delete channel", logger.NewStringField("channelId", info.ChannelID), - logger.NewStringField("offset", info.Offset), logger.NewStringField("table", info.Table), - obskit.Error(err), + obskit.Error(deleteErr), ) } - return nil - }) + continue + } + anyoneInProgress.Store(anyoneInProgress.Load() || inProgress) + } + if anyoneInProgress.Load() { + return common.PollStatusResponse{InProgress: true} + } + + var successJobsCount, failedJobsCount int + var failedExists bool + for _, info := range infos { + if info.Failed { + failedJobsCount += info.Count + failedExists = true + } else { + successJobsCount += info.Count + } } - _ = g.Wait() + m.stats.jobs.failed.Count(failedJobsCount) + m.stats.jobs.succeeded.Count(successJobsCount) - if err := g.Wait(); err != nil { + if failedExists { return common.PollStatusResponse{ InProgress: false, StatusCode: http.StatusOK, Complete: true, HasFailed: true, - FailedJobURLs: stringify.Any(uploadInfos), + FailedJobURLs: stringify.Any(infos), } } - return common.PollStatusResponse{ InProgress: false, StatusCode: http.StatusOK, @@ -72,32 +97,26 @@ func (m *Manager) Poll(pollInput common.AsyncPoll) common.PollStatusResponse { } } -func (m *Manager) pollUploadInfo(ctx context.Context, info uploadInfo) error { +func (m *Manager) pollForImportInfo(ctx context.Context, info *importInfo) (bool, error) { log := m.logger.Withn( logger.NewStringField("channelId", info.ChannelID), logger.NewStringField("offset", info.Offset), logger.NewStringField("table", info.Table), ) - log.Infon("Polling for channel") + log.Infon("Polling for import info") - for { - statusRes, err := m.api.Status(ctx, info.ChannelID) - if err != nil { - return fmt.Errorf("getting status: %v", err) - } - if !statusRes.Valid || !statusRes.Success { - return errInvalidStatusResponse - } - if statusRes.Offset == info.Offset { - log.Infon("Polling completed") - return nil - } - log.Infon("Polling in progress. 
Sleeping before next poll.", - logger.NewStringField("statusOffset", statusRes.Offset), - logger.NewBoolField("statusSuccess", statusRes.Success), - logger.NewBoolField("statusValid", statusRes.Valid), - logger.NewDurationField("pollFrequency", m.config.pollFrequency), - ) - time.Sleep(m.config.pollFrequency) + statusRes, err := m.api.Status(ctx, info.ChannelID) + if err != nil { + return false, fmt.Errorf("getting status: %w", err) + } + log.Infon("Polled import info", + logger.NewBoolField("success", statusRes.Success), + logger.NewStringField("polledOffset", statusRes.Offset), + logger.NewBoolField("valid", statusRes.Valid), + logger.NewBoolField("completed", statusRes.Offset == info.Offset), + ) + if !statusRes.Valid || !statusRes.Success { + return false, errInvalidStatusResponse } + return statusRes.Offset != info.Offset, nil } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming.go index f9b23c76a5..3f3426c3af 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming.go @@ -8,6 +8,7 @@ import ( "github.com/hashicorp/go-retryablehttp" jsoniter "github.com/json-iterator/go" + "github.com/samber/lo" "github.com/rudderlabs/rudder-go-kit/bytesize" "github.com/rudderlabs/rudder-go-kit/config" @@ -60,10 +61,7 @@ func New( m.config.client.retryMax = conf.GetInt("SnowpipeStreaming.Client.retryWaitMin", 5) m.config.clientURL = conf.GetString("SnowpipeStreaming.Client.URL", "http://localhost:9078") m.config.instanceID = conf.GetString("INSTANCE_ID", "1") - m.config.pollFrequency = conf.GetDuration("SnowpipeStreaming.pollFrequency", 300, time.Millisecond) m.config.maxBufferCapacity = conf.GetReloadableInt64Var(512*bytesize.KB, bytesize.B, "SnowpipeStreaming.maxBufferCapacity") - m.config.maxConcurrentPollWorkers = conf.GetReloadableIntVar(10, 1, "SnowpipeStreaming.maxConcurrentPollWorkers") - m.config.maxConcurrentUploadWorkers = conf.GetReloadableIntVar(8, 1, "SnowpipeStreaming.maxConcurrentUploadWorkers") tags := stats.Tags{ "module": "batch_router", @@ -71,19 +69,27 @@ func New( "destType": destination.DestinationDefinition.Name, "destinationId": destination.ID, } - m.stats.successJobCount = statsFactory.NewTaggedStat("snowpipestreaming_success_job_count", stats.CountType, tags) - m.stats.failedJobCount = statsFactory.NewTaggedStat("snowpipestreaming_failed_jobs_count", stats.CountType, tags) - m.stats.discardCount = statsFactory.NewTaggedStat("snowpipestreaming_discards_count", stats.CountType, tags) - m.stats.channelSchemaCreationErrorCount = statsFactory.NewTaggedStat("snowpipestreaming_create_channel_schema_error", stats.CountType, tags) - m.stats.channelTableCreationErrorCount = statsFactory.NewTaggedStat("snowpipestreaming_create_channel_table_error", stats.CountType, tags) + m.stats.jobs.succeeded = statsFactory.NewTaggedStat("snowpipe_streaming_jobs", stats.CountType, lo.Assign(tags, stats.Tags{ + "status": "succeeded", + })) + m.stats.jobs.failed = statsFactory.NewTaggedStat("snowpipe_streaming_jobs", stats.CountType, lo.Assign(tags, stats.Tags{ + "status": "failed", + })) + m.stats.jobs.aborted = statsFactory.NewTaggedStat("snowpipe_streaming_jobs", stats.CountType, lo.Assign(tags, stats.Tags{ + "status": "aborted", + })) + m.stats.jobs.discarded = statsFactory.NewTaggedStat("snowpipe_streaming_jobs", stats.CountType, lo.Assign(tags, 
stats.Tags{ + "status": "discarded", + })) if m.requestDoer == nil { m.requestDoer = m.retryableClient().StandardClient() } m.api = newApiAdapter( - snowpipeapi.New(m.config.clientURL, m.requestDoer), + m.logger, statsFactory, + snowpipeapi.New(m.config.clientURL, m.requestDoer), destination, ) return m diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming_test.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming_test.go new file mode 100644 index 0000000000..d0d5ba4419 --- /dev/null +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/snowpipestreaming_test.go @@ -0,0 +1,177 @@ +package snowpipestreaming + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func TestFindNewColumns(t *testing.T) { + tests := []struct { + name string + eventSchema whutils.ModelTableSchema + snowPipeSchema whutils.ModelTableSchema + expected []whutils.ColumnInfo + }{ + { + name: "new column with different data type in event schema", + eventSchema: whutils.ModelTableSchema{ + "new_column": "STRING", + "existing_column": "FLOAT", + }, + snowPipeSchema: whutils.ModelTableSchema{ + "existing_column": "INT", + }, + expected: []whutils.ColumnInfo{ + {Name: "new_column", Type: "STRING"}, + }, + }, + { + name: "new and existing columns with multiple data types", + eventSchema: whutils.ModelTableSchema{ + "new_column1": "STRING", + "new_column2": "BOOLEAN", + "existing_column": "INT", + }, + snowPipeSchema: whutils.ModelTableSchema{ + "existing_column": "INT", + "another_existing_column": "FLOAT", + }, + expected: []whutils.ColumnInfo{ + {Name: "new_column1", Type: "STRING"}, + {Name: "new_column2", Type: "BOOLEAN"}, + }, + }, + { + name: "all columns in event schema are new", + eventSchema: whutils.ModelTableSchema{ + "new_column1": "STRING", + "new_column2": "BOOLEAN", + "new_column3": "FLOAT", + }, + snowPipeSchema: whutils.ModelTableSchema{}, + expected: []whutils.ColumnInfo{ + {Name: "new_column1", Type: "STRING"}, + {Name: "new_column2", Type: "BOOLEAN"}, + {Name: "new_column3", Type: "FLOAT"}, + }, + }, + { + name: "case sensitivity check", + eventSchema: whutils.ModelTableSchema{ + "ColumnA": "STRING", + "columna": "BOOLEAN", + }, + snowPipeSchema: whutils.ModelTableSchema{ + "columna": "BOOLEAN", + }, + expected: []whutils.ColumnInfo{ + {Name: "ColumnA", Type: "STRING"}, + }, + }, + { + name: "all columns match with identical types", + eventSchema: whutils.ModelTableSchema{ + "existing_column1": "STRING", + "existing_column2": "FLOAT", + }, + snowPipeSchema: whutils.ModelTableSchema{ + "existing_column1": "STRING", + "existing_column2": "FLOAT", + }, + expected: []whutils.ColumnInfo{}, + }, + { + name: "event schema is empty, SnowPipe schema has columns", + eventSchema: whutils.ModelTableSchema{}, + snowPipeSchema: whutils.ModelTableSchema{ + "existing_column": "STRING", + }, + expected: []whutils.ColumnInfo{}, + }, + { + name: "SnowPipe schema is nil", + eventSchema: whutils.ModelTableSchema{ + "new_column": "STRING", + }, + snowPipeSchema: nil, + expected: []whutils.ColumnInfo{ + {Name: "new_column", Type: "STRING"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := findNewColumns(tt.eventSchema, tt.snowPipeSchema) + assert.ElementsMatch(t, tt.expected, result) + }) + } +} + +func TestDestConfig_Decode(t *testing.T) { + tests := []struct { + name string + 
input map[string]interface{} + expected destConfig + expectedErr bool + }{ + { + name: "Valid Input", + input: map[string]interface{}{ + "account": "test-account", + "warehouse": "test-warehouse", + "database": "test-database", + "user": "test-user", + "role": "test-role", + "privateKey": "test-key", + "privateKeyPassphrase": "test-passphrase", + "namespace": "test-namespace", + }, + expected: destConfig{ + Account: "test-account", + Warehouse: "test-warehouse", + Database: "test-database", + User: "test-user", + Role: "test-role", + PrivateKey: "test-key", + PrivateKeyPassphrase: "test-passphrase", + Namespace: "TEST_NAMESPACE", + }, + expectedErr: false, + }, + { + name: "Invalid Input", + input: map[string]interface{}{ + "account": 123, // Invalid type + }, + expected: destConfig{}, + expectedErr: true, + }, + { + name: "Empty Map", + input: map[string]interface{}{}, + expected: destConfig{ + Namespace: "STRINGEMPTY", + }, + expectedErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var config destConfig + err := config.Decode(tt.input) + + if tt.expectedErr { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Equal(t, tt.expected, config) + } + }) + } +} diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml index 0c9f5ea530..4c223f96b6 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testdata/docker-compose.rudder-snowpipe-clients.yml @@ -2,7 +2,7 @@ version: "3.9" services: rudder-snowpipe-clients: - image: "hub.dev-rudder.rudderlabs.com/dockerhub-proxy/rudderstack/rudder-snowpipe-clients:develop" + image: "rudderstack/rudder-snowpipe-clients:develop" ports: - "9078" healthcheck: diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testhelper/testhelper.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testhelper/testhelper.go index c97100f62d..6392fc540b 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testhelper/testhelper.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/testhelper/testhelper.go @@ -11,10 +11,9 @@ import ( "testing" "time" - "github.com/rudderlabs/rudder-go-kit/testhelper/rand" "github.com/stretchr/testify/require" - whutils "github.com/rudderlabs/rudder-server/warehouse/utils" + "github.com/rudderlabs/rudder-go-kit/testhelper/rand" ) const ( @@ -40,20 +39,18 @@ func GetSnowPipeTestCredentials(key string) (*TestCredentials, error) { var credentials TestCredentials err := json.Unmarshal([]byte(cred), &credentials) if err != nil { - return nil, fmt.Errorf("unable to marshall %s to snowpipe test credentials: %v", key, err) + return nil, fmt.Errorf("unable to marshall %s to snowpipe test credentials: %w", key, err) } return &credentials, nil } -func RandSchema(provider string) string { +func RandSchema() string { hex := strings.ToLower(rand.String(12)) namespace := fmt.Sprintf("test_%s_%d", hex, time.Now().Unix()) - return whutils.ToProviderCase(provider, whutils.ToSafeNamespace(provider, - namespace, - )) + return strings.ToUpper(namespace) } -func DropSchema(t *testing.T, db *sql.DB, namespace string) { +func DropSchema(t testing.TB, db *sql.DB, namespace string) { t.Helper() t.Log("dropping schema", 
namespace) diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/types.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/types.go index 0a44550768..bd366ede3b 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/types.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/types.go @@ -6,6 +6,8 @@ import ( "sync" "time" + "github.com/mitchellh/mapstructure" + "github.com/rudderlabs/rudder-go-kit/config" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats" @@ -39,20 +41,18 @@ type ( retryMax int } - clientURL string - instanceID string - pollFrequency time.Duration - maxBufferCapacity config.ValueLoader[int64] - maxConcurrentPollWorkers config.ValueLoader[int] - maxConcurrentUploadWorkers config.ValueLoader[int] + clientURL string + instanceID string + maxBufferCapacity config.ValueLoader[int64] } stats struct { - successJobCount stats.Counter - failedJobCount stats.Counter - discardCount stats.Counter - channelSchemaCreationErrorCount stats.Counter - channelTableCreationErrorCount stats.Counter + jobs struct { + succeeded stats.Counter + failed stats.Counter + aborted stats.Counter + discarded stats.Counter + } } } @@ -84,12 +84,13 @@ type ( Namespace string `mapstructure:"namespace"` } - uploadInfo struct { + importInfo struct { ChannelID string `json:"channelId"` Offset string `json:"offset"` Table string `json:"table"` Failed bool `json:"failed"` Reason string `json:"reason"` + Count int `json:"count"` } discardInfo struct { @@ -100,16 +101,45 @@ type ( uuidTS string } + uploadInfo struct { + tableName string + events []*event + jobIDs []int64 + eventsSchema whutils.ModelTableSchema + discardChannelResponse *model.ChannelResponse + latestJobID int64 + } + api interface { CreateChannel(ctx context.Context, channelReq *model.CreateChannelRequest) (*model.ChannelResponse, error) DeleteChannel(ctx context.Context, channelID string, sync bool) error Insert(ctx context.Context, channelID string, insertRequest *model.InsertRequest) (*model.InsertResponse, error) Status(ctx context.Context, channelID string) (*model.StatusResponse, error) } + + apiAdapter struct { + logger logger.Logger + statsFactory stats.Stats + destination *backendconfig.DestinationT + api + } + + Opt func(*Manager) ) +func (d *destConfig) Decode(m map[string]interface{}) error { + if err := mapstructure.Decode(m, d); err != nil { + return err + } + d.Namespace = whutils.ToProviderCase( + whutils.SnowpipeStreaming, + whutils.ToSafeNamespace(whutils.SnowpipeStreaming, d.Namespace), + ) + return nil +} + func (e *event) setUUIDTimestamp(formattedTimestamp string) { - uuidTimestampColumn := whutils.ToProviderCase(whutils.SNOWFLAKE, "uuid_ts") + uuidTimestampColumn := whutils.ToProviderCase(whutils.SnowpipeStreaming, "uuid_ts") if _, columnExists := e.Message.Metadata.Columns[uuidTimestampColumn]; columnExists { e.Message.Data[uuidTimestampColumn] = formattedTimestamp } diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/upload.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/upload.go index 4c7248e35d..6af3c8252c 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/upload.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/upload.go @@ -6,12 +6,10 @@ import ( stdjson "encoding/json" "fmt" "os" + "slices" "strconv" - "sync" - "github.com/mitchellh/mapstructure" "github.com/samber/lo" - "golang.org/x/sync/errgroup" 
"github.com/rudderlabs/rudder-go-kit/logger" obskit "github.com/rudderlabs/rudder-observability-kit/go/labels" @@ -22,45 +20,111 @@ import ( whutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) +// Upload processes an async destination upload to a Snowpipe streaming destination. +// It decodes the destination configuration, retrieves events from the specified file, +// and delegates to handleEvents to process the events. func (m *Manager) Upload(asyncDest *common.AsyncDestinationStruct) common.AsyncUploadOutput { m.logger.Infon("Uploading data to snowpipe streaming destination") var destConf destConfig - err := mapstructure.Decode(asyncDest.Destination.Config, &destConf) + err := destConf.Decode(asyncDest.Destination.Config) if err != nil { - return m.abortJobs(asyncDest, fmt.Errorf("failed to decode destination config: %v", err).Error()) + return m.abortJobs(asyncDest, fmt.Errorf("failed to decode destination config: %w", err).Error()) } events, err := m.eventsFromFile(asyncDest.FileName) if err != nil { - return m.abortJobs(asyncDest, fmt.Errorf("failed to read events from file: %v", err).Error()) + return m.abortJobs(asyncDest, fmt.Errorf("failed to read events from file: %w", err).Error()) } m.logger.Infon("Read events from file", logger.NewIntField("events", int64(len(events)))) - failedJobIDs, successJobIDs, uploadInfos := m.handleEvents(asyncDest, events, destConf) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + discardsChannel, err := m.prepareChannelResponse(ctx, asyncDest.Destination.ID, &destConf, discardsTable(), discardsSchema()) + if err != nil { + return m.abortJobs(asyncDest, fmt.Errorf("failed to prepare discards channel: %w", err).Error()) + } + m.logger.Infon("Prepared discards channel") + + groupedEvents := lo.GroupBy(events, func(event *event) string { + return event.Message.Metadata.Table + }) + uploadInfos := lo.MapToSlice(groupedEvents, func(tableName string, tableEvents []*event) *uploadInfo { + jobIDs := lo.Map(tableEvents, func(event *event, _ int) int64 { + return event.Metadata.JobID + }) + latestJobID := lo.MaxBy(tableEvents, func(a, b *event) bool { + return a.Metadata.JobID > b.Metadata.JobID + }) + return &uploadInfo{ + tableName: tableName, + events: tableEvents, + jobIDs: jobIDs, + eventsSchema: schemaFromEvents(tableEvents), + discardChannelResponse: discardsChannel, + latestJobID: latestJobID.Metadata.JobID, + } + }) + slices.SortFunc(uploadInfos, func(a, b *uploadInfo) int { + return int(a.latestJobID - b.latestJobID) + }) + + var ( + importingJobIDs, failedJobIDs []int64 + importInfos, discardImportInfos []*importInfo + ) + for _, info := range uploadInfos { + imInfo, discardImInfo, err := m.sendEventsToSnowpipe(ctx, asyncDest.Destination.ID, &destConf, info) + if err != nil { + m.logger.Warnn("Failed to send events to Snowpipe", + logger.NewStringField("table", info.tableName), + obskit.Error(err), + ) + + failedJobIDs = append(failedJobIDs, info.jobIDs...) + continue + } + + importingJobIDs = append(importingJobIDs, info.jobIDs...) 
+ importInfos = append(importInfos, imInfo) + + if discardImInfo != nil { + discardImportInfos = append(discardImportInfos, discardImInfo) + } + } + if len(failedJobIDs) > 0 { + m.stats.jobs.failed.Count(len(failedJobIDs)) + } + if len(discardImportInfos) > 0 { + discarded := 0 + for _, info := range discardImportInfos { + discarded += info.Count + } + m.stats.jobs.discarded.Count(discarded) + + importInfos = append(importInfos, discardImportInfos[len(discardImportInfos)-1]) + } var importParameters stdjson.RawMessage - if len(uploadInfos) > 0 { - importIDBytes, err := json.Marshal(uploadInfos) + if len(importInfos) > 0 { + importIDBytes, err := json.Marshal(importInfos) if err != nil { - return m.abortJobs(asyncDest, fmt.Errorf("failed to marshal import id: %v", err).Error()) + return m.abortJobs(asyncDest, fmt.Errorf("failed to marshal import id: %w", err).Error()) } importParameters, err = json.Marshal(common.ImportParameters{ ImportId: string(importIDBytes), }) if err != nil { - return m.abortJobs(asyncDest, fmt.Errorf("failed to marshal import parameters: %v", err).Error()) + return m.abortJobs(asyncDest, fmt.Errorf("failed to marshal import parameters: %w", err).Error()) } - } - m.logger.Infon("Uploaded data to snowpipe streaming destination") - - m.stats.failedJobCount.Count(len(failedJobIDs)) - m.stats.successJobCount.Count(len(successJobIDs)) + m.logger.Infon("Uploaded data to snowpipe streaming destination") + } return common.AsyncUploadOutput{ - ImportingJobIDs: successJobIDs, - ImportingCount: len(successJobIDs), + ImportingJobIDs: importingJobIDs, + ImportingCount: len(importingJobIDs), ImportingParameters: importParameters, FailedJobIDs: failedJobIDs, FailedCount: len(failedJobIDs), @@ -68,7 +132,7 @@ func (m *Manager) Upload(asyncDest *common.AsyncDestinationStruct) common.AsyncU } } -func (m *Manager) eventsFromFile(fileName string) ([]event, error) { +func (m *Manager) eventsFromFile(fileName string) ([]*event, error) { file, err := os.Open(fileName) if err != nil { return nil, fmt.Errorf("failed to open file %s: %w", fileName, err) @@ -77,173 +141,96 @@ func (m *Manager) eventsFromFile(fileName string) ([]event, error) { _ = file.Close() }() - var events []event + var events []*event + formattedTS := m.now().Format(misc.RFC3339Milli) scanner := bufio.NewScanner(file) scanner.Buffer(nil, int(m.config.maxBufferCapacity.Load())) for scanner.Scan() { var e event if err := json.Unmarshal(scanner.Bytes(), &e); err != nil { - return nil, fmt.Errorf("failed to unmarshal event: %v", err) + return nil, fmt.Errorf("failed to unmarshal event: %w", err) } + e.setUUIDTimestamp(formattedTS) - events = append(events, e) + events = append(events, &e) } if err := scanner.Err(); err != nil { - return nil, fmt.Errorf("error reading from file: %v", err) + return nil, fmt.Errorf("error reading from file: %w", err) } return events, nil } -func (m *Manager) handleEvents( - asyncDest *common.AsyncDestinationStruct, - events []event, - destConf destConfig, -) ( - failedJobIDs []int64, - successJobIDs []int64, - uploadInfos []*uploadInfo, -) { - var ( - discardInfos []discardInfo - mu sync.Mutex - ) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - g, gCtx := errgroup.WithContext(ctx) - g.SetLimit(m.config.maxConcurrentUploadWorkers.Load()) - - groupedEvents := lo.GroupBy(events, func(event event) string { - return event.Message.Metadata.Table - }) - for tableName, tableEvents := range groupedEvents { - g.Go(func() error { - jobIDs := lo.Map(tableEvents, func(event 
event, _ int) int64 { - return event.Metadata.JobID - }) - - uploadTableInfo, discardTableInfo, err := m.loadTableEventsToSnowPipe( - gCtx, asyncDest, destConf, tableName, tableEvents, - ) - - mu.Lock() - defer mu.Unlock() - - if err != nil { - m.logger.Warnn("Failed to upload events to table", - logger.NewStringField("table", tableName), - obskit.Error(err), - ) - - failedJobIDs = append(failedJobIDs, jobIDs...) - return nil - } - - successJobIDs = append(successJobIDs, jobIDs...) - uploadInfos = append(uploadInfos, uploadTableInfo) - discardInfos = append(discardInfos, discardTableInfo...) - return nil - }) - } - _ = g.Wait() - - if len(discardInfos) > 0 { - discardUploadInfo, err := m.loadDiscardsToSnowPipe(ctx, asyncDest, destConf, discardInfos) - if err != nil { - m.logger.Warnn("Failed to upload events to discards table", - logger.NewStringField("table", discardsTable()), - obskit.Error(err), - ) - } else { - uploadInfos = append(uploadInfos, discardUploadInfo) - } - } - return failedJobIDs, successJobIDs, uploadInfos -} - -func (m *Manager) loadTableEventsToSnowPipe( +func (m *Manager) sendEventsToSnowpipe( ctx context.Context, - asyncDest *common.AsyncDestinationStruct, - destConf destConfig, - tableName string, - tableEvents []event, -) (*uploadInfo, []discardInfo, error) { + destinationID string, + destConf *destConfig, + info *uploadInfo, +) (*importInfo, *importInfo, error) { + offset := strconv.FormatInt(info.latestJobID, 10) + log := m.logger.Withn( - logger.NewStringField("table", tableName), - logger.NewIntField("events", int64(len(tableEvents))), + logger.NewStringField("table", info.tableName), + logger.NewIntField("events", int64(len(info.events))), + logger.NewStringField("offset", offset), ) - log.Infon("Uploading data to table") - - eventSchema := schemaFromEvents(tableEvents) + log.Infon("Sending events to Snowpipe") - channelResponse, err := m.createChannel(ctx, asyncDest, destConf, tableName, eventSchema) + channelResponse, err := m.prepareChannelResponse(ctx, destinationID, destConf, info.tableName, info.eventsSchema) if err != nil { - return nil, nil, fmt.Errorf("creating channel: %v", err) - } - snowPipeSchema := channelResponse.SnowPipeSchema() - - columnInfos := findNewColumns(eventSchema, snowPipeSchema) - if len(columnInfos) > 0 { - if err := m.addColumns(ctx, destConf.Namespace, tableName, columnInfos); err != nil { - return nil, nil, fmt.Errorf("adding columns: %v", err) - } - - channelResponse, err = m.recreateChannel(ctx, asyncDest, destConf, tableName, eventSchema, channelResponse) - if err != nil { - return nil, nil, fmt.Errorf("recreating channel: %v", err) - } - snowPipeSchema = channelResponse.SnowPipeSchema() + return nil, nil, fmt.Errorf("creating channel %s: %w", info.tableName, err) } + m.logger.Infon("Prepared channel", logger.NewStringField("channelID", channelResponse.ChannelID)) formattedTS := m.now().Format(misc.RFC3339Milli) - for _, tableEvent := range tableEvents { - tableEvent.setUUIDTimestamp(formattedTS) - } - - discardInfos := lo.FlatMap(tableEvents, func(tableEvent event, _ int) []discardInfo { - return discardedRecords(tableEvent, snowPipeSchema, tableName, formattedTS) - }) - - oldestEvent := lo.MaxBy(tableEvents, func(a, b event) bool { - return a.Metadata.JobID > b.Metadata.JobID + discardInfos := lo.FlatMap(info.events, func(tableEvent *event, _ int) []discardInfo { + return discardedRecords(tableEvent, channelResponse.SnowPipeSchema, info.tableName, formattedTS) }) - offset := strconv.FormatInt(oldestEvent.Metadata.JobID, 10) 
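+	// The insert offset is the largest JobID in the batch (computed above from info.latestJobID);
+	// Poll compares the channel's committed offset against this value to decide completion.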
insertReq := &model.InsertRequest{ - Rows: lo.Map(tableEvents, func(event event, _ int) model.Row { + Rows: lo.Map(info.events, func(event *event, _ int) model.Row { return event.Message.Data }), Offset: offset, } + insertRes, err := m.api.Insert(ctx, channelResponse.ChannelID, insertReq) - if err != nil { - if deleteErr := m.deleteChannel(ctx, tableName, channelResponse.ChannelID); deleteErr != nil { - log.Warnn("Failed to delete channel", obskit.Error(deleteErr)) + defer func() { + if err != nil || !insertRes.Success { + if deleteErr := m.deleteChannel(ctx, info.tableName, channelResponse.ChannelID); deleteErr != nil { + log.Warnn("Failed to delete channel", obskit.Error(deleteErr)) + } } - return nil, nil, fmt.Errorf("inserting data: %v", err) + }() + if err != nil { + return nil, nil, fmt.Errorf("inserting data %s: %w", info.tableName, err) } if !insertRes.Success { - if deleteErr := m.deleteChannel(ctx, tableName, channelResponse.ChannelID); deleteErr != nil { - log.Warnn("Failed to delete channel", obskit.Error(deleteErr)) - } return nil, nil, errInsertingDataFailed } - log.Infon("Successfully uploaded data to table") - info := &uploadInfo{ + var discardImInfo *importInfo + if len(discardInfos) > 0 { + discardImInfo, err = m.sendDiscardEVentsToSnowpipe(ctx, info, discardInfos) + if err != nil { + return nil, nil, fmt.Errorf("sending discard events to Snowpipe: %w", err) + } + } + log.Infon("Sent events to Snowpipe") + + imInfo := &importInfo{ ChannelID: channelResponse.ChannelID, Offset: offset, - Table: tableName, + Table: info.tableName, + Count: len(info.events), } - return info, discardInfos, nil + return imInfo, discardImInfo, nil } -// schemaFromEvents Iterate over events and merge their columns into the final map -// Keeping the first type first serve basis -func schemaFromEvents(events []event) whutils.ModelTableSchema { +// schemaFromEvents builds a schema by iterating over events and merging their columns +// using a first-encountered type basis for each column. +func schemaFromEvents(events []*event) whutils.ModelTableSchema { columnsMap := make(whutils.ModelTableSchema) for _, e := range events { for col, typ := range e.Message.Metadata.Columns { @@ -256,7 +243,7 @@ func schemaFromEvents(events []event) whutils.ModelTableSchema { } func (m *Manager) abortJobs(asyncDest *common.AsyncDestinationStruct, abortReason string) common.AsyncUploadOutput { - m.stats.failedJobCount.Count(len(asyncDest.ImportingJobIDs)) + m.stats.jobs.aborted.Count(len(asyncDest.ImportingJobIDs)) return common.AsyncUploadOutput{ AbortJobIDs: asyncDest.ImportingJobIDs, AbortCount: len(asyncDest.ImportingJobIDs), diff --git a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/uploadstats.go b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/uploadstats.go index c3f790d382..ce780d3e4d 100644 --- a/router/batchrouter/asyncdestinationmanager/snowpipestreaming/uploadstats.go +++ b/router/batchrouter/asyncdestinationmanager/snowpipestreaming/uploadstats.go @@ -4,56 +4,53 @@ import ( "fmt" "net/http" - obskit "github.com/rudderlabs/rudder-observability-kit/go/labels" "github.com/tidwall/gjson" + obskit "github.com/rudderlabs/rudder-observability-kit/go/labels" + "github.com/rudderlabs/rudder-server/router/batchrouter/asyncdestinationmanager/common" ) +// GetUploadStats retrieves the upload status for Snowpipe streaming jobs. 
+// It parses the input to identify succeeded and failed tables, maps job IDs to their statuses, +// and returns a response with succeeded job IDs, failed job IDs, and failure reasons. func (m *Manager) GetUploadStats(input common.GetUploadStatsInput) common.GetUploadStatsResponse { - m.logger.Infon("Getting upload stats for snowpipe streaming destination") + m.logger.Infon("Getting import stats for snowpipe streaming destination") - var infos []uploadInfo + var infos []importInfo err := json.Unmarshal([]byte(input.FailedJobURLs), &infos) if err != nil { m.logger.Warnn("Failed to unmarshal failed job urls", obskit.Error(err)) return common.GetUploadStatsResponse{ - StatusCode: 500, - Error: fmt.Errorf("failed to unmarshal failed job urls: %v", err).Error(), + StatusCode: http.StatusBadRequest, + Error: fmt.Errorf("failed to unmarshal failed job urls: %w", err).Error(), } } - var ( - succeededTables map[string]uploadInfo - failedTables map[string]uploadInfo - ) - + succeededTables, failedTables := make(map[string]struct{}), make(map[string]importInfo) for _, info := range infos { if info.Failed { failedTables[info.Table] = info } else { - succeededTables[info.Table] = info + succeededTables[info.Table] = struct{}{} } } var ( succeededJobIDs []int64 failedJobIDs []int64 - failedJobReasons map[int64]string + failedJobReasons = make(map[int64]string) ) - for _, job := range input.ImportingList { tableName := gjson.GetBytes(job.EventPayload, "metadata.table").String() if _, ok := succeededTables[tableName]; ok { succeededJobIDs = append(succeededJobIDs, job.JobID) - continue } if info, ok := failedTables[tableName]; ok { failedJobIDs = append(failedJobIDs, job.JobID) failedJobReasons[job.JobID] = info.Reason } } - return common.GetUploadStatsResponse{ StatusCode: http.StatusOK, Metadata: common.EventStatMeta{ diff --git a/router/batchrouter/handle_async.go b/router/batchrouter/handle_async.go index 3571503a03..0a0157b959 100644 --- a/router/batchrouter/handle_async.go +++ b/router/batchrouter/handle_async.go @@ -25,6 +25,7 @@ import ( routerutils "github.com/rudderlabs/rudder-server/router/utils" "github.com/rudderlabs/rudder-server/services/rmetrics" "github.com/rudderlabs/rudder-server/utils/misc" + "github.com/rudderlabs/rudder-server/utils/timeutil" utilTypes "github.com/rudderlabs/rudder-server/utils/types" ) @@ -379,7 +380,7 @@ func (brt *Handle) asyncStructSetup(sourceID, destinationID string, attemptNums brt.asyncDestinationStruct[destinationID].FirstAttemptedAts = firstAttemptedAts brt.asyncDestinationStruct[destinationID].OriginalJobParameters = originalJobParameters brt.asyncDestinationStruct[destinationID].FileName = jsonPath - brt.asyncDestinationStruct[destinationID].CreatedAt = time.Now() + brt.asyncDestinationStruct[destinationID].CreatedAt = timeutil.Now() brt.asyncDestinationStruct[destinationID].SourceJobRunID = newJobRunID } diff --git a/router/batchrouter/handle_lifecycle.go b/router/batchrouter/handle_lifecycle.go index 676e5c4145..ce8394015a 100644 --- a/router/batchrouter/handle_lifecycle.go +++ b/router/batchrouter/handle_lifecycle.go @@ -211,9 +211,9 @@ func (brt *Handle) setupReloadableVars() { brt.jobsDBCommandTimeout = config.GetReloadableDurationVar(600, time.Second, "JobsDB.BatchRouter.CommandRequestTimeout", "JobsDB.CommandRequestTimeout") brt.jobdDBQueryRequestTimeout = config.GetReloadableDurationVar(600, time.Second, "JobsDB.BatchRouter.QueryRequestTimeout", "JobsDB.QueryRequestTimeout") brt.jobdDBMaxRetries = config.GetReloadableIntVar(2, 1, 
"JobsDB.BatchRouter.MaxRetries", "JobsDB.MaxRetries") - brt.minIdleSleep = config.GetReloadableDurationVar(2, time.Second, "BatchRouter.minIdleSleep") - brt.uploadFreq = config.GetReloadableDurationVar(30, time.Second, "BatchRouter.uploadFreqInS", "BatchRouter.uploadFreq") - brt.mainLoopFreq = config.GetReloadableDurationVar(30, time.Second, "BatchRouter.mainLoopFreq") + brt.minIdleSleep = config.GetReloadableDurationVar(2, time.Second, "BatchRouter."+brt.destType+".minIdleSleep", "BatchRouter.minIdleSleep") + brt.uploadFreq = config.GetReloadableDurationVar(30, time.Second, "BatchRouter."+brt.destType+".uploadFreqInS", "BatchRouter."+brt.destType+".uploadFreq", "BatchRouter.uploadFreqInS", "BatchRouter.uploadFreq") + brt.mainLoopFreq = config.GetReloadableDurationVar(30, time.Second, "BatchRouter."+brt.destType+".mainLoopFreq", "BatchRouter.mainLoopFreq") brt.warehouseServiceMaxRetryTime = config.GetReloadableDurationVar(3, time.Hour, "BatchRouter.warehouseServiceMaxRetryTime", "BatchRouter.warehouseServiceMaxRetryTimeinHr") brt.datePrefixOverride = config.GetReloadableStringVar("", "BatchRouter.datePrefixOverride") brt.customDatePrefix = config.GetReloadableStringVar("", "BatchRouter.customDatePrefix") diff --git a/warehouse/integrations/manager/manager.go b/warehouse/integrations/manager/manager.go index e55465e958..d550f45fe0 100644 --- a/warehouse/integrations/manager/manager.go +++ b/warehouse/integrations/manager/manager.go @@ -65,7 +65,7 @@ func New(destType string, conf *config.Config, logger logger.Logger, stats stats return redshift.New(conf, logger, stats), nil case warehouseutils.BQ: return bigquery.New(conf, logger), nil - case warehouseutils.SNOWFLAKE: + case warehouseutils.SNOWFLAKE, warehouseutils.SnowpipeStreaming: return snowflake.New(conf, logger, stats), nil case warehouseutils.POSTGRES: return postgres.New(conf, logger, stats), nil @@ -90,7 +90,7 @@ func NewWarehouseOperations(destType string, conf *config.Config, logger logger. 
return redshift.New(conf, logger, stats), nil case warehouseutils.BQ: return bigquery.New(conf, logger), nil - case warehouseutils.SNOWFLAKE: + case warehouseutils.SNOWFLAKE, warehouseutils.SnowpipeStreaming: return snowflake.New(conf, logger, stats), nil case warehouseutils.POSTGRES: return postgres.New(conf, logger, stats), nil diff --git a/warehouse/integrations/testdata/docker-compose.transformer.yml b/warehouse/integrations/testdata/docker-compose.transformer.yml new file mode 100644 index 0000000000..08899300e3 --- /dev/null +++ b/warehouse/integrations/testdata/docker-compose.transformer.yml @@ -0,0 +1,11 @@ +version: "3.9" + +services: + transformer: + image: "rudderstack/rudder-transformer:latest" + ports: + - "9090:9090" + healthcheck: + test: wget --no-verbose --tries=1 --spider http://0.0.0.0:9090/health || exit 1 + interval: 1s + retries: 25 diff --git a/warehouse/utils/reservedkeywords.go b/warehouse/utils/reservedkeywords.go index f8b9fc3daa..71f553f8d7 100644 --- a/warehouse/utils/reservedkeywords.go +++ b/warehouse/utils/reservedkeywords.go @@ -94,6 +94,99 @@ var ReservedKeywords = map[string]map[string]bool{ "WHERE": true, "WITH": true, }, + "SNOWPIPE_STREAMING": { + "ACCOUNT": true, + "ALL": true, + "ALTER": true, + "AND": true, + "ANY": true, + "AS": true, + "BETWEEN": true, + "BY": true, + "CASE": true, + "CAST": true, + "CHECK": true, + "COLUMN": true, + "CONNECT": true, + "CONNECTION": true, + "CONSTRAINT": true, + "CREATE": true, + "CROSS": true, + "CURRENT": true, + "CURRENT_DATE": true, + "CURRENT_TIME": true, + "CURRENT_TIMESTAMP": true, + "CURRENT_USER": true, + "DATABASE": true, + "DELETE": true, + "DISTINCT": true, + "DROP": true, + "ELSE": true, + "EXISTS": true, + "FALSE": true, + "FOLLOWING": true, + "FOR": true, + "FROM": true, + "FULL": true, + "GRANT": true, + "GROUP": true, + "GSCLUSTER": true, + "HAVING": true, + "ILIKE": true, + "IN": true, + "INCREMENT": true, + "INNER": true, + "INSERT": true, + "INTERSECT": true, + "INTO": true, + "IS": true, + "ISSUE": true, + "JOIN": true, + "LATERAL": true, + "LEFT": true, + "LIKE": true, + "LOCALTIME": true, + "LOCALTIMESTAMP": true, + "MINUS": true, + "NATURAL": true, + "NOT": true, + "NULL": true, + "OF": true, + "ON": true, + "OR": true, + "ORDER": true, + "ORGANIZATION": true, + "QUALIFY": true, + "REGEXP": true, + "REVOKE": true, + "RIGHT": true, + "RLIKE": true, + "ROW": true, + "ROWS": true, + "SAMPLE": true, + "SCHEMA": true, + "SELECT": true, + "SET": true, + "SOME": true, + "START": true, + "TABLE": true, + "TABLESAMPLE": true, + "THEN": true, + "TO": true, + "TRIGGER": true, + "TRUE": true, + "TRY_CAST": true, + "UNION": true, + "UNIQUE": true, + "UPDATE": true, + "USING": true, + "VALUES": true, + "VIEW": true, + "WHEN": true, + "WHENEVER": true, + "WHERE": true, + "WITH": true, + }, "RS": { "AES128": true, "AES256": true, diff --git a/warehouse/utils/uploader.go b/warehouse/utils/uploader.go index 50af3e044f..b35ddb0ad3 100644 --- a/warehouse/utils/uploader.go +++ b/warehouse/utils/uploader.go @@ -7,11 +7,6 @@ import ( "github.com/rudderlabs/rudder-server/warehouse/internal/model" ) -type ( - ModelWarehouse = model.Warehouse - ModelTableSchema = model.TableSchema -) - //go:generate mockgen -destination=../internal/mocks/utils/mock_uploader.go -package mock_uploader github.com/rudderlabs/rudder-server/warehouse/utils Uploader type Uploader interface { IsWarehouseSchemaEmpty() bool diff --git a/warehouse/utils/utils.go b/warehouse/utils/utils.go index e32ab9ef77..b556f8937b 100644 --- 
a/warehouse/utils/utils.go +++ b/warehouse/utils/utils.go @@ -38,17 +38,18 @@ import ( ) const ( - RS = "RS" - BQ = "BQ" - SNOWFLAKE = "SNOWFLAKE" - POSTGRES = "POSTGRES" - CLICKHOUSE = "CLICKHOUSE" - MSSQL = "MSSQL" - AzureSynapse = "AZURE_SYNAPSE" - DELTALAKE = "DELTALAKE" - S3Datalake = "S3_DATALAKE" - GCSDatalake = "GCS_DATALAKE" - AzureDatalake = "AZURE_DATALAKE" + RS = "RS" + BQ = "BQ" + SNOWFLAKE = "SNOWFLAKE" + SnowpipeStreaming = "SNOWPIPE_STREAMING" + POSTGRES = "POSTGRES" + CLICKHOUSE = "CLICKHOUSE" + MSSQL = "MSSQL" + AzureSynapse = "AZURE_SYNAPSE" + DELTALAKE = "DELTALAKE" + S3Datalake = "S3_DATALAKE" + GCSDatalake = "GCS_DATALAKE" + AzureDatalake = "AZURE_DATALAKE" ) const ( @@ -206,18 +207,6 @@ type ColumnInfo struct { Type string } -type Destination struct { - Source backendconfig.SourceT - Destination backendconfig.DestinationT -} - -type Schema model.Schema - -type KeyValue struct { - Key string - Value interface{} -} - type GetLoadFilesOptions struct { Table string StartID int64 @@ -230,6 +219,13 @@ type LoadFile struct { Metadata json.RawMessage } +type ( + ModelWarehouse = model.Warehouse + ModelTableSchema = model.TableSchema + ModelValidationRequest = model.ValidationRequest + ModelValidationResponse = model.ValidationResponse +) + func IDResolutionEnabled() bool { return enableIDResolution } @@ -523,7 +519,8 @@ ToProviderCase converts string provided to case generally accepted in the wareho e.g. columns are uppercase in SNOWFLAKE and lowercase etc. in REDSHIFT, BIGQUERY etc */ func ToProviderCase(provider, str string) string { - if strings.ToUpper(provider) == SNOWFLAKE { + upperCaseProvider := strings.ToUpper(provider) + if upperCaseProvider == SNOWFLAKE || upperCaseProvider == SnowpipeStreaming { str = strings.ToUpper(str) } return str
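
For reference, below is a minimal standalone sketch (not part of the patch) of the identifier-casing behavior introduced by the ToProviderCase hunk above: the new SNOWPIPE_STREAMING provider is treated the same as SNOWFLAKE, so identifiers are upper-cased for both, while other providers pass through unchanged. The sketch mirrors the hunk rather than importing the package; the constant values are copied from the utils.go diff.

package main

import (
	"fmt"
	"strings"
)

// Constant values copied from the utils.go hunk above.
const (
	SNOWFLAKE         = "SNOWFLAKE"
	SnowpipeStreaming = "SNOWPIPE_STREAMING"
)

// toProviderCase mirrors the updated ToProviderCase: identifiers are
// upper-cased for Snowflake and Snowpipe Streaming, left untouched otherwise.
func toProviderCase(provider, str string) string {
	upperCaseProvider := strings.ToUpper(provider)
	if upperCaseProvider == SNOWFLAKE || upperCaseProvider == SnowpipeStreaming {
		str = strings.ToUpper(str)
	}
	return str
}

func main() {
	fmt.Println(toProviderCase(SnowpipeStreaming, "tracks")) // TRACKS
	fmt.Println(toProviderCase("POSTGRES", "tracks"))        // tracks
}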