Skip to content

Commit

Permalink
source-sqlserver: Support DATETIME as backfill keys
Browse files Browse the repository at this point in the history
By default we serialize timestamp values in row keys as an RFC3339
timestamp with nanosecond precision and a time zone offset, but SQL
Server doesn't accept that as an input format for a DATETIME column,
since DATETIMEs only have three digits of sub-second precision and
no time zone.

To make this work, we adjust the FDB row key encoding so that when
the column type is `datetime` the value is formatted with the layout
`2006-01-02T15:04:05.000` (three fractional digits, no time zone)
instead. This doesn't change the handling of the actual capture
output values, only row key serialization.
  • Loading branch information
willdonnelly committed Nov 5, 2024
1 parent 7afe1ff commit ba0f13c
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 1 deletion.
49 changes: 49 additions & 0 deletions source-sqlserver/.snapshots/TestScanKeyTypes-DateTime
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
####################################
### Capture from Start
####################################
# ================================
# Collection "acmeCo/test/test_scankeytypes_datetime_889298060005": 1 Documents
# ================================
{"_meta":{"op":"c","source":{"schema":"dbo","snapshot":true,"table":"test_ScanKeyTypes_DateTime_889298060005","lsn":"","seqval":"AAAAAAAAAAAAAA=="}},"k":"<TIMESTAMP>","v":"Data 0"}
# ================================
# Final State Checkpoint
# ================================
{"bindingStateV1":{"dbo%2Ftest_ScanKeyTypes_DateTime_889298060005":{"backfilled":1,"key_columns":["k"],"mode":"Backfill","scanned":"AjE5OTEtMDgtMzFUMTI6MzQ6NTQuMTEwAA=="}},"cursor":"AAAAAAAAAAAAAA=="}


####################################
### Capture from Key "AjE5OTEtMDgtMzFUMTI6MzQ6NTQuMTEwAA=="
####################################
# ================================
# Collection "acmeCo/test/test_scankeytypes_datetime_889298060005": 1 Documents
# ================================
{"_meta":{"op":"c","source":{"schema":"dbo","snapshot":true,"table":"test_ScanKeyTypes_DateTime_889298060005","lsn":"","seqval":"AAAAAAAAAAAAAA=="}},"k":"<TIMESTAMP>","v":"Data 1"}
# ================================
# Final State Checkpoint
# ================================
{"bindingStateV1":{"dbo%2Ftest_ScanKeyTypes_DateTime_889298060005":{"backfilled":2,"key_columns":["k"],"mode":"Backfill","scanned":"AjE5OTEtMDgtMzFUMTI6MzQ6NTQuMzMzAA=="}},"cursor":"AAAAAAAAAAAAAA=="}


####################################
### Capture from Key "AjE5OTEtMDgtMzFUMTI6MzQ6NTQuMzMzAA=="
####################################
# ================================
# Collection "acmeCo/test/test_scankeytypes_datetime_889298060005": 1 Documents
# ================================
{"_meta":{"op":"c","source":{"schema":"dbo","snapshot":true,"table":"test_ScanKeyTypes_DateTime_889298060005","lsn":"","seqval":"AAAAAAAAAAAAAA=="}},"k":"<TIMESTAMP>","v":"Data 2"}
# ================================
# Final State Checkpoint
# ================================
{"bindingStateV1":{"dbo%2Ftest_ScanKeyTypes_DateTime_889298060005":{"backfilled":3,"key_columns":["k"],"mode":"Backfill","scanned":"AjIwMDAtMDEtMDFUMDE6MDE6MDEuMDAwAA=="}},"cursor":"AAAAAAAAAAAAAA=="}


####################################
### Capture from Key "AjIwMDAtMDEtMDFUMDE6MDE6MDEuMDAwAA=="
####################################
# ================================
# Final State Checkpoint
# ================================
{"bindingStateV1":{"dbo%2Ftest_ScanKeyTypes_DateTime_889298060005":{"backfilled":3,"key_columns":["k"],"mode":"Active"}},"cursor":"AAAAAAAAAAAAAA=="}



13 changes: 12 additions & 1 deletion source-sqlserver/datatypes.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ const (
// thus it can be sorted lexicographically as bytes.
sortableRFC3339Nano = "2006-01-02T15:04:05.000000000Z07:00"

// In order to be round-tripped successfully as a backfill key, datetimes must be
// serialized to a string with three decimal digits of precision and no time zone.
// This is only done for row key serialization, not the actual values in a capture's
// output documents.
datetimeKeyEncoding = "2006-01-02T15:04:05.000"

// SQL Server sort ordering for `uniqueidentifier` columns is absolutely insane,
// so we have to reshuffle the underlying bytes pretty hard to produce a row key
// whose bytewise lexicographic ordering matches SQL Server sorting rules.
Expand All @@ -35,7 +41,12 @@ const (
func encodeKeyFDB(key, ktype interface{}) (tuple.TupleElement, error) {
switch key := key.(type) {
case time.Time:
return key.Format(sortableRFC3339Nano), nil
switch ktype {
case "datetime":
return key.Format(datetimeKeyEncoding), nil
default:
return key.Format(sortableRFC3339Nano), nil
}
case []byte:
switch ktype {
case "uniqueidentifier":
Expand Down
1 change: 1 addition & 0 deletions source-sqlserver/datatypes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ func TestScanKeyTypes(t *testing.T) {
}},
{"Numeric", "NUMERIC(10,5)", []any{"-1234", "0", "-3", "2", "12345.12", "12345.121", "12345.1205", "12345.12059", "12346"}},
{"Decimal", "DECIMAL(10,5)", []any{"-1234", "0", "-3", "2", "12345.12", "12345.121", "12345.1205", "12345.12059", "12346"}},
{"DateTime", "DATETIME", []any{"1991-08-31T12:34:54.111", "1991-08-31T12:34:54.333", "2000-01-01T01:01:01"}},
} {
t.Run(tc.Name, func(t *testing.T) {
var uniqueID = fmt.Sprintf("88929806%04d", idx)
Expand Down

0 comments on commit ba0f13c

Please sign in to comment.