tulip · eparker-tulip · Nov 12, 2024 · Oct 30, 2024 · Nov 8, 2024 · Nov 12, 2024
diff --git a/lib/oplog/tail.go b/lib/oplog/tail.go
@@ -37,12 +37,10 @@ type Tailer struct {
 // Raw oplog entry from Mongo
 type rawOplogEntry struct {
 	Timestamp    primitive.Timestamp `bson:"ts"`
-	HistoryID    int64               `bson:"h"`
-	MongoVersion int                 `bson:"v"`
 	Operation    string              `bson:"op"`
 	Namespace    string              `bson:"ns"`
 	Doc          bson.Raw            `bson:"o"`
-	Update       rawOplogEntryID     `bson:"o2"`
+	Update       bson.Raw            `bson:"o2"` // raw o2 document; its _id is looked up for update operations
 }
 
 // Parsed Cursor Result
@@ -52,10 +50,6 @@ type cursorResultStatus struct {
 	DidLosePosition bool
 }
 
-type rawOplogEntryID struct {
-	ID interface{} `bson:"_id"`
-}
-
 const requeryDuration = time.Second
 
 var (
@@ -214,7 +208,7 @@ func (tailer *Tailer) tailOnce(out []PublisherChannels, stop <-chan bool, readOr
 					continue
 				}
 
-				ts, pubs, sendMetricsData := tailer.unmarshalEntry(rawData, tailer.Denylist, readOrdinal)
+				ts, pubs, sendMetricsData := tailer.processEntry(rawData, tailer.Denylist, readOrdinal)
 
 				if ts != nil {
 					lastTimestamp = *ts
@@ -366,16 +360,77 @@ func closeCursor(cursor *mongo.Cursor) {
 	}
 }
 
-// unmarshalEntry unmarshals a single entry from the oplog.
+// unmarshalEntryMetadata reads the top-level fields (ns, ts, op, o, o2) of a raw oplog entry into a rawOplogEntry.
+// A field that cannot be looked up in rawData is left at its zero value. It returns nil when a field that is present
+// has an unexpected BSON type (logged as an error), or when the entry's database is in denylist (logged at debug
+// level and counted in metricOplogEntriesFiltered).
+func unmarshalEntryMetadata(rawData bson.Raw, denylist *sync.Map) *rawOplogEntry {
+	var result rawOplogEntry
+	var ok bool
+	if nsLookup, err := rawData.LookupErr("ns"); err == nil {
+		result.Namespace, ok = nsLookup.StringValueOK()
+		if !ok {
+			// this means there was a type mismatch
+			log.Log.Error("Error unmarshalling oplog namespace entry")
+			return nil
+		}
+	}
+
+	// Try to filter early, before the remaining fields are decoded.
+	// NOTE(review): a denied entry returns nil before "ts" is read, so the caller receives no timestamp for it;
+	// confirm that is intended.
+	if len(result.Namespace) > 0 && result.Namespace != "admin.$cmd" {
+		db, _ := parseNamespace(result.Namespace)
+		if _, denied := denylist.Load(db); denied {
+			log.Log.Debugw("Skipping oplog entry", "database", db)
+			metricOplogEntriesFiltered.WithLabelValues(db).Add(1)
+			return nil
+		}
+	}
+
+	if tsLookup, err := rawData.LookupErr("ts"); err == nil {
+		t, i, tsOK := tsLookup.TimestampOK()
+		if !tsOK {
+			log.Log.Error("Error unmarshalling oplog timestamp entry")
+			return nil
+		}
+		result.Timestamp = primitive.Timestamp{T: t, I: i}
+	}
+
+	if opLookup, err := rawData.LookupErr("op"); err == nil {
+		result.Operation, ok = opLookup.StringValueOK()
+		if !ok {
+			log.Log.Error("Error unmarshalling oplog operation entry")
+			return nil
+		}
+	}
+
+	if oLookup, err := rawData.LookupErr("o"); err == nil {
+		result.Doc, ok = oLookup.DocumentOK()
+		if !ok {
+			log.Log.Error("Error unmarshalling oplog document entry")
+			return nil
+		}
+	}
+
+	if o2Lookup, err := rawData.LookupErr("o2"); err == nil {
+		result.Update, ok = o2Lookup.DocumentOK()
+		if !ok {
+			log.Log.Error("Error unmarshalling oplog update entry")
+			return nil
+		}
+	}
+
+	return &result
+}
+
+// processEntry processes a single entry from the oplog.
 //
 // The timestamp of the entry is returned so that tailOnce knows the timestamp of the last entry it read, even if it
 // ignored it or failed at some later step.
-func (tailer *Tailer) unmarshalEntry(rawData bson.Raw, denylist *sync.Map, readOrdinal int) (timestamp *primitive.Timestamp, pubs []*redispub.Publication, sendMetricsData func()) {
-	var result rawOplogEntry
-
-	err := bson.Unmarshal(rawData, &result)
-	if err != nil {
-		log.Log.Errorw("Error unmarshalling oplog entry", "error", err)
+func (tailer *Tailer) processEntry(rawData bson.Raw, denylist *sync.Map, readOrdinal int) (timestamp *primitive.Timestamp, pubs []*redispub.Publication, sendMetricsData func()) {
+	result := unmarshalEntryMetadata(rawData, denylist)
+	if result == nil {
 		return
 	}
 
@@ -483,8 +538,21 @@ func (tailer *Tailer) getStartTime(maxOrdinal int, getTimestampOfLastOplogEntry
 	return primitive.Timestamp{T: uint32(time.Now().Unix() << 32)}
 }
 
+// parseID decodes a raw BSON _id value into a Go value. It logs and returns an error when the value is zero
+// (empty or missing) or when it cannot be unmarshalled.
+func parseID(idRaw bson.RawValue) (id interface{}, err error) {
+	if idRaw.IsZero() {
+		log.Log.Error("failed to get objectId: _id is empty or not set")
+		return nil, errors.New("empty or missing objectId")
+	}
+	if err = idRaw.Unmarshal(&id); err != nil {
+		log.Log.Errorf("failed to unmarshal objectId: %v", err)
+	}
+	return id, err
+}
+
 // converts a rawOplogEntry to an oplogEntry
-func (tailer *Tailer) parseRawOplogEntry(entry rawOplogEntry, txIdx *uint) []oplogEntry {
+func (tailer *Tailer) parseRawOplogEntry(entry *rawOplogEntry, txIdx *uint) []oplogEntry {
 	if txIdx == nil {
 		idx := uint(0)
 		txIdx = &idx
@@ -505,19 +573,14 @@ func (tailer *Tailer) parseRawOplogEntry(entry rawOplogEntry, txIdx *uint) []opl
 
 		out.Database, out.Collection = parseNamespace(out.Namespace)
 
+		var errID error
 		if out.Operation == operationUpdate {
-			out.DocID = entry.Update.ID
+			out.DocID, errID = parseID(entry.Update.Lookup("_id"))
 		} else {
-			idLookup := entry.Doc.Lookup("_id")
-			if idLookup.IsZero() {
-				log.Log.Error("failed to get objectId: _id is empty or not set")
-				return nil
-			}
-			err := idLookup.Unmarshal(&out.DocID)
-			if err != nil {
-				log.Log.Errorf("failed to unmarshal objectId: %v", err)
-				return nil
-			}
+			out.DocID, errID = parseID(entry.Doc.Lookup("_id"))
+		}
+		if errID != nil {
+			return nil
 		}
 
 		return []oplogEntry{out}
@@ -540,7 +603,7 @@ func (tailer *Tailer) parseRawOplogEntry(entry rawOplogEntry, txIdx *uint) []opl
 
 		for _, v := range txData.ApplyOps {
 			v.Timestamp = entry.Timestamp
-			ret = append(ret, tailer.parseRawOplogEntry(v, txIdx)...)
+			ret = append(ret, tailer.parseRawOplogEntry(&v, txIdx)...)
 		}
 
 		return ret

diff --git a/lib/oplog/tail_test.go b/lib/oplog/tail_test.go
@@ -104,16 +104,6 @@ func TestGetStartTime(t *testing.T) {
 	}
 }
 
-func mustRaw(t *testing.T, data interface{}) bson.Raw {
-	b, err := bson.Marshal(data)
-	require.NoError(t, err)
-
-	var raw bson.Raw
-	require.NoError(t, bson.Unmarshal(b, &raw))
-
-	return raw
-}
-
 func TestParseRawOplogEntry(t *testing.T) {
 	tests := map[string]struct {
 		in   rawOplogEntry
@@ -124,7 +114,7 @@ func TestParseRawOplogEntry(t *testing.T) {
 				Timestamp: primitive.Timestamp{T: 1234},
 				Operation: "i",
 				Namespace: "foo.Bar",
-				Doc:       mustRaw(t, map[string]interface{}{"_id": "someid", "foo": "bar"}),
+				Doc:       rawBson(t, map[string]interface{}{"_id": "someid", "foo": "bar"}),
 			},
 			want: []oplogEntry{{
 				Timestamp:  primitive.Timestamp{T: 1234},
@@ -141,8 +131,8 @@ func TestParseRawOplogEntry(t *testing.T) {
 				Timestamp: primitive.Timestamp{T: 1234},
 				Operation: "u",
 				Namespace: "foo.Bar",
-				Doc:       mustRaw(t, map[string]interface{}{"new": "data"}),
-				Update:    rawOplogEntryID{ID: "updateid"},
+				Doc:       rawBson(t, map[string]interface{}{"new": "data"}),
+				Update:    rawBson(t, map[string]interface{}{"_id": "updateid"}),
 			},
 			want: []oplogEntry{{
 				Timestamp:  primitive.Timestamp{T: 1234},
@@ -159,7 +149,7 @@ func TestParseRawOplogEntry(t *testing.T) {
 				Timestamp: primitive.Timestamp{T: 1234},
 				Operation: "d",
 				Namespace: "foo.Bar",
-				Doc:       mustRaw(t, map[string]interface{}{"_id": "someid"}),
+				Doc:       rawBson(t, map[string]interface{}{"_id": "someid"}),
 			},
 			want: []oplogEntry{{
 				Timestamp:  primitive.Timestamp{T: 1234},
@@ -176,7 +166,7 @@ func TestParseRawOplogEntry(t *testing.T) {
 				Timestamp: primitive.Timestamp{T: 1234},
 				Operation: "c",
 				Namespace: "foo.$cmd",
-				Doc:       mustRaw(t, map[string]interface{}{"drop": "Foo"}),
+				Doc:       rawBson(t, map[string]interface{}{"drop": "Foo"}),
 			},
 			want: nil,
 		},
@@ -185,47 +175,51 @@ func TestParseRawOplogEntry(t *testing.T) {
 				Timestamp: primitive.Timestamp{T: 1234},
 				Operation: "c",
 				Namespace: "admin.$cmd",
-				Doc: mustRaw(t, map[string]interface{}{
+				Doc: rawBson(t, map[string]interface{}{
 					"applyOps": []rawOplogEntry{
 						{
 							Timestamp: primitive.Timestamp{T: 1234},
 							Operation: "c",
 							Namespace: "admin.$cmd",
-							Doc: mustRaw(t, map[string]interface{}{
+							Doc: rawBson(t, map[string]interface{}{
 								"applyOps": []rawOplogEntry{
 									{
 										Operation: "i",
 										Namespace: "foo.Bar",
-										Doc: mustRaw(t, map[string]interface{}{
+										Doc: rawBson(t, map[string]interface{}{
 											"_id": "id1",
 											"foo": "baz",
 										}),
+										Update: rawBson(t, map[string]interface{}{}),
 									},
 								},
 							}),
+							Update: rawBson(t, map[string]interface{}{}),
 						},
 						{
 							Operation: "i",
 							Namespace: "foo.Bar",
-							Doc: mustRaw(t, map[string]interface{}{
+							Doc: rawBson(t, map[string]interface{}{
 								"_id": "id1",
 								"foo": "bar",
 							}),
+							Update: rawBson(t, map[string]interface{}{}),
 						},
 						{
 							Operation: "u",
 							Namespace: "foo.Bar",
-							Doc: mustRaw(t, map[string]interface{}{
+							Doc: rawBson(t, map[string]interface{}{
 								"foo": "quux",
 							}),
-							Update: rawOplogEntryID{"id2"},
+							Update: rawBson(t, map[string]interface{}{"_id": "id2"}),
 						},
 						{
 							Operation: "d",
 							Namespace: "foo.Bar",
-							Doc: mustRaw(t, map[string]interface{}{
+							Doc: rawBson(t, map[string]interface{}{
 								"_id": "id3",
 							}),
+							Update: rawBson(t, map[string]interface{}{}),
 						},
 					},
 				}),
@@ -287,7 +281,7 @@ func TestParseRawOplogEntry(t *testing.T) {
 
 	for testName, test := range tests {
 		t.Run(testName, func(t *testing.T) {
-			got := (&Tailer{Denylist: &sync.Map{}}).parseRawOplogEntry(test.in, nil)
+			got := (&Tailer{Denylist: &sync.Map{}}).parseRawOplogEntry(&test.in, nil)
 
 			if diff := pretty.Compare(parseEntry(t, got), parseEntry(t, test.want)); diff != "" {
 				t.Errorf("Got incorrect result (-got +want)\n%s", diff)