tetragon: improve how we handle TIDs and GetProcessCopy() #1256

Merged · 3 commits · Sep 20, 2023
9 changes: 5 additions & 4 deletions bpf/process/bpf_execve_event.c
@@ -177,10 +177,11 @@ event_execve(struct sched_execve_args *ctx)

p = &event->process;
p->flags = EVENT_EXECVE;
/* Send the TGID and TID, as during an execve all threads other
* than the calling thread are destroyed, but since we hook late
* during the execve then the calling thread at the hook time is
* already the new thread group leader.
/**
* Per thread tracking rules TID == PID :
* At exec all threads other than the calling one are destroyed, so
* current becomes the new thread leader since we hook late during
* execve.
*/
p->pid = pid >> 32;
p->tid = (__u32)pid;
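
For context: the 64-bit value returned by the kernel's bpf_get_current_pid_tgid() helper packs the thread-group ID (the user-space "PID") into the upper 32 bits and the thread ID into the lower 32 bits, which is why the hunk above derives p->pid from pid >> 32 and p->tid from (__u32)pid. A minimal Go sketch of the same unpacking (the helper and values here are illustrative, not Tetragon code):

package main

import "fmt"

// splitPidTgid unpacks a bpf_get_current_pid_tgid()-style value:
// thread-group ID (user-space PID) in the upper 32 bits, thread ID
// in the lower 32 bits.
func splitPidTgid(pidTgid uint64) (tgid, tid uint32) {
	return uint32(pidTgid >> 32), uint32(pidTgid)
}

func main() {
	// Hypothetical example: thread 1237 inside thread group 1234.
	pidTgid := uint64(1234)<<32 | 1237
	tgid, tid := splitPidTgid(pidTgid)
	fmt.Printf("tgid=%d tid=%d\n", tgid, tid) // prints: tgid=1234 tid=1237
}
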
7 changes: 6 additions & 1 deletion bpf/process/bpf_exit.h
@@ -56,7 +56,12 @@ static inline __attribute__((always_inline)) void event_exit_send(void *ctx, __u
exit->current.pad[3] = 0;
exit->current.ktime = enter->key.ktime;

/* We track and report only thread leader so here tgid == tid */
/**
 * Per thread tracking rules TID == PID :
 * We want the exit event to match the exec one, and since during exec
 * we report the thread group leader, do the same here: we read the exec
 * entry from the execve_map anyway, and explicitly set the TID to the tgid.
 */
exit->info.tid = tgid;
probe_read(&exit->info.code, sizeof(exit->info.code),
_(&task->exit_code));
8 changes: 5 additions & 3 deletions bpf/process/bpf_fork.c
@@ -75,9 +75,11 @@ BPF_KPROBE(event_wake_up_new_task, struct task_struct *task)
.common.ktime = curr->key.ktime,
.parent = curr->pkey,
.tgid = curr->key.pid,
/* Since we generate one event per thread group, then when
* the task wakes up, there will be only one thread here:
* the thread group leader. Pass its thread id to user-space.
/**
 * Per thread tracking rules TID == PID :
 * Since we generate one event per thread group, when this task wakes
 * up it will be the only one in the thread group, and it is the
 * leader. Make sure to pass the TID to user space.
*/
.tid = BPF_CORE_READ(task, pid),
.ktime = curr->key.ktime,
5 changes: 4 additions & 1 deletion bpf/process/generic_calls.h
@@ -79,7 +79,10 @@ generic_process_init(struct msg_generic_kprobe *e, u8 op, struct event_config *c

e->action = 0;

/* Initialize with the calling TID */
/**
 * Per thread tracking rules TID is the calling thread:
 * At kprobes, tracepoints, etc. we report the calling thread ID to user space.
 */
e->tid = (__u32)get_current_pid_tgid();
}

20 changes: 14 additions & 6 deletions pkg/eventcache/eventcache.go
@@ -69,10 +69,14 @@ func HandleGenericInternal(ev notify.Event, pid uint32, tid *uint32, timestamp u
}

if internal != nil {
// When we report the per thread fields, take a copy
// of the thread leader from the cache, then update the corresponding
// per thread fields.
//
// The cost to get this is relatively high because it requires a
// deep copy of all the fields of the thread leader from the cache in
// order to safely modify them without corrupting gRPC streams.
proc := internal.GetProcessCopy()
// The TID of the cached process can be different from the
// TID that triggered the event, so always use the recorded
// one from bpf.
process.UpdateEventProcessTid(proc, tid)
ev.SetProcess(proc)
} else {
@@ -97,10 +101,14 @@ func HandleGenericEvent(internal *process.ProcessInternal, ev notify.Event, tid
return ErrFailedToGetPodInfo
}

// When we report the per thread fields, take a copy
// of the thread leader from the cache, then update the corresponding
// per thread fields.
//
// The cost to get this is relatively high because it requires a
// deep copy of all the fields of the thread leader from the cache in
// order to safely modify them without corrupting gRPC streams.
proc := internal.GetProcessCopy()
// The TID of the cached process can be different from the
// TID that triggered the event, so always use the recorded
// one from bpf.
process.UpdateEventProcessTid(proc, tid)
ev.SetProcess(proc)
return nil
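
Since both HandleGenericInternal and HandleGenericEvent rely on this copy-then-patch pattern, here is a simplified sketch of what it boils down to. It assumes tetragon.Process is a protobuf-generated message that can be cloned with proto.Clone; the helper name is hypothetical, and the real GetProcessCopy does more than this:

package example

import (
	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/types/known/wrapperspb"

	"github.com/cilium/tetragon/api/v1/tetragon"
)

// copyAndSetTid sketches the copy-then-patch pattern: deep-copy the
// cached thread-leader Process so concurrent readers (e.g. gRPC
// encoders) never observe a mutation, then overwrite the per-thread
// TID with the value recorded by BPF.
func copyAndSetTid(cached *tetragon.Process, tid uint32) *tetragon.Process {
	proc := proto.Clone(cached).(*tetragon.Process)
	proc.Tid = &wrapperspb.UInt32Value{Value: tid}
	return proc
}
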
38 changes: 31 additions & 7 deletions pkg/grpc/exec/exec.go
@@ -288,6 +288,34 @@ func GetProcessExit(event *MsgExitEventUnix) *tetragon.ProcessExit {
code := event.Info.Code >> 8
signal := readerexec.Signal(event.Info.Code & 0xFF)

// Per thread tracking rules PID == TID.
//
// Exit events should have TID == PID; at the same time, we want to
// correlate the {TID,PID} of the exit event with the {TID,PID} pair
// from the exec event. They must match, because that is how we link
// the exit event to the exec one.
//
// The exit event is constructed by looking up the process by its PID
// in the user-space cache, so we end up with the TID that was pushed
// into the process cache during clone or exec.
//
// Add extra logic to WARN on conditions where TID != PID, to aid
// debugging and catch this unexpected case. Typically it indicates a
// bug either in BPF or in the user-space caching logic. When this
// condition is encountered we warn about it, but for the exit event
// the TID of the cached process is re-used.
//
// The check must be against event.Info.Tid so that we cover all cases
// of comparing tetragonProcess.Pid against the value recorded by BPF.
if tetragonProcess.Pid.GetValue() != event.Info.Tid {
logger.GetLogger().WithFields(logrus.Fields{
"event.name": "Exit",
"event.process.pid": event.ProcessKey.Pid,
"event.process.tid": event.Info.Tid,
"event.process.binary": tetragonProcess.Binary,
}).Warn("ExitEvent: process PID and TID mismatch")
}

tetragonEvent := &tetragon.ProcessExit{
Process: tetragonProcess,
Parent: tetragonParent,
@@ -306,10 +334,7 @@ func GetProcessExit(event *MsgExitEventUnix) *tetragon.ProcessExit {
parent.RefDec()
}
if proc != nil {
tetragonEvent.Process = proc.GetProcessCopy()
proc.RefDec()
// Use the bpf recorded TID to update the event
process.UpdateEventProcessTid(tetragonEvent.Process, &event.Info.Tid)
}
return tetragonEvent
}
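
The event.Info.Code decoding at the top of this function (code := event.Info.Code >> 8, signal from event.Info.Code & 0xFF) follows the classic wait(2)-style status layout: the low byte carries the signal number (plus the core-dump flag) and the next byte the exit code. A small self-contained sketch of that decoding, with illustrative names:

package example

import "syscall"

// decodeExitStatus splits a packed exit status into the exit code
// (bits 8-15) and the terminating signal (low byte), mirroring the
// decoding done in GetProcessExit above.
func decodeExitStatus(raw uint32) (code uint32, sig syscall.Signal) {
	return raw >> 8, syscall.Signal(raw & 0xFF)
}

// Example: a process killed by SIGKILL has raw == 9, so code == 0 and
// sig == syscall.SIGKILL; a clean exit(1) has raw == 1<<8, so code == 1.
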
@@ -339,13 +364,12 @@ func (msg *MsgExitEventUnix) RetryInternal(ev notify.Event, timestamp uint64) (*
}

if internal != nil {
// Use cached version of the process
ev.SetProcess(internal.UnsafeGetProcess())
if !msg.RefCntDone[ProcessRefCnt] {
internal.RefDec()
msg.RefCntDone[ProcessRefCnt] = true
}
proc := internal.GetProcessCopy()
// Update the Process TID with the recorded one from BPF side
process.UpdateEventProcessTid(proc, &msg.Info.Tid)
} else {
errormetrics.ErrorTotalInc(errormetrics.EventCacheProcessInfoFailed)
err = eventcache.ErrFailedToGetProcessInfo
@@ -358,7 +382,7 @@ func (msg *MsgExitEventUnix) RetryInternal(ev notify.Event, timestamp uint64) (*
}

func (msg *MsgExitEventUnix) Retry(internal *process.ProcessInternal, ev notify.Event) error {
return eventcache.HandleGenericEvent(internal, ev, &msg.Info.Tid)
return eventcache.HandleGenericEvent(internal, ev, nil)
}
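
Note the behavioral change in these retry paths: instead of GetProcessCopy plus a TID update, the exit event now references the cached process via UnsafeGetProcess, which is safe here precisely because the exit TID is forced to the TGID on the BPF side, so no per-thread field has to be patched. A rough sketch of the trade-off between the two accessors (simplified; Tetragon's real ProcessInternal also handles refcounting and pod association):

package example

import (
	"google.golang.org/protobuf/proto"

	"github.com/cilium/tetragon/api/v1/tetragon"
)

// processInternal is a simplified stand-in for Tetragon's cache entry.
type processInternal struct {
	process *tetragon.Process
}

// UnsafeGetProcess returns the shared pointer: cheap, but the caller
// must treat the result as read-only, since other events reference it.
func (p *processInternal) UnsafeGetProcess() *tetragon.Process {
	return p.process
}

// GetProcessCopy returns a deep copy: costly, but safe to mutate, e.g.
// to patch per-thread fields such as the TID.
func (p *processInternal) GetProcessCopy() *tetragon.Process {
	return proto.Clone(p.process).(*tetragon.Process)
}
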

func (msg *MsgExitEventUnix) HandleMessage() *tetragon.GetEventsResponse {
26 changes: 21 additions & 5 deletions pkg/grpc/tracing/tracing.go
@@ -273,8 +273,14 @@ func GetProcessKprobe(event *MsgGenericKprobeUnix) *tetragon.ProcessKprobe {
}

if proc != nil {
// At kprobes we report the per thread fields, so take a copy
// of the thread leader from the cache, then update the corresponding
// per thread fields.
//
// The cost to get this is relatively high because it requires a
// deep copy of all the fields of the thread leader from the cache in
// order to safely modify them without corrupting gRPC streams.
tetragonEvent.Process = proc.GetProcessCopy()
// Use the bpf recorded TID to update the event
process.UpdateEventProcessTid(tetragonEvent.Process, &event.Tid)
}
if parent != nil {
@@ -373,10 +379,14 @@ func (msg *MsgGenericTracepointUnix) HandleMessage() *tetragon.GetEventsResponse
}

if proc != nil {
tetragonEvent.Process = proc.GetProcessCopy()
// Use the bpf recorded TID to update the event
// At tracepoints we report the per thread fields, so take a copy
// of the thread leader from the cache, then update the corresponding
// per thread fields.
//
// The cost to get this is relatively high because it requires a
// deep copyo of the process in order to safely modify it.
// deep copy of all the fields of the thread leader from the cache in
// order to safely modify them without corrupting gRPC streams.
tetragonEvent.Process = proc.GetProcessCopy()
process.UpdateEventProcessTid(tetragonEvent.Process, &msg.Tid)
}

@@ -593,8 +603,14 @@ func GetProcessUprobe(event *MsgGenericUprobeUnix) *tetragon.ProcessUprobe {
}

if proc != nil {
// At uprobes we report the per thread fields, so take a copy
// of the thread leader from the cache, then update the corresponding
// per thread fields.
//
// The cost to get this is relatively high because it requires a
// deep copy of all the fields of the thread leader from the cache in
// order to safely modify them without corrupting gRPC streams.
tetragonEvent.Process = proc.GetProcessCopy()
// Use the bpf recorded TID to update the event
process.UpdateEventProcessTid(tetragonEvent.Process, &event.Tid)
}
return tetragonEvent
47 changes: 27 additions & 20 deletions pkg/process/process.go
@@ -202,13 +202,32 @@ func initProcessInternalExec(
protoPod := GetPodInfo(containerID, process.Filename, args, process.NSPID)
caps := caps.GetMsgCapabilities(capabilities)
ns := namespace.GetMsgNamespaces(namespaces)
binary := path.GetBinaryAbsolutePath(process.Filename, cwd)

// Per thread tracking rules PID == TID
//
// Ensure that exported events have the TID set. For events generated
// by kernel threads, PID will be 0, so instead of checking against 0,
// assert that TGID == TID.
if process.PID != process.TID {
logger.GetLogger().WithFields(logrus.Fields{
"event.name": "Execve",
"event.process.pid": process.PID,
"event.process.tid": process.TID,
"event.process.binary": binary,
"event.process.exec_id": execID,
"event.parent.exec_id": parentExecID,
}).Warn("ExecveEvent: process PID and TID mismatch")

Review comment from @jrfastab (Contributor, Sep 20, 2023):

    These warnings need to be metric error counters so we can actually find them. No one will notice a rare and random warning in the logs. (Note I wouldn't block this PR on the metric implementation but I do think we should get in soon to catch this.)

Reply from @tixxdz (Member, Author):

    So all fixed except for this adding metrics, will do in separate PR, so I don't mess up this one ;-)

// Explicitly reset TID to be PID
process.TID = process.PID
}
return &ProcessInternal{
process: &tetragon.Process{
Pid: &wrapperspb.UInt32Value{Value: process.PID},
Tid: &wrapperspb.UInt32Value{Value: process.TID},
Uid: &wrapperspb.UInt32Value{Value: process.UID},
Cwd: cwd,
Binary: path.GetBinaryAbsolutePath(process.Filename, cwd),
Binary: binary,
Arguments: args,
Flags: strings.Join(exec.DecodeCommonFlags(process.Flags), " "),
StartTime: ktime.ToProtoOpt(process.Ktime, (process.Flags&api.EventProcFS) == 0),
@@ -243,8 +262,9 @@ func initProcessInternalClone(event *tetragonAPI.MsgCloneEvent,
pi.process.ParentExecId = parentExecId
pi.process.ExecId = GetProcessID(event.PID, event.Ktime)
pi.process.Pid = &wrapperspb.UInt32Value{Value: event.PID}
// Since from BPF side we only generate one clone event per
// thread group that is for the leader, assert on that.
// Per thread tracking rules PID == TID: ensure that TID equals PID.
// Since from BPF side we only generate one clone event per
// thread group that is for the leader, assert on that.
if event.PID != event.TID {
logger.GetLogger().WithFields(logrus.Fields{
"event.name": "Clone",
@@ -254,11 +274,11 @@
"event.parent.exec_id": parentExecId,
}).Debug("CloneEvent: process PID and TID mismatch")
}
// Set the TID here and if we have an exit without an exec we report
// directly this TID without copying again objects.
// At kprobe times we use the returned TIDs from bpf side.
pi.process.Tid = &wrapperspb.UInt32Value{Value: event.PID}

// This TID will be updated with the TID from the BPF execve event
// later, so set it to zero here and ensure that it gets updated.
// Exported events must always be generated with a non-zero TID.
pi.process.Tid = &wrapperspb.UInt32Value{Value: 0}
pi.process.Flags = strings.Join(exec.DecodeCommonFlags(event.Flags), " ")
pi.process.StartTime = ktime.ToProto(event.Ktime)
pi.process.Refcnt = 1
@@ -310,19 +330,6 @@ func AddExecEvent(event *tetragonAPI.MsgExecveEventUnix) *ProcessInternal {
proc = initProcessInternalExec(event.Process, event.Kube.Docker, event.CleanupProcess, event.Capabilities, event.Namespaces)
}

// Ensure that exported events have the TID set. For events from Kernel
// we usually use PID == 0, so instead of checking against 0, assert that
// TGID == TID
if proc.process.Pid.GetValue() != proc.process.Tid.GetValue() {
logger.GetLogger().WithFields(logrus.Fields{
"event.name": "Execve",
"event.process.pid": proc.process.Pid.GetValue(),
"event.process.tid": proc.process.Tid.GetValue(),
"event.process.exec_id": proc.process.ExecId,
"event.process.binary": proc.process.Binary,
}).Warn("ExecveEvent: process PID and TID mismatch")
}

procCache.add(proc)
return proc
}
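
Following up on the review thread above about promoting these warnings to metrics: a minimal sketch of what such an error counter could look like with prometheus/client_golang. The metric name and label are hypothetical, not what Tetragon shipped in the follow-up PR:

package example

import "github.com/prometheus/client_golang/prometheus"

// pidTidMismatch counts PID/TID mismatch occurrences per event type,
// so rare inconsistencies show up in monitoring rather than only as
// stray log lines.
var pidTidMismatch = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Name: "tetragon_pid_tid_mismatch_total", // hypothetical name
		Help: "Number of events where the thread ID did not match the thread group ID.",
	},
	[]string{"event"},
)

func init() {
	prometheus.MustRegister(pidTidMismatch)
}

// Usage next to the existing Warn calls, e.g.:
//   pidTidMismatch.WithLabelValues("Execve").Inc()
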