From a1da1b607800033d2f6fec046b0fba456dd91695 Mon Sep 17 00:00:00 2001 From: Kevin Sheldrake Date: Mon, 11 Mar 2024 16:28:15 +0000 Subject: [PATCH] Memory: Swap probe_read to kernel or user version We should always use the probe_read_kernel or probe_read_user helpers over the probe_read helper (ditto for _str versions). This commit changes all probe_read to either probe_read_kernel or probe_read_user (ditto for _str versions). Signed-off-by: Kevin Sheldrake --- bpf/Makefile | 27 +- bpf/cgroup/bpf_cgroup_events.h | 2 +- bpf/include/api.h | 3 + bpf/lib/bpf_cgroup.h | 20 +- bpf/lib/bpf_helpers.h | 11 +- bpf/lib/bpf_task.h | 27 +- bpf/lib/generic.h | 1 + bpf/lib/process.h | 2 +- bpf/libbpf/bpf_core_read.h | 382 +++++++++++++++--- bpf/libbpf/bpf_tracing.h | 6 +- bpf/process/bpf_execve_bprm_commit_creds.c | 1 + bpf/process/bpf_execve_event.c | 22 +- bpf/process/bpf_exit.c | 1 + bpf/process/bpf_exit.h | 4 +- bpf/process/bpf_generic_retkprobe.c | 63 ++- bpf/process/bpf_generic_tracepoint.c | 16 +- bpf/process/bpf_loader.c | 7 +- bpf/process/bpf_process_event.h | 140 +++---- bpf/process/data_event.h | 51 ++- bpf/process/generic_calls.h | 9 +- bpf/process/retprobe_map.h | 11 +- bpf/process/types/basic.h | 327 ++++++++------- bpf/process/types/probe_read_kernel_or_user.h | 150 +++++++ bpf/process/types/skb.h | 65 +-- bpf/process/types/sock.h | 35 +- pkg/api/tracingapi/client_kprobe.go | 2 + pkg/kernels/kernels.go | 2 + pkg/sensors/tracing/args.go | 9 +- pkg/sensors/tracing/generickprobe.go | 15 +- pkg/sensors/tracing/generictracepoint.go | 5 +- pkg/sensors/tracing/genericuprobe.go | 2 +- pkg/sensors/tracing/kprobe_test.go | 23 +- pkg/sensors/tracing/tracepoint_amd64_test.go | 8 +- pkg/sensors/tracing/tracepoint_test.go | 4 +- 34 files changed, 976 insertions(+), 477 deletions(-) create mode 100644 bpf/process/types/probe_read_kernel_or_user.h diff --git a/bpf/Makefile b/bpf/Makefile index c04f90dd7b7..842573ad044 100644 --- a/bpf/Makefile +++ b/bpf/Makefile @@ -25,6 +25,11 @@ PROCESS = bpf_execve_event.o bpf_execve_event_v53.o bpf_fork.o bpf_exit.o bpf_ge bpf_generic_tracepoint_v511.o \ bpf_multi_kprobe_v511.o bpf_multi_retkprobe_v511.o \ bpf_generic_uprobe_v511.o \ + bpf_execve_event_v54.o \ + bpf_generic_kprobe_v54.o bpf_generic_retkprobe_v54.o \ + bpf_generic_tracepoint_v54.o \ + bpf_multi_kprobe_v54.o bpf_multi_retkprobe_v54.o \ + bpf_generic_uprobe_v54.o \ bpf_loader.o \ bpf_enforcer.o bpf_multi_enforcer.o bpf_fmodret_enforcer.o @@ -71,6 +76,7 @@ endef # Generic build targets for each sub-dir $(eval $(call DEFINE_VARIANT,v53)) +$(eval $(call DEFINE_VARIANT,v54)) $(eval $(call DEFINE_VARIANT,v511)) $(eval $(call DEFINE_VARIANT,v61)) @@ -123,22 +129,31 @@ $(DEPSDIR)%_v53.d: $(CLANG) $(CLANG_FLAGS) -D__LARGE_BPF_PROG -MM -MP -MT $(patsubst $(DEPSDIR)%.d, $(OBJSDIR)%.ll, $@) $< > $@ objs/bpf_multi_kprobe_v61.ll objs/bpf_multi_retkprobe_v61.ll: - $(CLANG) $(CLANG_FLAGS) -D__LARGE_BPF_PROG -D__LARGE_MAP_KEYS -D__V61_BPF_PROG -D__MULTI_KPROBE -c $< -o $@ + $(CLANG) $(CLANG_FLAGS) -D__LARGE_BPF_PROG -D__PROBE_KERNEL -D__LARGE_MAP_KEYS -D__V61_BPF_PROG -D__MULTI_KPROBE -c $< -o $@ objs/%_v61.ll: - $(CLANG) $(CLANG_FLAGS) -D__LARGE_BPF_PROG -D__LARGE_MAP_KEYS -D__V61_BPF_PROG -c $< -o $@ + $(CLANG) $(CLANG_FLAGS) -D__LARGE_BPF_PROG -D__PROBE_KERNEL -D__LARGE_MAP_KEYS -D__V61_BPF_PROG -c $< -o $@ $(DEPSDIR)%_v61.d: - $(CLANG) $(CLANG_FLAGS) -D__LARGE_BPF_PROG -D__LARGE_MAP_KEYS -D__V61_BPF_PROG -MM -MP -MT $(patsubst $(DEPSDIR)%.d, $(OBJSDIR)%.ll, $@) $< > $@ + $(CLANG) $(CLANG_FLAGS) -D__LARGE_BPF_PROG 
-D__PROBE_KERNEL -D__LARGE_MAP_KEYS -D__V61_BPF_PROG -MM -MP -MT $(patsubst $(DEPSDIR)%.d, $(OBJSDIR)%.ll, $@) $< > $@ objs/bpf_multi_kprobe_v511.ll objs/bpf_multi_retkprobe_v511.ll: - $(CLANG) $(CLANG_FLAGS) -D__LARGE_BPF_PROG -D__LARGE_MAP_KEYS -D__MULTI_KPROBE -c $< -o $@ + $(CLANG) $(CLANG_FLAGS) -D__LARGE_BPF_PROG -D__PROBE_KERNEL -D__LARGE_MAP_KEYS -D__MULTI_KPROBE -c $< -o $@ objs/%_v511.ll: - $(CLANG) $(CLANG_FLAGS) -D__LARGE_BPF_PROG -D__LARGE_MAP_KEYS -c $< -o $@ + $(CLANG) $(CLANG_FLAGS) -D__LARGE_BPF_PROG -D__PROBE_KERNEL -D__LARGE_MAP_KEYS -c $< -o $@ $(DEPSDIR)%_v511.d: - $(CLANG) $(CLANG_FLAGS) -D__LARGE_BPF_PROG -D__LARGE_MAP_KEYS -MM -MP -MT $(patsubst $(DEPSDIR)%.d, $(OBJSDIR)%.ll, $@) $< > $@ + $(CLANG) $(CLANG_FLAGS) -D__LARGE_BPF_PROG -D__PROBE_KERNEL -D__LARGE_MAP_KEYS -MM -MP -MT $(patsubst $(DEPSDIR)%.d, $(OBJSDIR)%.ll, $@) $< > $@ + +objs/bpf_multi_kprobe_v54.ll objs/bpf_multi_retkprobe_v54.ll: + $(CLANG) $(CLANG_FLAGS) -D__LARGE_BPF_PROG -D__PROBE_KERNEL -D__MULTI_KPROBE -c $< -o $@ + +objs/%_v54.ll: + $(CLANG) $(CLANG_FLAGS) -D__LARGE_BPF_PROG -D__PROBE_KERNEL -c $< -o $@ + +$(DEPSDIR)%_v54.d: + $(CLANG) $(CLANG_FLAGS) -D__LARGE_BPF_PROG -D__PROBE_KERNEL -MM -MP -MT $(patsubst $(DEPSDIR)%.d, $(OBJSDIR)%.ll, $@) $< > $@ # BPFTESTDIR objs/%.ll: $(BPFTESTDIR)%.c diff --git a/bpf/cgroup/bpf_cgroup_events.h b/bpf/cgroup/bpf_cgroup_events.h index 026ac358617..9227eaf85e1 100644 --- a/bpf/cgroup/bpf_cgroup_events.h +++ b/bpf/cgroup/bpf_cgroup_events.h @@ -49,7 +49,7 @@ send_cgrp_event(struct bpf_raw_tracepoint_args *ctx, msg->cgrp_data.level = cgrp_track->level; msg->cgrp_data.hierarchy_id = cgrp_track->hierarchy_id; memcpy(&msg->cgrp_data.name, &cgrp_track->name, KN_NAME_LENGTH); - probe_read_str(&msg->path, PATH_MAP_SIZE - 1, path); + probe_read_kernel_str(&msg->path, PATH_MAP_SIZE - 1, path); perf_event_output_metric(ctx, MSG_OP_CGROUP, &tcpmon_map, BPF_F_CURRENT_CPU, msg, size); diff --git a/bpf/include/api.h b/bpf/include/api.h index 9c272ab738b..7cfaa944c53 100644 --- a/bpf/include/api.h +++ b/bpf/include/api.h @@ -204,6 +204,9 @@ static int BPF_FUNC(fib_lookup, void *ctx, struct bpf_fib_lookup *params, uint32 static int BPF_FUNC(probe_read, void *dst, uint32_t size, const void *src); static int BPF_FUNC(probe_read_str, void *dst, int size, const void *src); static int BPF_FUNC(probe_read_kernel, void *dst, uint32_t size, const void *src); +static int BPF_FUNC(probe_read_kernel_str, void *dst, int size, const void *src); +static int BPF_FUNC(probe_read_user, void *dst, uint32_t size, const void *src); +static int BPF_FUNC(probe_read_user_str, void *dst, int size, const void *src); static uint64_t BPF_FUNC(get_current_task); diff --git a/bpf/lib/bpf_cgroup.h b/bpf/lib/bpf_cgroup.h index 94ccb106d7c..b07c8cd8ef8 100644 --- a/bpf/lib/bpf_cgroup.h +++ b/bpf/lib/bpf_cgroup.h @@ -9,6 +9,8 @@ #include "environ_conf.h" #include "common.h" #include "process.h" +#include "../process/types/probe_read_kernel_or_user.h" +#include "bpf_tracing.h" #define NULL ((void *)0) @@ -109,7 +111,7 @@ __get_cgroup_kn_name(const struct kernfs_node *kn) const char *name = NULL; if (kn) - probe_read(&name, sizeof(name), _(&kn->name)); + probe_read_kernel(&name, sizeof(name), _(&kn->name)); return name; } @@ -139,7 +141,7 @@ __get_cgroup_kn_id(const struct kernfs_node *kn) if (BPF_CORE_READ_INTO(&id, old_kn, id.id) != 0) return 0; } else { - probe_read(&id, sizeof(id), _(&kn->id)); + probe_read_kernel(&id, sizeof(id), _(&kn->id)); } return id; @@ -157,7 +159,7 @@ __get_cgroup_kn(const struct 
cgroup *cgrp) struct kernfs_node *kn = NULL; if (cgrp) - probe_read(&kn, sizeof(cgrp->kn), _(&cgrp->kn)); + probe_read_kernel(&kn, sizeof(cgrp->kn), _(&cgrp->kn)); return kn; } @@ -187,7 +189,7 @@ get_cgroup_hierarchy_id(const struct cgroup *cgrp) * @cgrp: target cgroup * * Returns a pointer to the cgroup node name on success that can - * be read with probe_read(). NULL on failures. + * be read with probe_read_kernel(). NULL on failures. */ static inline __attribute__((always_inline)) const char * get_cgroup_name(const struct cgroup *cgrp) @@ -214,7 +216,7 @@ get_cgroup_level(const struct cgroup *cgrp) { __u32 level = 0; - probe_read(&level, sizeof(level), _(&cgrp->level)); + probe_read_kernel(&level, sizeof(level), _(&cgrp->level)); return level; } @@ -264,7 +266,7 @@ get_task_cgroup(struct task_struct *task, __u32 subsys_idx, __u32 *error_flags) struct css_set *cgroups; struct cgroup *cgrp = NULL; - probe_read(&cgroups, sizeof(cgroups), _(&task->cgroups)); + probe_read_kernel(&cgroups, sizeof(cgroups), _(&task->cgroups)); if (unlikely(!cgroups)) { *error_flags |= EVENT_ERROR_CGROUPS; return cgrp; @@ -297,13 +299,13 @@ get_task_cgroup(struct task_struct *task, __u32 subsys_idx, __u32 *error_flags) * support as much as workload as possible. It also reduces errors * in a significant way. */ - probe_read(&subsys, sizeof(subsys), _(&cgroups->subsys[subsys_idx])); + probe_read_kernel(&subsys, sizeof(subsys), _(&cgroups->subsys[subsys_idx])); if (unlikely(!subsys)) { *error_flags |= EVENT_ERROR_CGROUP_SUBSYS; return cgrp; } - probe_read(&cgrp, sizeof(cgrp), _(&subsys->cgroup)); + probe_read_kernel(&cgrp, sizeof(cgrp), _(&subsys->cgroup)); if (!cgrp) *error_flags |= EVENT_ERROR_CGROUP_SUBSYSCGRP; @@ -426,7 +428,7 @@ __init_cgrp_tracking_val_heap(struct cgroup *cgrp, cgroup_state state) kn = __get_cgroup_kn(cgrp); name = __get_cgroup_kn_name(kn); if (name) - probe_read_str(&heap->name, KN_NAME_LENGTH - 1, name); + probe_read_kernel_str(&heap->name, KN_NAME_LENGTH - 1, name); return heap; } diff --git a/bpf/lib/bpf_helpers.h b/bpf/lib/bpf_helpers.h index f57410880af..91b5691aac5 100644 --- a/bpf/lib/bpf_helpers.h +++ b/bpf/lib/bpf_helpers.h @@ -43,7 +43,7 @@ * Following define is to assist VSCode Intellisense so that it treats * __builtin_preserve_access_index() as a const void * instead of a * simple void (because it doesn't have a definition for it). This stops - * Intellisense marking all _(P) macros (used in probe_read()) as errors. + * Intellisense marking all _(P) macros (used in probe_read_kernel()) as errors. * To use this, just define VSCODE in 'C/C++: Edit Configurations (JSON)' * in the Command Palette in VSCODE (F1 or View->Command Palette...): * "defines": ["VSCODE"] @@ -54,15 +54,6 @@ const void *__builtin_preserve_access_index(void *); #endif #define _(P) (__builtin_preserve_access_index(P)) -/* - * Convenience macro to check that field actually exists in target kernel's. - * Returns: - * 1, if matching field is present in target kernel; - * 0, if no matching field found. 
- */ -#define bpf_core_field_exists(field) \ - __builtin_preserve_field_info(field, BPF_FIELD_EXISTS) - /* second argument to __builtin_preserve_enum_value() built-in */ enum bpf_enum_value_kind { BPF_ENUMVAL_EXISTS = 0, /* enum value existence in kernel */ diff --git a/bpf/lib/bpf_task.h b/bpf/lib/bpf_task.h index f4e2e9bc13a..c1e2713642d 100644 --- a/bpf/lib/bpf_task.h +++ b/bpf/lib/bpf_task.h @@ -7,6 +7,7 @@ #include "bpf_event.h" #include "bpf_helpers.h" #include "generic.h" +#include "bpf_tracing.h" /* __d_path_local flags */ // #define UNRESOLVED_MOUNT_POINTS 0x01 // (deprecated) @@ -27,7 +28,7 @@ get_parent(struct task_struct *t) struct task_struct *task; /* Read the real parent */ - probe_read(&task, sizeof(task), _(&t->real_parent)); + probe_read_kernel(&task, sizeof(task), _(&t->real_parent)); if (!task) return 0; return task; @@ -47,7 +48,7 @@ get_task_from_pid(__u32 pid) i = TASK_PID_LOOP; continue; } - probe_read(&cpid, sizeof(cpid), _(&task->tgid)); + probe_read_kernel(&cpid, sizeof(cpid), _(&task->tgid)); if (cpid == pid) { i = TASK_PID_LOOP; continue; @@ -70,7 +71,7 @@ static inline __attribute__((always_inline)) __u32 get_task_pid_vnr(void) thread_pid_exists = bpf_core_field_exists(task->thread_pid); if (thread_pid_exists) { - probe_read(&pid, sizeof(pid), _(&task->thread_pid)); + probe_read_kernel(&pid, sizeof(pid), _(&task->thread_pid)); if (!pid) return 0; } else { @@ -85,16 +86,16 @@ static inline __attribute__((always_inline)) __u32 get_task_pid_vnr(void) if (!thread_pid_exists) link_sz = 24; // voodoo magic, hard-code 24 to init stack - probe_read(&link, link_sz, - (void *)_(&task->pids) + (PIDTYPE_PID * link_sz)); + probe_read_kernel(&link, link_sz, + (void *)_(&task->pids) + (PIDTYPE_PID * link_sz)); pid = link.pid; } upid_sz = bpf_core_field_size(pid->numbers[0]); - probe_read(&level, sizeof(level), _(&pid->level)); + probe_read_kernel(&level, sizeof(level), _(&pid->level)); if (level < 1) return 0; - probe_read(&upid, upid_sz, - (void *)_(&pid->numbers) + (level * upid_sz)); + probe_read_kernel(&upid, upid_sz, + (void *)_(&pid->numbers) + (level * upid_sz)); return upid.nr; } @@ -106,7 +107,7 @@ event_find_parent_pid(struct task_struct *t) if (!task) return 0; - probe_read(&pid, sizeof(pid), _(&task->tgid)); + probe_read_kernel(&pid, sizeof(pid), _(&task->tgid)); return pid; } @@ -119,10 +120,10 @@ __event_find_parent(struct task_struct *task) #pragma unroll for (i = 0; i < 4; i++) { - probe_read(&task, sizeof(task), _(&task->real_parent)); + probe_read_kernel(&task, sizeof(task), _(&task->real_parent)); if (!task) break; - probe_read(&pid, sizeof(pid), _(&task->tgid)); + probe_read_kernel(&pid, sizeof(pid), _(&task->tgid)); value = execve_map_get_noinit(pid); if (value && value->key.ktime != 0) return value; @@ -164,13 +165,13 @@ event_find_curr(__u32 *ppid, bool *walked) #pragma unroll for (i = 0; i < 4; i++) { - probe_read(&pid, sizeof(pid), _(&task->tgid)); + probe_read_kernel(&pid, sizeof(pid), _(&task->tgid)); value = execve_map_get_noinit(pid); if (value && value->key.ktime != 0) break; value = 0; *walked = 1; - probe_read(&task, sizeof(task), _(&task->real_parent)); + probe_read_kernel(&task, sizeof(task), _(&task->real_parent)); if (!task) break; } diff --git a/bpf/lib/generic.h b/bpf/lib/generic.h index ea31582e04d..01cbc57476d 100644 --- a/bpf/lib/generic.h +++ b/bpf/lib/generic.h @@ -48,6 +48,7 @@ struct msg_generic_kprobe { /* anything above is shared with the userspace so it should match structs MsgGenericKprobe and MsgGenericTracepoint in Go */ 
char args[24000]; unsigned long a0, a1, a2, a3, a4; + unsigned long ret; long argsoff[MAX_POSSIBLE_ARGS]; struct msg_selector_data sel; __u32 idx; // attach cookie index diff --git a/bpf/lib/process.h b/bpf/lib/process.h index bdadea67115..6f4c90cf597 100644 --- a/bpf/lib/process.h +++ b/bpf/lib/process.h @@ -51,7 +51,7 @@ * Now we want to read this with call 45 aka probe_read_str as follows, * where 'kernel_struct_arg' is the kernel data struct we are reading. * - * probe_read_str(args[offset], size, kernel_struct_arg) + * probe_read_kernel_str(args[offset], size, kernel_struct_arg) * * But we have a bit of a problem determining if 'size' is out of array * range. The math would be, diff --git a/bpf/libbpf/bpf_core_read.h b/bpf/libbpf/bpf_core_read.h index 27634770a94..e03ecaa356f 100644 --- a/bpf/libbpf/bpf_core_read.h +++ b/bpf/libbpf/bpf_core_read.h @@ -19,6 +19,12 @@ enum bpf_field_info_kind { BPF_FIELD_RSHIFT_U64 = 5, }; +/* second argument to __builtin_btf_type_id() built-in */ +enum bpf_type_id_kind { + BPF_TYPE_ID_LOCAL = 0, /* BTF type ID in local program */ + BPF_TYPE_ID_TARGET = 1, /* BTF type ID in target kernel */ +}; + /* second argument to __builtin_preserve_type_info() built-in */ enum bpf_type_info_kind { BPF_TYPE_EXISTS = 0, /* type existence in target kernel */ @@ -29,29 +35,31 @@ enum bpf_type_info_kind { #define __CORE_RELO(src, field, info) \ __builtin_preserve_field_info((src)->field, BPF_FIELD_##info) -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ - bpf_probe_read((void *)dst, \ - __CORE_RELO(src, fld, BYTE_SIZE), \ - (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) + bpf_probe_read_kernel( \ + (void *)dst, \ + __CORE_RELO(src, fld, BYTE_SIZE), \ + (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) #else /* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so * for big-endian we need to adjust destination pointer accordingly, based on * field byte size */ #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ - bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \ - __CORE_RELO(src, fld, BYTE_SIZE), \ - (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) + bpf_probe_read_kernel( \ + (void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \ + __CORE_RELO(src, fld, BYTE_SIZE), \ + (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) #endif /* * Extract bitfield, identified by s->field, and return its value as u64. * All this is done in relocatable manner, so bitfield changes such as * signedness, bit size, offset changes, this will be handled automatically. - * This version of macro is using bpf_probe_read() to read underlying integer - * storage. Macro functions as an expression and its return type is - * bpf_probe_read()'s return value: 0, on success, <0 on error. + * This version of macro is using bpf_probe_read_kernel() to read underlying + * integer storage. Macro functions as an expression and its return type is + * bpf_probe_read_kernel()'s return value: 0, on success, <0 on error. */ #define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({ \ unsigned long long val = 0; \ @@ -75,11 +83,19 @@ enum bpf_type_info_kind { const void *p = (const void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \ unsigned long long val; \ \ + /* This is a so-called barrier_var() operation that makes specified \ + * variable "a black box" for optimizing compiler. 
\ + * It forces compiler to perform BYTE_OFFSET relocation on p and use \ + * its calculated value in the switch below, instead of applying \ + * the same relocation 4 times for each individual memory load. \ + */ \ + asm volatile("" : "=r"(p) : "0"(p)); \ + \ switch (__CORE_RELO(s, field, BYTE_SIZE)) { \ - case 1: val = *(const unsigned char *)p; \ - case 2: val = *(const unsigned short *)p; \ - case 4: val = *(const unsigned int *)p; \ - case 8: val = *(const unsigned long long *)p; \ + case 1: val = *(const unsigned char *)p; break; \ + case 2: val = *(const unsigned short *)p; break; \ + case 4: val = *(const unsigned int *)p; break; \ + case 8: val = *(const unsigned long long *)p; break; \ } \ val <<= __CORE_RELO(s, field, LSHIFT_U64); \ if (__CORE_RELO(s, field, SIGNED)) \ @@ -89,21 +105,121 @@ enum bpf_type_info_kind { val; \ }) +/* + * Write to a bitfield, identified by s->field. + * This is the inverse of BPF_CORE_WRITE_BITFIELD(). + */ +#define BPF_CORE_WRITE_BITFIELD(s, field, new_val) ({ \ + void *p = (void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \ + unsigned int byte_size = __CORE_RELO(s, field, BYTE_SIZE); \ + unsigned int lshift = __CORE_RELO(s, field, LSHIFT_U64); \ + unsigned int rshift = __CORE_RELO(s, field, RSHIFT_U64); \ + unsigned long long mask, val, nval = new_val; \ + unsigned int rpad = rshift - lshift; \ + \ + asm volatile("" : "+r"(p)); \ + \ + switch (byte_size) { \ + case 1: val = *(unsigned char *)p; break; \ + case 2: val = *(unsigned short *)p; break; \ + case 4: val = *(unsigned int *)p; break; \ + case 8: val = *(unsigned long long *)p; break; \ + } \ + \ + mask = (~0ULL << rshift) >> lshift; \ + val = (val & ~mask) | ((nval << rpad) & mask); \ + \ + switch (byte_size) { \ + case 1: *(unsigned char *)p = val; break; \ + case 2: *(unsigned short *)p = val; break; \ + case 4: *(unsigned int *)p = val; break; \ + case 8: *(unsigned long long *)p = val; break; \ + } \ +}) + +/* Differentiator between compilers builtin implementations. This is a + * requirement due to the compiler parsing differences where GCC optimizes + * early in parsing those constructs of type pointers to the builtin specific + * type, resulting in not being possible to collect the required type + * information in the builtin expansion. + */ +#ifdef __clang__ +#define ___bpf_typeof(type) ((typeof(type) *) 0) +#else +#define ___bpf_typeof1(type, NR) ({ \ + extern typeof(type) *___concat(bpf_type_tmp_, NR); \ + ___concat(bpf_type_tmp_, NR); \ +}) +#define ___bpf_typeof(type) ___bpf_typeof1(type, __COUNTER__) +#endif + +#ifdef __clang__ +#define ___bpf_field_ref1(field) (field) +#define ___bpf_field_ref2(type, field) (___bpf_typeof(type)->field) +#else +#define ___bpf_field_ref1(field) (&(field)) +#define ___bpf_field_ref2(type, field) (&(___bpf_typeof(type)->field)) +#endif +#define ___bpf_field_ref(args...) \ + ___bpf_apply(___bpf_field_ref, ___bpf_narg(args))(args) + /* * Convenience macro to check that field actually exists in target kernel's. * Returns: * 1, if matching field is present in target kernel; * 0, if no matching field found. + * + * Supports two forms: + * - field reference through variable access: + * bpf_core_field_exists(p->my_field); + * - field reference through type and field names: + * bpf_core_field_exists(struct my_type, my_field). */ -#define bpf_core_field_exists(field) \ - __builtin_preserve_field_info(field, BPF_FIELD_EXISTS) +#define bpf_core_field_exists(field...) 
\ + __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_EXISTS) /* - * Convenience macro to get byte size of a field. Works for integers, + * Convenience macro to get the byte size of a field. Works for integers, * struct/unions, pointers, arrays, and enums. + * + * Supports two forms: + * - field reference through variable access: + * bpf_core_field_size(p->my_field); + * - field reference through type and field names: + * bpf_core_field_size(struct my_type, my_field). + */ +#define bpf_core_field_size(field...) \ + __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_SIZE) + +/* + * Convenience macro to get field's byte offset. + * + * Supports two forms: + * - field reference through variable access: + * bpf_core_field_offset(p->my_field); + * - field reference through type and field names: + * bpf_core_field_offset(struct my_type, my_field). + */ +#define bpf_core_field_offset(field...) \ + __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_OFFSET) + +/* + * Convenience macro to get BTF type ID of a specified type, using a local BTF + * information. Return 32-bit unsigned integer with type ID from program's own + * BTF. Always succeeds. + */ +#define bpf_core_type_id_local(type) \ + __builtin_btf_type_id(*___bpf_typeof(type), BPF_TYPE_ID_LOCAL) + +/* + * Convenience macro to get BTF type ID of a target kernel's type that matches + * specified local type. + * Returns: + * - valid 32-bit unsigned type ID in kernel BTF; + * - 0, if no matching type was found in a target kernel BTF. */ -#define bpf_core_field_size(field) \ - __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE) +#define bpf_core_type_id_kernel(type) \ + __builtin_btf_type_id(*___bpf_typeof(type), BPF_TYPE_ID_TARGET) /* * Convenience macro to check that provided named type @@ -113,7 +229,27 @@ enum bpf_type_info_kind { * 0, if no matching type is found. */ #define bpf_core_type_exists(type) \ - __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS) + __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_EXISTS) + +/* + * Convenience macro to check that provided named type + * (struct/union/enum/typedef) "matches" that in a target kernel. + * Returns: + * 1, if the type matches in the target kernel's BTF; + * 0, if the type does not match any in the target kernel + */ +#define bpf_core_type_matches(type) \ + __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_MATCHES) + +/* + * Convenience macro to get the byte size of a provided named type + * (struct/union/enum/typedef) in a target kernel. + * Returns: + * >= 0 size (in bytes), if type is present in target kernel's BTF; + * 0, if no matching type is found. + */ +#define bpf_core_type_size(type) \ + __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_SIZE) /* * Convenience macro to check that provided enumerator value is defined in @@ -123,8 +259,13 @@ enum bpf_type_info_kind { * kernel's BTF; * 0, if no matching enum and/or enum value within that enum is found. 
*/ +#ifdef __clang__ #define bpf_core_enum_value_exists(enum_type, enum_value) \ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS) +#else +#define bpf_core_enum_value_exists(enum_type, enum_value) \ + __builtin_preserve_enum_value(___bpf_typeof(enum_type), enum_value, BPF_ENUMVAL_EXISTS) +#endif /* * Convenience macro to get the integer value of an enumerator value in @@ -134,12 +275,17 @@ enum bpf_type_info_kind { * present in target kernel's BTF; * 0, if no matching enum and/or enum value within that enum is found. */ -#define bpf_core_enum_value(enum_type, enum_value) \ +#ifdef __clang__ +#define bpf_core_enum_value(enum_type, enum_value) \ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE) +#else +#define bpf_core_enum_value(enum_type, enum_value) \ + __builtin_preserve_enum_value(___bpf_typeof(enum_type), enum_value, BPF_ENUMVAL_VALUE) +#endif /* - * bpf_core_read() abstracts away bpf_probe_read() call and captures offset - * relocation for source address using __builtin_preserve_access_index() + * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures + * offset relocation for source address using __builtin_preserve_access_index() * built-in, provided by Clang. * * __builtin_preserve_access_index() takes as an argument an expression of @@ -147,24 +293,40 @@ enum bpf_type_info_kind { * a relocation, which records BTF type ID describing root struct/union and an * accessor string which describes exact embedded field that was used to take * an address. See detailed description of this relocation format and - * semantics in comments to struct bpf_field_reloc in libbpf_internal.h. + * semantics in comments to struct bpf_core_relo in include/uapi/linux/bpf.h. * * This relocation allows libbpf to adjust BPF instruction to use correct * actual field offset, based on target kernel BTF type that matches original * (local) BTF, used to record relocation. */ #define bpf_core_read(dst, sz, src) \ - probe_read(dst, sz, \ - (const void *)__builtin_preserve_access_index(src)) + bpf_probe_read_kernel(dst, sz, (const void *)__builtin_preserve_access_index(src)) +/* NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. */ +#define bpf_core_read_user(dst, sz, src) \ + bpf_probe_read_user(dst, sz, (const void *)__builtin_preserve_access_index(src)) /* * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str() * additionally emitting BPF CO-RE field relocation for specified source * argument. */ #define bpf_core_read_str(dst, sz, src) \ - bpf_probe_read_str(dst, sz, \ - (const void *)__builtin_preserve_access_index(src)) + bpf_probe_read_kernel_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) + +/* NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. */ +#define bpf_core_read_user_str(dst, sz, src) \ + bpf_probe_read_user_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) + +extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id); + +/* + * Cast provided pointer *ptr* into a pointer to a specified *type* in such + * a way that BPF verifier will become aware of associated kernel-side BTF + * type. This allows to access members of kernel types directly without the + * need to use BPF_CORE_READ() macros. 
+ */ +#define bpf_core_cast(ptr, type) \ + ((typeof(type) *)bpf_rdonly_cast((ptr), bpf_core_type_id_kernel(type))) #define ___concat(a, b) a ## b #define ___apply(fn, n) ___concat(fn, n) @@ -223,30 +385,29 @@ enum bpf_type_info_kind { read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) /* "recursively" read a sequence of inner pointers using local __t var */ -#define ___rd_first(src, a) ___read(bpf_core_read, &__t, ___type(src), src, a); -#define ___rd_last(...) \ - ___read(bpf_core_read, &__t, \ - ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__)); -#define ___rd_p1(...) const void *__t; ___rd_first(__VA_ARGS__) -#define ___rd_p2(...) ___rd_p1(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p3(...) ___rd_p2(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p4(...) ___rd_p3(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p5(...) ___rd_p4(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p6(...) ___rd_p5(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p7(...) ___rd_p6(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p8(...) ___rd_p7(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p9(...) ___rd_p8(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___read_ptrs(src, ...) \ - ___apply(___rd_p, ___narg(__VA_ARGS__))(src, __VA_ARGS__) - -#define ___core_read0(fn, dst, src, a) \ +#define ___rd_first(fn, src, a) ___read(fn, &__t, ___type(src), src, a); +#define ___rd_last(fn, ...) \ + ___read(fn, &__t, ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__)); +#define ___rd_p1(fn, ...) const void *__t; ___rd_first(fn, __VA_ARGS__) +#define ___rd_p2(fn, ...) ___rd_p1(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p3(fn, ...) ___rd_p2(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p4(fn, ...) ___rd_p3(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p5(fn, ...) ___rd_p4(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p6(fn, ...) ___rd_p5(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p7(fn, ...) ___rd_p6(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p8(fn, ...) ___rd_p7(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p9(fn, ...) ___rd_p8(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___read_ptrs(fn, src, ...) \ + ___apply(___rd_p, ___narg(__VA_ARGS__))(fn, src, __VA_ARGS__) + +#define ___core_read0(fn, fn_ptr, dst, src, a) \ ___read(fn, dst, ___type(src), src, a); -#define ___core_readN(fn, dst, src, ...) \ - ___read_ptrs(src, ___nolast(__VA_ARGS__)) \ +#define ___core_readN(fn, fn_ptr, dst, src, ...) \ + ___read_ptrs(fn_ptr, src, ___nolast(__VA_ARGS__)) \ ___read(fn, dst, ___type(src, ___nolast(__VA_ARGS__)), __t, \ ___last(__VA_ARGS__)); -#define ___core_read(fn, dst, src, a, ...) \ - ___apply(___core_read, ___empty(__VA_ARGS__))(fn, dst, \ +#define ___core_read(fn, fn_ptr, dst, src, a, ...) \ + ___apply(___core_read, ___empty(__VA_ARGS__))(fn, fn_ptr, dst, \ src, a, ##__VA_ARGS__) /* @@ -254,20 +415,73 @@ enum bpf_type_info_kind { * BPF_CORE_READ(), in which final field is read into user-provided storage. * See BPF_CORE_READ() below for more details on general usage. */ -#define BPF_CORE_READ_INTO(dst, src, a, ...) \ - ({ \ - ___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__) \ - }) +#define BPF_CORE_READ_INTO(dst, src, a, ...) 
({ \ + ___core_read(bpf_core_read, bpf_core_read, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* + * Variant of BPF_CORE_READ_INTO() for reading from user-space memory. + * + * NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. + */ +#define BPF_CORE_READ_USER_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_core_read_user, bpf_core_read_user, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* Non-CO-RE variant of BPF_CORE_READ_INTO() */ +#define BPF_PROBE_READ_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_probe_read_kernel, bpf_probe_read_kernel, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* Non-CO-RE variant of BPF_CORE_READ_USER_INTO(). + * + * As no CO-RE relocations are emitted, source types can be arbitrary and are + * not restricted to kernel types only. + */ +#define BPF_PROBE_READ_USER_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_probe_read_user, bpf_probe_read_user, \ + dst, (src), a, ##__VA_ARGS__) \ +}) /* * BPF_CORE_READ_STR_INTO() does same "pointer chasing" as * BPF_CORE_READ() for intermediate pointers, but then executes (and returns * corresponding error code) bpf_core_read_str() for final string read. */ -#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \ - ({ \ - ___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \ - }) +#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_core_read_str, bpf_core_read, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* + * Variant of BPF_CORE_READ_STR_INTO() for reading from user-space memory. + * + * NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. + */ +#define BPF_CORE_READ_USER_STR_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_core_read_user_str, bpf_core_read_user, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* Non-CO-RE variant of BPF_CORE_READ_STR_INTO() */ +#define BPF_PROBE_READ_STR_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_probe_read_kernel_str, bpf_probe_read_kernel, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* + * Non-CO-RE variant of BPF_CORE_READ_USER_STR_INTO(). + * + * As no CO-RE relocations are emitted, source types can be arbitrary and are + * not restricted to kernel types only. + */ +#define BPF_PROBE_READ_USER_STR_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_probe_read_user_str, bpf_probe_read_user, \ + dst, (src), a, ##__VA_ARGS__) \ +}) /* * BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially @@ -278,25 +492,61 @@ enum bpf_type_info_kind { * int x = BPF_CORE_READ(s, a.b.c, d.e, f, g); * * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF - * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to: + * CO-RE relocatable bpf_probe_read_kernel() wrapper) calls, logically + * equivalent to: * 1. const void *__t = s->a.b.c; * 2. __t = __t->d.e; * 3. __t = __t->f; * 4. return __t->g; * * Equivalence is logical, because there is a heavy type casting/preservation - * involved, as well as all the reads are happening through bpf_probe_read() - * calls using __builtin_preserve_access_index() to emit CO-RE relocations. + * involved, as well as all the reads are happening through + * bpf_probe_read_kernel() calls using __builtin_preserve_access_index() to + * emit CO-RE relocations. * * N.B. Only up to 9 "field accessors" are supported, which should be more * than enough for any practical purpose. */ -#define BPF_CORE_READ(src, a, ...) \ - ({ \ - ___type(src, a, ##__VA_ARGS__) __r; \ - BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__); \ - __r; \ - }) +#define BPF_CORE_READ(src, a, ...) 
({ \
+	___type((src), a, ##__VA_ARGS__) __r; \
+	BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \
+	__r; \
+})
+
+/*
+ * Variant of BPF_CORE_READ() for reading from user-space memory.
+ *
+ * NOTE: all the source types involved are still *kernel types* and need to
+ * exist in kernel (or kernel module) BTF, otherwise CO-RE relocation will
+ * fail. Custom user types are not relocatable with CO-RE.
+ * The typical situation in which BPF_CORE_READ_USER() might be used is to
+ * read kernel UAPI types from the user-space memory passed in as a syscall
+ * input argument.
+ */
+#define BPF_CORE_READ_USER(src, a, ...) ({ \
+	___type((src), a, ##__VA_ARGS__) __r; \
+	BPF_CORE_READ_USER_INTO(&__r, (src), a, ##__VA_ARGS__); \
+	__r; \
+})
+
+/* Non-CO-RE variant of BPF_CORE_READ() */
+#define BPF_PROBE_READ(src, a, ...) ({ \
+	___type((src), a, ##__VA_ARGS__) __r; \
+	BPF_PROBE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \
+	__r; \
+})
+
+/*
+ * Non-CO-RE variant of BPF_CORE_READ_USER().
+ *
+ * As no CO-RE relocations are emitted, source types can be arbitrary and are
+ * not restricted to kernel types only.
+ */
+#define BPF_PROBE_READ_USER(src, a, ...) ({ \
+	___type((src), a, ##__VA_ARGS__) __r; \
+	BPF_PROBE_READ_USER_INTO(&__r, (src), a, ##__VA_ARGS__); \
+	__r; \
+})
 
 #endif
diff --git a/bpf/libbpf/bpf_tracing.h b/bpf/libbpf/bpf_tracing.h
index 789556811e2..a6cc5c569c6 100644
--- a/bpf/libbpf/bpf_tracing.h
+++ b/bpf/libbpf/bpf_tracing.h
@@ -2,6 +2,8 @@
 #ifndef __BPF_TRACING_H__
 #define __BPF_TRACING_H__
 
+#include "bpf_core_read.h"
+
 /* Scan the ARCH passed in from ARCH env variable (see Makefile) */
 #if defined(__TARGET_ARCH_x86)
 #define bpf_target_x86
@@ -443,9 +445,9 @@ struct pt_regs;
 #else
 
 #define BPF_KPROBE_READ_RET_IP(ip, ctx) \
-	({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+	({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
 #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \
-	({ bpf_probe_read(&(ip), sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+	({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
 
 #endif
diff --git a/bpf/process/bpf_execve_bprm_commit_creds.c b/bpf/process/bpf_execve_bprm_commit_creds.c
index ef75a5945fe..e109022551b 100644
--- a/bpf/process/bpf_execve_bprm_commit_creds.c
+++ b/bpf/process/bpf_execve_bprm_commit_creds.c
@@ -3,6 +3,7 @@
 #include "vmlinux.h"
 #include "api.h"
 
+#include "types/probe_read_kernel_or_user.h"
 #include "bpf_tracing.h"
 #include "common.h"
 
diff --git a/bpf/process/bpf_execve_event.c b/bpf/process/bpf_execve_event.c
index a00ae3a115f..f7f0a0c2a96 100644
--- a/bpf/process/bpf_execve_event.c
+++ b/bpf/process/bpf_execve_event.c
@@ -41,13 +41,13 @@ read_args(void *ctx, struct msg_execve_event *event)
 	long off;
 	int err;
 
-	probe_read(&mm, sizeof(mm), _(&task->mm));
+	probe_read_kernel(&mm, sizeof(mm), _(&task->mm));
 	if (!mm)
 		return 0;
 
-	probe_read(&start_stack, sizeof(start_stack),
-		   _(&mm->arg_start));
-	probe_read(&end_stack, sizeof(start_stack), _(&mm->arg_end));
+	probe_read_kernel(&start_stack, sizeof(start_stack),
+			  _(&mm->arg_start));
+	probe_read_kernel(&end_stack, sizeof(start_stack), _(&mm->arg_end));
 	if (!start_stack || !end_stack)
 		return 0;
 
@@ -58,7 +58,7 @@ read_args(void *ctx, struct msg_execve_event *event)
 		return 0;
 
 	/* poor man's strlen */
-	off = probe_read_str(&heap->maxpath, 4096, (char *)start_stack);
+	off = probe_read_user_str(&heap->maxpath, 4096, (char *)start_stack);
	if (off < 0)
		return 0;
 
@@ -78,7 +78,7 @@ read_args(void *ctx, struct 
msg_execve_event *event) if (args_size < BUFFER && args_size < free_size) { size = args_size & 0x3ff /* BUFFER - 1 */; - err = probe_read(args, size, (char *)start_stack); + err = probe_read_user(args, size, (char *)start_stack); if (err < 0) { p->flags |= EVENT_ERROR_ARGS; size = 0; @@ -87,7 +87,7 @@ read_args(void *ctx, struct msg_execve_event *event) size = data_event_bytes(ctx, (struct data_event_desc *)args, (unsigned long)start_stack, args_size, - (struct bpf_map_def *)&data_heap); + (struct bpf_map_def *)&data_heap, true); if (size > 0) p->flags |= EVENT_DATA_ARGS; } @@ -104,14 +104,14 @@ read_path(void *ctx, struct msg_execve_event *event, void *filename) earg = (void *)p + offsetof(struct msg_process, args); - size = probe_read_str(earg, MAXARGLENGTH - 1, filename); + size = probe_read_kernel_str(earg, MAXARGLENGTH - 1, filename); if (size < 0) { flags |= EVENT_ERROR_FILENAME; size = 0; } else if (size == MAXARGLENGTH - 1) { size = data_event_str(ctx, (struct data_event_desc *)earg, (unsigned long)filename, - (struct bpf_map_def *)&data_heap); + (struct bpf_map_def *)&data_heap, false); if (size == 0) flags |= EVENT_ERROR_FILENAME; else @@ -307,7 +307,7 @@ execve_send(struct sched_execve_args *ctx) #ifdef __LARGE_BPF_PROG // read from proc exe stored at execve time if (event->exe.len <= BINARY_PATH_MAX_LEN) { - curr->bin.path_length = probe_read(curr->bin.path, event->exe.len, event->exe.off); + curr->bin.path_length = probe_read_kernel(curr->bin.path, event->exe.len, event->exe.off); if (curr->bin.path_length == 0) curr->bin.path_length = event->exe.len; } @@ -315,7 +315,7 @@ execve_send(struct sched_execve_args *ctx) // reuse p->args first string that contains the filename, this can't be // above 256 in size (otherwise the complete will be send via data msg) // which is okay because we need the 256 first bytes. - curr->bin.path_length = probe_read_str(curr->bin.path, BINARY_PATH_MAX_LEN, &p->args); + curr->bin.path_length = probe_read_kernel_str(curr->bin.path, BINARY_PATH_MAX_LEN, &p->args); if (curr->bin.path_length > 1) { // don't include the NULL byte in the length curr->bin.path_length--; diff --git a/bpf/process/bpf_exit.c b/bpf/process/bpf_exit.c index baadae713f5..355431abc0c 100644 --- a/bpf/process/bpf_exit.c +++ b/bpf/process/bpf_exit.c @@ -3,6 +3,7 @@ #include "vmlinux.h" #include "bpf_exit.h" +#include "types/probe_read_kernel_or_user.h" #include "bpf_tracing.h" char _license[] __attribute__((section("license"), used)) = "Dual BSD/GPL"; diff --git a/bpf/process/bpf_exit.h b/bpf/process/bpf_exit.h index b683bac7a5d..7df5bdd84af 100644 --- a/bpf/process/bpf_exit.h +++ b/bpf/process/bpf_exit.h @@ -63,8 +63,8 @@ static inline __attribute__((always_inline)) void event_exit_send(void *ctx, __u * entry from the execve_map anyway and explicitly set it to the to tgid. 
*/ exit->info.tid = tgid; - probe_read(&exit->info.code, sizeof(exit->info.code), - _(&task->exit_code)); + probe_read_kernel(&exit->info.code, sizeof(exit->info.code), + _(&task->exit_code)); perf_event_output_metric(ctx, MSG_OP_EXIT, &tcpmon_map, BPF_F_CURRENT_CPU, exit, size); } diff --git a/bpf/process/bpf_generic_retkprobe.c b/bpf/process/bpf_generic_retkprobe.c index f5b9bca0b9a..7f20ea60559 100644 --- a/bpf/process/bpf_generic_retkprobe.c +++ b/bpf/process/bpf_generic_retkprobe.c @@ -69,16 +69,13 @@ struct { __attribute__((section((MAIN)), used)) int BPF_KRETPROBE(generic_retkprobe_event, unsigned long ret) { - struct execve_map_value *enter; struct msg_generic_kprobe *e; - struct retprobe_info info; struct event_config *config; - bool walker = false; - int zero = 0; - __u32 ppid; - long size = 0; - long ty_arg, do_copy; + struct retprobe_info info; __u64 pid_tgid; + long size = 0; + int zero = 0; + long ty_arg; e = map_lookup_elem(&process_call_heap, &zero); if (!e) @@ -95,16 +92,11 @@ BPF_KRETPROBE(generic_retkprobe_event, unsigned long ret) pid_tgid = get_current_pid_tgid(); e->tid = (__u32)pid_tgid; - if (!retprobe_map_get(e->func_id, e->retprobe_id, &info)) - return 0; - - *(unsigned long *)e->args = info.ktime_enter; size += sizeof(info.ktime_enter); ty_arg = config->argreturn; - do_copy = config->argreturncopy; if (ty_arg) { - size += read_call_arg(ctx, e, 0, ty_arg, size, ret, 0, (struct bpf_map_def *)data_heap_ptr); + size += read_call_arg(ctx, e, 0, ty_arg, size, ret, config->argmreturn, (struct bpf_map_def *)data_heap_ptr); #ifdef __LARGE_BPF_PROG struct socket_owner owner; switch (config->argreturnaction) { @@ -121,6 +113,43 @@ BPF_KRETPROBE(generic_retkprobe_event, unsigned long ret) #endif } + e->ret = ret; + e->common.size = size; + e->common.ktime = ktime_get_ns(); + + tail_call(ctx, &retkprobe_calls, TAIL_CALL_FILTER); + return 1; +} + +__attribute__((section("kprobe/2"), used)) int +BPF_KRETPROBE(generic_retkprobe_copy_arg) +{ + struct execve_map_value *enter; + struct msg_generic_kprobe *e; + struct event_config *config; + struct retprobe_info info; + bool walker = false; + unsigned long ret; + bool userspace; + long size = 0; + int zero = 0; + __u32 ppid; + + e = map_lookup_elem(&process_call_heap, &zero); + if (!e) + return 0; + + config = map_lookup_elem(&config_map, &e->idx); + if (!config) + return 0; + + if (!retprobe_map_get(e->func_id, e->retprobe_id, &info)) + return 0; + + *(unsigned long *)e->args = info.ktime_enter; + size = e->common.size; + ret = e->ret; + /* * 0x1000 should be maximum argument length, so masking * with 0x1fff is safe and verifier will be happy. 
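/*
 * [Reviewer sketch, not part of this patch] The new header
 * bpf/process/types/probe_read_kernel_or_user.h is created by this commit
 * but its body is not shown here. Judging only from the call sites in this
 * file and in data_event.h (is_userspace_data(), probe_read_kernel_or_user_masked(),
 * probe_read_kernel_or_user_str()), the wrappers are assumed to dispatch
 * roughly as below: use the split user/kernel helpers when the build defines
 * __PROBE_KERNEL, and fall back to the generic probe_read on older kernels
 * that lack them. The name and signature in this sketch are assumptions.
 */
static inline __attribute__((always_inline)) int
probe_read_kernel_or_user(void *dst, uint32_t size, const void *src, bool userspace)
{
#ifdef __PROBE_KERNEL
	/* split helpers available: honour the caller's kernel/user choice */
	return userspace ? probe_read_user(dst, size, src)
			 : probe_read_kernel(dst, size, src);
#else
	/* older kernels: only the generic helper exists */
	return probe_read(dst, size, src);
#endif
}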
@@ -128,12 +157,13 @@ BPF_KRETPROBE(generic_retkprobe_event, unsigned long ret) asm volatile("%[size] &= 0x1fff;\n" ::[size] "+r"(size) :); - switch (do_copy) { + userspace = is_userspace_data(info.meta); + switch (config->argreturncopy) { case char_buf: - size += __copy_char_buf(ctx, size, info.ptr, ret, false, e, (struct bpf_map_def *)data_heap_ptr); + size += __copy_char_buf(ctx, size, info.ptr, ret, false, e, (struct bpf_map_def *)data_heap_ptr, userspace); break; case char_iovec: - size += __copy_char_iovec(size, info.ptr, info.cnt, ret, e); + size += __copy_char_iovec(size, info.ptr, info.cnt, ret, e, userspace); default: break; } @@ -146,7 +176,6 @@ BPF_KRETPROBE(generic_retkprobe_event, unsigned long ret) e->common.pad[0] = 0; e->common.pad[1] = 0; e->common.size = size; - e->common.ktime = ktime_get_ns(); if (enter) { e->current.pid = enter->key.pid; diff --git a/bpf/process/bpf_generic_tracepoint.c b/bpf/process/bpf_generic_tracepoint.c index f84367b9b23..1456784fa22 100644 --- a/bpf/process/bpf_generic_tracepoint.c +++ b/bpf/process/bpf_generic_tracepoint.c @@ -69,14 +69,14 @@ static inline __attribute__((always_inline)) unsigned long get_ctx_ul(void *src, case u64_ty: { u64 ret; - probe_read(&ret, sizeof(u64), src); + probe_read_kernel(&ret, sizeof(u64), src); return ret; } case size_type: { size_t ret; - probe_read(&ret, sizeof(size_t), src); + probe_read_kernel(&ret, sizeof(size_t), src); return (unsigned long)ret; } @@ -84,7 +84,7 @@ static inline __attribute__((always_inline)) unsigned long get_ctx_ul(void *src, case s32_ty: { s32 ret; - probe_read(&ret, sizeof(u32), src); + probe_read_kernel(&ret, sizeof(u32), src); return ret; } @@ -92,21 +92,21 @@ static inline __attribute__((always_inline)) unsigned long get_ctx_ul(void *src, case u32_ty: { u32 ret; - probe_read(&ret, sizeof(u32), src); + probe_read_kernel(&ret, sizeof(u32), src); return ret; } case char_buf: case string_type: { char *buff; - probe_read(&buff, sizeof(char *), src); + probe_read_kernel(&buff, sizeof(char *), src); return (unsigned long)buff; } case data_loc_type: { u32 ret; - probe_read(&ret, sizeof(ret), src); + probe_read_kernel(&ret, sizeof(ret), src); return ret; } @@ -117,14 +117,14 @@ static inline __attribute__((always_inline)) unsigned long get_ctx_ul(void *src, case skb_type: { struct sk_buff *skb; - probe_read(&skb, sizeof(struct sk_buff *), src); + probe_read_kernel(&skb, sizeof(struct sk_buff *), src); return (unsigned long)skb; } case sock_type: { struct sock *sk; - probe_read(&sk, sizeof(struct sock *), src); + probe_read_kernel(&sk, sizeof(struct sock *), src); return (unsigned long)sk; } diff --git a/bpf/process/bpf_loader.c b/bpf/process/bpf_loader.c index 332fc289bfd..0a0a2ddb7c6 100644 --- a/bpf/process/bpf_loader.c +++ b/bpf/process/bpf_loader.c @@ -3,6 +3,7 @@ #include "vmlinux.h" #include "api.h" +#include "types/probe_read_kernel_or_user.h" #include "bpf_tracing.h" #include "bpf_helpers.h" #include "bpf_event.h" @@ -115,11 +116,11 @@ loader_kprobe(struct pt_regs *ctx) if (!msg->buildid_size) return 0; - probe_read(&msg->buildid[0], sizeof(msg->buildid), - _(&mmap_event->build_id[0])); + probe_read_kernel(&msg->buildid[0], sizeof(msg->buildid), + _(&mmap_event->build_id[0])); path = BPF_CORE_READ(mmap_event, file_name); - len = probe_read_str(&msg->path, sizeof(msg->path), path); + len = probe_read_kernel_str(&msg->path, sizeof(msg->path), path); msg->path_size = (__u32)len; msg->pid = tgid; diff --git a/bpf/process/bpf_process_event.h b/bpf/process/bpf_process_event.h index 
b2b46845469..7a018acadf3 100644 --- a/bpf/process/bpf_process_event.h +++ b/bpf/process/bpf_process_event.h @@ -39,15 +39,15 @@ __get_auid(struct task_struct *task) return auid; if (bpf_core_field_exists(task->loginuid)) { - probe_read(&auid, sizeof(auid), _(&task->loginuid.val)); + probe_read_kernel(&auid, sizeof(auid), _(&task->loginuid.val)); } else { struct audit_task_info *audit; if (bpf_core_field_exists(task->audit)) { - probe_read(&audit, sizeof(audit), _(&task->audit)); + probe_read_kernel(&audit, sizeof(audit), _(&task->audit)); if (audit) { - probe_read(&auid, sizeof(__u32), - _(&audit->loginuid)); + probe_read_kernel(&auid, sizeof(__u32), + _(&audit->loginuid)); } } } @@ -88,7 +88,7 @@ static inline __attribute__((always_inline)) bool IS_ROOT(struct dentry *dentry) { struct dentry *d_parent; - probe_read(&d_parent, sizeof(d_parent), _(&dentry->d_parent)); + probe_read_kernel(&d_parent, sizeof(d_parent), _(&dentry->d_parent)); return (dentry == d_parent); } @@ -97,7 +97,7 @@ hlist_bl_unhashed(const struct hlist_bl_node *h) { struct hlist_bl_node **pprev; - probe_read(&pprev, sizeof(pprev), _(&h->pprev)); + probe_read_kernel(&pprev, sizeof(pprev), _(&h->pprev)); return !pprev; } @@ -153,7 +153,7 @@ prepend_name(char *buf, char **bufptr, int *buflen, const char *name, u32 namele // Needed to bound that for probe_read call. asm volatile("%[namelen] &= 0xff;\n" ::[namelen] "+r"(namelen) :); - probe_read(buf + buffer_offset + write_slash, namelen * sizeof(char), name); + probe_read_kernel(buf + buffer_offset + write_slash, namelen * sizeof(char), name); *bufptr = buf + buffer_offset; return write_slash ? 0 : -ENAMETOOLONG; @@ -204,28 +204,28 @@ cwd_read(struct cwd_read_data *data) return 1; } - probe_read(&vfsmnt_mnt_root, sizeof(vfsmnt_mnt_root), - _(&vfsmnt->mnt_root)); + probe_read_kernel(&vfsmnt_mnt_root, sizeof(vfsmnt_mnt_root), + _(&vfsmnt->mnt_root)); if (dentry == vfsmnt_mnt_root || IS_ROOT(dentry)) { struct mount *parent; - probe_read(&parent, sizeof(parent), _(&mnt->mnt_parent)); + probe_read_kernel(&parent, sizeof(parent), _(&mnt->mnt_parent)); /* Global root? */ if (data->mnt != parent) { - probe_read(&data->dentry, sizeof(data->dentry), - _(&mnt->mnt_mountpoint)); + probe_read_kernel(&data->dentry, sizeof(data->dentry), + _(&mnt->mnt_mountpoint)); data->mnt = parent; - probe_read(&data->vfsmnt, sizeof(data->vfsmnt), - _(&mnt->mnt)); + probe_read_kernel(&data->vfsmnt, sizeof(data->vfsmnt), + _(&mnt->mnt)); return 0; } // resolved all path components successfully data->resolved = true; return 1; } - probe_read(&parent, sizeof(parent), _(&dentry->d_parent)); - probe_read(&d_name, sizeof(d_name), _(&dentry->d_name)); + probe_read_kernel(&parent, sizeof(parent), _(&dentry->d_parent)); + probe_read_kernel(&d_name, sizeof(d_name), _(&dentry->d_name)); error = prepend_name(data->bf, &data->bptr, &data->blen, (const char *)d_name.name, d_name.len); // This will happen where the dentry name does not fit in the buffer. 
@@ -256,11 +256,11 @@ prepend_path(const struct path *path, const struct path *root, char *bf, }; int error = 0; - probe_read(&data.root_dentry, sizeof(data.root_dentry), - _(&root->dentry)); - probe_read(&data.root_mnt, sizeof(data.root_mnt), _(&root->mnt)); - probe_read(&data.dentry, sizeof(data.dentry), _(&path->dentry)); - probe_read(&data.vfsmnt, sizeof(data.vfsmnt), _(&path->mnt)); + probe_read_kernel(&data.root_dentry, sizeof(data.root_dentry), + _(&root->dentry)); + probe_read_kernel(&data.root_mnt, sizeof(data.root_mnt), _(&root->mnt)); + probe_read_kernel(&data.dentry, sizeof(data.dentry), _(&path->dentry)); + probe_read_kernel(&data.vfsmnt, sizeof(data.vfsmnt), _(&path->mnt)); data.mnt = real_mount(data.vfsmnt); #ifndef __V61_BPF_PROG @@ -290,7 +290,7 @@ path_with_deleted(const struct path *path, const struct path *root, char *bf, { struct dentry *dentry; - probe_read(&dentry, sizeof(dentry), _(&path->dentry)); + probe_read_kernel(&dentry, sizeof(dentry), _(&path->dentry)); if (d_unlinked(dentry)) { int error = prepend(buf, buflen, " (deleted)", 10); if (error) // will never happen as prepend will never return a value != 0 @@ -342,7 +342,7 @@ __d_path_local(const struct path *path, char *buf, int *buflen, int *error) struct fs_struct *fs; task = (struct task_struct *)get_current_task(); - probe_read(&fs, sizeof(fs), _(&task->fs)); + probe_read_kernel(&fs, sizeof(fs), _(&task->fs)); *error = path_with_deleted(path, _(&fs->root), buf, &res, buflen); return res; } @@ -386,7 +386,7 @@ getcwd(struct msg_process *curr, __u32 offset, __u32 proc_pid) int flags = 0, size; char *buffer; - probe_read(&fs, sizeof(fs), _(&task->fs)); + probe_read_kernel(&fs, sizeof(fs), _(&task->fs)); if (!fs) { curr->flags |= EVENT_ERROR_CWD; return 0; @@ -400,7 +400,7 @@ getcwd(struct msg_process *curr, __u32 offset, __u32 proc_pid) :); asm volatile("%[size] &= 0xff;\n" ::[size] "+r"(size) :); - probe_read((char *)curr + offset, size, buffer); + probe_read_kernel((char *)curr + offset, size, buffer); // Unfortunate special case for '/' where nothing was added we need // to truncate with '\n' for parser. 
@@ -421,9 +421,9 @@ event_set_clone(struct msg_process *pid) static inline __attribute__((always_inline)) void __get_caps(struct msg_capabilities *msg, const struct cred *cred) { - probe_read(&msg->effective, sizeof(__u64), _(&cred->cap_effective)); - probe_read(&msg->inheritable, sizeof(__u64), _(&cred->cap_inheritable)); - probe_read(&msg->permitted, sizeof(__u64), _(&cred->cap_permitted)); + probe_read_kernel(&msg->effective, sizeof(__u64), _(&cred->cap_effective)); + probe_read_kernel(&msg->inheritable, sizeof(__u64), _(&cred->cap_inheritable)); + probe_read_kernel(&msg->permitted, sizeof(__u64), _(&cred->cap_permitted)); } /* @get_current_subj_caps: @@ -463,7 +463,7 @@ get_current_subj_caps(struct msg_capabilities *msg, struct task_struct *task) const struct cred *cred; /* Get the task's subjective creds */ - probe_read(&cred, sizeof(cred), _(&task->cred)); + probe_read_kernel(&cred, sizeof(cred), _(&task->cred)); __get_caps(msg, cred); } @@ -473,17 +473,17 @@ get_current_subj_creds_uids(struct msg_cred_minimal *info, struct task_struct *t const struct cred *cred; /* Get the task's subjective creds */ - probe_read(&cred, sizeof(cred), _(&task->cred)); - - probe_read(&info->uid, sizeof(__u32), _(&cred->uid)); - probe_read(&info->gid, sizeof(__u32), _(&cred->gid)); - probe_read(&info->euid, sizeof(__u32), _(&cred->euid)); - probe_read(&info->egid, sizeof(__u32), _(&cred->egid)); - probe_read(&info->suid, sizeof(__u32), _(&cred->suid)); - probe_read(&info->sgid, sizeof(__u32), _(&cred->sgid)); - probe_read(&info->fsuid, sizeof(__u32), _(&cred->fsuid)); - probe_read(&info->fsgid, sizeof(__u32), _(&cred->fsgid)); - probe_read(&info->securebits, sizeof(__u32), _(&cred->securebits)); + probe_read_kernel(&cred, sizeof(cred), _(&task->cred)); + + probe_read_kernel(&info->uid, sizeof(__u32), _(&cred->uid)); + probe_read_kernel(&info->gid, sizeof(__u32), _(&cred->gid)); + probe_read_kernel(&info->euid, sizeof(__u32), _(&cred->euid)); + probe_read_kernel(&info->egid, sizeof(__u32), _(&cred->egid)); + probe_read_kernel(&info->suid, sizeof(__u32), _(&cred->suid)); + probe_read_kernel(&info->sgid, sizeof(__u32), _(&cred->sgid)); + probe_read_kernel(&info->fsuid, sizeof(__u32), _(&cred->fsuid)); + probe_read_kernel(&info->fsgid, sizeof(__u32), _(&cred->fsgid)); + probe_read_kernel(&info->securebits, sizeof(__u32), _(&cred->securebits)); } static inline __attribute__((always_inline)) void @@ -492,55 +492,55 @@ get_namespaces(struct msg_ns *msg, struct task_struct *task) struct nsproxy *nsproxy; struct nsproxy nsp; - probe_read(&nsproxy, sizeof(nsproxy), _(&task->nsproxy)); - probe_read(&nsp, sizeof(nsp), _(nsproxy)); + probe_read_kernel(&nsproxy, sizeof(nsproxy), _(&task->nsproxy)); + probe_read_kernel(&nsp, sizeof(nsp), _(nsproxy)); - probe_read(&msg->uts_inum, sizeof(msg->uts_inum), - _(&nsp.uts_ns->ns.inum)); - probe_read(&msg->ipc_inum, sizeof(msg->ipc_inum), - _(&nsp.ipc_ns->ns.inum)); - probe_read(&msg->mnt_inum, sizeof(msg->mnt_inum), - _(&nsp.mnt_ns->ns.inum)); + probe_read_kernel(&msg->uts_inum, sizeof(msg->uts_inum), + _(&nsp.uts_ns->ns.inum)); + probe_read_kernel(&msg->ipc_inum, sizeof(msg->ipc_inum), + _(&nsp.ipc_ns->ns.inum)); + probe_read_kernel(&msg->mnt_inum, sizeof(msg->mnt_inum), + _(&nsp.mnt_ns->ns.inum)); { struct pid *p = 0; - probe_read(&p, sizeof(p), _(&task->thread_pid)); + probe_read_kernel(&p, sizeof(p), _(&task->thread_pid)); if (p) { int level = 0; struct upid up; - probe_read(&level, sizeof(level), _(&p->level)); - probe_read(&up, sizeof(up), _(&p->numbers[level])); - 
probe_read(&msg->pid_inum, sizeof(msg->pid_inum), - _(&up.ns->ns.inum)); + probe_read_kernel(&level, sizeof(level), _(&p->level)); + probe_read_kernel(&up, sizeof(up), _(&p->numbers[level])); + probe_read_kernel(&msg->pid_inum, sizeof(msg->pid_inum), + _(&up.ns->ns.inum)); } else msg->pid_inum = 0; } - probe_read(&msg->pid_for_children_inum, - sizeof(msg->pid_for_children_inum), - _(&nsp.pid_ns_for_children->ns.inum)); - probe_read(&msg->net_inum, sizeof(msg->net_inum), - _(&nsp.net_ns->ns.inum)); + probe_read_kernel(&msg->pid_for_children_inum, + sizeof(msg->pid_for_children_inum), + _(&nsp.pid_ns_for_children->ns.inum)); + probe_read_kernel(&msg->net_inum, sizeof(msg->net_inum), + _(&nsp.net_ns->ns.inum)); // this also includes time_ns_for_children if (bpf_core_field_exists(nsproxy->time_ns)) { - probe_read(&msg->time_inum, sizeof(msg->time_inum), - _(&nsp.time_ns->ns.inum)); - probe_read(&msg->time_for_children_inum, - sizeof(msg->time_for_children_inum), - _(&nsp.time_ns_for_children->ns.inum)); + probe_read_kernel(&msg->time_inum, sizeof(msg->time_inum), + _(&nsp.time_ns->ns.inum)); + probe_read_kernel(&msg->time_for_children_inum, + sizeof(msg->time_for_children_inum), + _(&nsp.time_ns_for_children->ns.inum)); } - probe_read(&msg->cgroup_inum, sizeof(msg->cgroup_inum), - _(&nsp.cgroup_ns->ns.inum)); + probe_read_kernel(&msg->cgroup_inum, sizeof(msg->cgroup_inum), + _(&nsp.cgroup_ns->ns.inum)); { struct mm_struct *mm; struct user_namespace *user_ns; - probe_read(&mm, sizeof(mm), _(&task->mm)); - probe_read(&user_ns, sizeof(user_ns), _(&mm->user_ns)); - probe_read(&msg->user_inum, sizeof(msg->user_inum), - _(&user_ns->ns.inum)); + probe_read_kernel(&mm, sizeof(mm), _(&task->mm)); + probe_read_kernel(&user_ns, sizeof(user_ns), _(&mm->user_ns)); + probe_read_kernel(&msg->user_inum, sizeof(msg->user_inum), + _(&user_ns->ns.inum)); } } @@ -566,7 +566,7 @@ __event_get_current_cgroup_name(struct cgroup *cgrp, name = get_cgroup_name(cgrp); if (name) - probe_read_str(msg->kube.docker_id, KN_NAME_LENGTH, name); + probe_read_kernel_str(msg->kube.docker_id, KN_NAME_LENGTH, name); else process->flags |= EVENT_ERROR_CGROUP_NAME; } diff --git a/bpf/process/data_event.h b/bpf/process/data_event.h index 974fee26b2c..35a56a941df 100644 --- a/bpf/process/data_event.h +++ b/bpf/process/data_event.h @@ -6,9 +6,10 @@ #include "bpf_tracing.h" #include "data_msg.h" +#include "types/probe_read_kernel_or_user.h" static inline __attribute__((always_inline)) long -__do_bytes(void *ctx, struct msg_data *msg, unsigned long uptr, size_t bytes) +__do_bytes(void *ctx, struct msg_data *msg, unsigned long uptr, size_t bytes, bool userspace) { int err; @@ -21,32 +22,26 @@ __do_bytes(void *ctx, struct msg_data *msg, unsigned long uptr, size_t bytes) bytes = MSG_DATA_ARG_LEN; a: // < 5.3 verifier still requires value masking like 'val &= xxx' -#ifndef __LARGE_BPF_PROG - asm volatile("%[bytes] &= 0x3fff;\n" - : - : [bytes] "+r"(bytes) - :); -#endif - err = probe_read(&msg->arg[0], bytes, (char *)uptr); + err = probe_read_kernel_or_user_masked(&msg->arg[0], bytes, 0x7fff, (char *)uptr, userspace); if (err < 0) return err; msg->common.size = offsetof(struct msg_data, arg) + bytes; - perf_event_output_metric(ctx, MSG_OP_DATA, &tcpmon_map, BPF_F_CURRENT_CPU, msg, msg->common.size); + perf_event_output_metric(ctx, MSG_OP_DATA, &tcpmon_map, BPF_F_CURRENT_CPU, msg, msg->common.size & 0xffff); return bytes; b: return -1; } static inline __attribute__((always_inline)) long -do_bytes(void *ctx, struct msg_data *msg, unsigned long 
arg, size_t bytes) +do_bytes(void *ctx, struct msg_data *msg, unsigned long arg, size_t bytes, bool userspace) { size_t rd_bytes = 0; int err, i __maybe_unused; #ifdef __LARGE_BPF_PROG for (i = 0; i < 10; i++) { - err = __do_bytes(ctx, msg, arg + rd_bytes, bytes - rd_bytes); + err = __do_bytes(ctx, msg, arg + rd_bytes, bytes - rd_bytes, userspace); if (err < 0) return err; rd_bytes += err; @@ -54,12 +49,12 @@ do_bytes(void *ctx, struct msg_data *msg, unsigned long arg, size_t bytes) return rd_bytes; } #else -#define BYTES_COPY \ - err = __do_bytes(ctx, msg, arg + rd_bytes, bytes - rd_bytes); \ - if (err < 0) \ - return err; \ - rd_bytes += err; \ - if (rd_bytes == bytes) \ +#define BYTES_COPY \ + err = __do_bytes(ctx, msg, arg + rd_bytes, bytes - rd_bytes, userspace); \ + if (err < 0) \ + return err; \ + rd_bytes += err; \ + if (rd_bytes == bytes) \ return rd_bytes; #define BYTES_COPY_5 BYTES_COPY BYTES_COPY BYTES_COPY BYTES_COPY BYTES_COPY @@ -75,7 +70,7 @@ do_bytes(void *ctx, struct msg_data *msg, unsigned long arg, size_t bytes) } static inline __attribute__((always_inline)) long -__do_str(void *ctx, struct msg_data *msg, unsigned long arg, bool *done) +__do_str(void *ctx, struct msg_data *msg, unsigned long arg, bool *done, bool userspace) { size_t size, max = sizeof(msg->arg) - 1; long ret; @@ -88,7 +83,8 @@ __do_str(void *ctx, struct msg_data *msg, unsigned long arg, bool *done) : [max] "+r"(max) :); - ret = probe_read_str(&msg->arg[0], max, (char *)arg); + ret = probe_read_kernel_or_user_str(&msg->arg[0], max, (char *)arg, userspace); + if (ret < 0) return ret; @@ -111,7 +107,7 @@ __do_str(void *ctx, struct msg_data *msg, unsigned long arg, bool *done) static inline __attribute__((always_inline)) long do_str(void *ctx, struct msg_data *msg, unsigned long arg, - size_t bytes __maybe_unused) + size_t bytes __maybe_unused, bool userspace) { size_t rd_bytes = 0; bool done = false; @@ -121,7 +117,7 @@ do_str(void *ctx, struct msg_data *msg, unsigned long arg, #define __CNT 2 #pragma unroll for (i = 0; i < __CNT; i++) { - ret = __do_str(ctx, msg, arg + rd_bytes, &done); + ret = __do_str(ctx, msg, arg + rd_bytes, &done, userspace); if (ret < 0) return ret; rd_bytes += ret; @@ -137,7 +133,8 @@ do_str(void *ctx, struct msg_data *msg, unsigned long arg, static inline __attribute__((always_inline)) size_t data_event( void *ctx, struct data_event_desc *desc, unsigned long uptr, size_t size, struct bpf_map_def *heap, - long (*do_data_event)(void *, struct msg_data *, unsigned long, size_t)) + long (*do_data_event)(void *, struct msg_data *, unsigned long, size_t, bool), + bool userspace) { struct msg_data *msg; int zero = 0, err; @@ -165,7 +162,7 @@ static inline __attribute__((always_inline)) size_t data_event( * Leftover for data_event_str is always 0, because we don't know * how much more was there to copy. 
*/ - err = do_data_event(ctx, msg, uptr, size); + err = do_data_event(ctx, msg, uptr, size, userspace); if (err < 0) { desc->error = err; @@ -194,9 +191,9 @@ static inline __attribute__((always_inline)) size_t data_event( */ static inline __attribute__((always_inline)) size_t data_event_bytes(void *ctx, struct data_event_desc *desc, unsigned long uptr, - size_t size, struct bpf_map_def *heap) + size_t size, struct bpf_map_def *heap, bool userspace) { - return data_event(ctx, desc, uptr, size, heap, do_bytes); + return data_event(ctx, desc, uptr, size, heap, do_bytes, userspace); } /** @@ -211,9 +208,9 @@ data_event_bytes(void *ctx, struct data_event_desc *desc, unsigned long uptr, */ static inline __attribute__((always_inline)) size_t data_event_str(void *ctx, struct data_event_desc *desc, unsigned long uptr, - struct bpf_map_def *heap) + struct bpf_map_def *heap, bool userspace) { - return data_event(ctx, desc, uptr, -1, heap, do_str); + return data_event(ctx, desc, uptr, -1, heap, do_str, userspace); } #endif /* __DATA_EVENT_H__ */ diff --git a/bpf/process/generic_calls.h b/bpf/process/generic_calls.h index d23ac848fbc..e4830eb40c9 100644 --- a/bpf/process/generic_calls.h +++ b/bpf/process/generic_calls.h @@ -44,9 +44,6 @@ generic_process_event(void *ctx, struct bpf_map_def *heap_map, int am; am = (&config->arg0m)[index]; - asm volatile("%[am] &= 0xffff;\n" ::[am] "+r"(am) - :); - errv = read_call_arg(ctx, e, index, ty, total, a, am, data_heap); if (errv > 0) total += errv; @@ -56,7 +53,7 @@ generic_process_event(void *ctx, struct bpf_map_def *heap_map, * do it where it makes most sense. */ if (errv < 0) - return filter_args_reject(e->func_id); + return filter_args_reject(ctx, e->func_id); } e->common.size = total; /* Continue to process other arguments. 
*/ @@ -84,7 +81,7 @@ generic_setup_32bit_syscall(struct msg_generic_kprobe *e, u8 op) case MSG_OP_GENERIC_TRACEPOINT: case MSG_OP_GENERIC_KPROBE: info = (struct thread_info *)get_current_task(); - probe_read(&status, sizeof(status), _(&info->status)); + probe_read_kernel(&status, sizeof(status), _(&info->status)); e->sel.is32BitSyscall = status & TS_COMPAT; default: break; @@ -169,7 +166,7 @@ generic_process_event_and_setup(struct pt_regs *ctx, /* If return arg is needed mark retprobe */ ty = config->argreturn; if (ty > 0) - retprobe_map_set(e->func_id, e->retprobe_id, e->common.ktime, 1); + retprobe_map_set(e->func_id, e->retprobe_id, e->common.ktime, 1, config->argmreturn); #endif #ifdef GENERIC_UPROBE diff --git a/bpf/process/retprobe_map.h b/bpf/process/retprobe_map.h index b1f4b81e3bc..959d7ae8294 100644 --- a/bpf/process/retprobe_map.h +++ b/bpf/process/retprobe_map.h @@ -4,6 +4,9 @@ #ifndef __RETPROBE_MAP_H__ #define __RETPROBE_MAP_H__ +#include "vmlinux.h" +#include "api.h" +#include "types/probe_read_kernel_or_user.h" #include "bpf_tracing.h" struct retprobe_key { @@ -15,6 +18,8 @@ struct retprobe_info { unsigned long ktime_enter; unsigned long ptr; unsigned long cnt; + unsigned int meta; + unsigned int pad; }; struct { @@ -56,11 +61,12 @@ static inline __attribute__((always_inline)) void retprobe_map_clear(__u64 id, } static inline __attribute__((always_inline)) void -retprobe_map_set(__u64 id, __u64 tid, __u64 ktime, unsigned long ptr) +retprobe_map_set(__u64 id, __u64 tid, __u64 ktime, unsigned long ptr, __u32 meta) { struct retprobe_info info = { .ktime_enter = ktime, .ptr = ptr, + .meta = meta, }; struct retprobe_key key = { .id = id, @@ -72,12 +78,13 @@ retprobe_map_set(__u64 id, __u64 tid, __u64 ktime, unsigned long ptr) static inline __attribute__((always_inline)) void retprobe_map_set_iovec(__u64 id, __u64 tid, __u64 ktime, unsigned long ptr, - unsigned long cnt) + unsigned long cnt, __u32 meta) { struct retprobe_info info = { .ktime_enter = ktime, .ptr = ptr, .cnt = cnt, + .meta = meta, }; struct retprobe_key key = { .id = id, diff --git a/bpf/process/types/basic.h b/bpf/process/types/basic.h index 6a622975080..304e40aff36 100644 --- a/bpf/process/types/basic.h +++ b/bpf/process/types/basic.h @@ -22,6 +22,7 @@ #include "common.h" #include "process/data_event.h" #include "process/bpf_enforcer.h" +#include "probe_read_kernel_or_user.h" /* Type IDs form API with user space generickprobe.go */ enum { @@ -161,7 +162,9 @@ struct event_config { __u32 t_arg4_ctx_off; __u32 syscall; __s32 argreturncopy; + __u32 argmreturncopy; __s32 argreturn; + __u32 argmreturn; /* arg return action specifies to act on the return value; currently * supported actions include: TrackSock and UntrackSock. */ @@ -276,7 +279,7 @@ return_stack_error(char *args, int orig, int err) static inline __attribute__((always_inline)) int parse_iovec_array(long off, unsigned long arg, int i, unsigned long max, - struct msg_generic_kprobe *e) + struct msg_generic_kprobe *e, bool userspace) { struct iovec iov; // limit is 1024 using a hack now. 
For 5.4 kernel we should loop over 1024 @@ -284,7 +287,7 @@ parse_iovec_array(long off, unsigned long arg, int i, unsigned long max, __u64 size; int err; - err = probe_read(&iov, sizeof(iov), (struct iovec *)(arg + index)); + err = probe_read_kernel_or_user(&iov, sizeof(iov), (struct iovec *)(arg + index), userspace); if (err < 0) return char_buf_pagefault; size = iov.iov_len; @@ -292,9 +295,7 @@ parse_iovec_array(long off, unsigned long arg, int i, unsigned long max, size = max; if (size > 4094) return char_buf_toolarge; - asm volatile("%[size] &= 0xfff;\n" ::[size] "+r"(size) - :); - err = probe_read(args_off(e, off), size, (char *)iov.iov_base); + err = probe_read_kernel_or_user_masked(args_off(e, off), size, 0xfff, (char *)iov.iov_base, userspace); if (err < 0) return char_buf_pagefault; return size; @@ -307,7 +308,7 @@ parse_iovec_array(long off, unsigned long arg, int i, unsigned long max, /* embedding this in the loop counter breaks verifier */ \ if (i >= cnt) \ goto char_iovec_done; \ - c = parse_iovec_array(off, arg, i, max, e); \ + c = parse_iovec_array(off, arg, i, max, e, userspace); \ if (c < 0) { \ char *args = args_off(e, off_orig); \ return return_stack_error(args, 0, c); \ @@ -452,7 +453,7 @@ copy_path(char *args, const struct path *arg) asm volatile("%[size] &= 0xff;\n" ::[size] "+r"(size) :); - probe_read(curr, size, buffer); + probe_read_kernel(curr, size, buffer); *s = size; size += 4; @@ -483,15 +484,15 @@ copy_path(char *args, const struct path *arg) } static inline __attribute__((always_inline)) long -copy_strings(char *args, char *arg, int max_size) +copy_strings(char *args, char *arg, int max_size, bool userspace) { int *s = (int *)args; long size; - // probe_read_str() always nul-terminates the string. + // probe_read_kernel_or_user_str() always nul-terminates the string. // So add one to the length to allow for it. This should // result in us honouring our max_size correctly. - size = probe_read_str(&args[4], max_size + 1, arg); + size = probe_read_kernel_or_user_str(&args[4], max_size + 1, arg, userspace); if (size <= 1) return invalid_ty; // Remove the nul character from end. 
@@ -502,51 +503,54 @@ copy_strings(char *args, char *arg, int max_size) } static inline __attribute__((always_inline)) long copy_skb(char *args, - unsigned long arg) + unsigned long arg, + bool userspace) { struct sk_buff *skb = (struct sk_buff *)arg; struct skb_type *skb_event = (struct skb_type *)args; /* struct values */ - probe_read(&skb_event->hash, sizeof(__u32), _(&skb->hash)); - probe_read(&skb_event->len, sizeof(__u32), _(&skb->len)); - probe_read(&skb_event->priority, sizeof(__u32), _(&skb->priority)); - probe_read(&skb_event->mark, sizeof(__u32), _(&skb->mark)); + probe_read_kernel_or_user(&skb_event->hash, sizeof(__u32), _(&skb->hash), userspace); + probe_read_kernel_or_user(&skb_event->len, sizeof(__u32), _(&skb->len), userspace); + probe_read_kernel_or_user(&skb_event->priority, sizeof(__u32), _(&skb->priority), userspace); + probe_read_kernel_or_user(&skb_event->mark, sizeof(__u32), _(&skb->mark), userspace); /* socket data */ - set_event_from_skb(skb_event, skb); + set_event_from_skb(skb_event, skb, userspace); return sizeof(struct skb_type); } static inline __attribute__((always_inline)) long copy_sock(char *args, - unsigned long arg) + unsigned long arg, + bool userspace) { struct sock *sk = (struct sock *)arg; struct sk_type *sk_event = (struct sk_type *)args; - set_event_from_sock(sk_event, sk); + set_event_from_sock(sk_event, sk, userspace); return sizeof(struct sk_type); } static inline __attribute__((always_inline)) long -copy_user_ns(char *args, unsigned long arg) +copy_user_ns(char *args, unsigned long arg, bool userspace) { struct user_namespace *ns = (struct user_namespace *)arg; struct msg_user_namespace *u_ns_info = (struct msg_user_namespace *)args; - probe_read(&u_ns_info->level, sizeof(__s32), _(&ns->level)); - probe_read(&u_ns_info->uid, sizeof(__u32), _(&ns->owner)); - probe_read(&u_ns_info->gid, sizeof(__u32), _(&ns->group)); - probe_read(&u_ns_info->ns_inum, sizeof(__u32), _(&ns->ns.inum)); + probe_read_kernel_or_user(&u_ns_info->level, sizeof(__s32), _(&ns->level), userspace); + probe_read_kernel_or_user(&u_ns_info->uid, sizeof(__u32), _(&ns->owner), userspace); + probe_read_kernel_or_user(&u_ns_info->gid, sizeof(__u32), _(&ns->group), userspace); + probe_read_kernel_or_user(&u_ns_info->ns_inum, sizeof(__u32), _(&ns->ns.inum), userspace); return sizeof(struct msg_user_namespace); } static inline __attribute__((always_inline)) long copy_cred(char *args, - unsigned long arg) + unsigned long arg, + bool userspace) { struct user_namespace *ns; struct cred *cred = (struct cred *)arg; @@ -554,21 +558,21 @@ static inline __attribute__((always_inline)) long copy_cred(char *args, struct msg_capabilities *caps = &info->caps; struct msg_user_namespace *user_ns_info = &info->user_ns; - probe_read(&info->uid, sizeof(__u32), _(&cred->uid)); - probe_read(&info->gid, sizeof(__u32), _(&cred->gid)); - probe_read(&info->euid, sizeof(__u32), _(&cred->euid)); - probe_read(&info->egid, sizeof(__u32), _(&cred->egid)); - probe_read(&info->suid, sizeof(__u32), _(&cred->suid)); - probe_read(&info->sgid, sizeof(__u32), _(&cred->sgid)); - probe_read(&info->fsuid, sizeof(__u32), _(&cred->fsuid)); - probe_read(&info->fsgid, sizeof(__u32), _(&cred->fsgid)); + probe_read_kernel_or_user(&info->uid, sizeof(__u32), _(&cred->uid), userspace); + probe_read_kernel_or_user(&info->gid, sizeof(__u32), _(&cred->gid), userspace); + probe_read_kernel_or_user(&info->euid, sizeof(__u32), _(&cred->euid), userspace); + probe_read_kernel_or_user(&info->egid, sizeof(__u32), _(&cred->egid), userspace); 
+ probe_read_kernel_or_user(&info->suid, sizeof(__u32), _(&cred->suid), userspace); + probe_read_kernel_or_user(&info->sgid, sizeof(__u32), _(&cred->sgid), userspace); + probe_read_kernel_or_user(&info->fsuid, sizeof(__u32), _(&cred->fsuid), userspace); + probe_read_kernel_or_user(&info->fsgid, sizeof(__u32), _(&cred->fsgid), userspace); info->pad = 0; - probe_read(&info->securebits, sizeof(__u32), _(&cred->securebits)); + probe_read_kernel_or_user(&info->securebits, sizeof(__u32), _(&cred->securebits), userspace); __get_caps(caps, cred); - probe_read(&ns, sizeof(ns), _(&cred->user_ns)); - copy_user_ns((char *)user_ns_info, (unsigned long)ns); + probe_read_kernel_or_user(&ns, sizeof(ns), _(&cred->user_ns), userspace); + copy_user_ns((char *)user_ns_info, (unsigned long)ns, userspace); return sizeof(struct msg_cred); } @@ -586,7 +590,7 @@ copy_capability(char *args, unsigned long arg) } static inline __attribute__((always_inline)) long -copy_load_module(char *args, unsigned long arg) +copy_load_module(char *args, unsigned long arg, bool userspace) { int ok; const char *name; @@ -595,32 +599,44 @@ copy_load_module(char *args, unsigned long arg) memset(info, 0, sizeof(struct tg_kernel_module)); - if (BPF_CORE_READ_INTO(&name, mod, name) != 0) - return 0; - - if (probe_read_str(&info->name, TG_MODULE_NAME_LEN - 1, name) < 0) + if (userspace) { + if (BPF_CORE_READ_USER_INTO(&name, mod, name) != 0) + return 0; + } else { + if (BPF_CORE_READ_INTO(&name, mod, name) != 0) + return 0; + } + if (probe_read_kernel_or_user_str(&info->name, TG_MODULE_NAME_LEN - 1, name, userspace) < 0) return 0; - BPF_CORE_READ_INTO(&info->taints, mod, mod, taints); - - if (BPF_CORE_READ_INTO(&ok, mod, sig_ok) == 0) - info->sig_ok = !!ok; + if (userspace) { + BPF_CORE_READ_USER_INTO(&info->taints, mod, mod, taints); + if (BPF_CORE_READ_USER_INTO(&ok, mod, sig_ok) == 0) + info->sig_ok = !!ok; + } else { + BPF_CORE_READ_INTO(&info->taints, mod, mod, taints); + if (BPF_CORE_READ_INTO(&ok, mod, sig_ok) == 0) + info->sig_ok = !!ok; + } return sizeof(struct tg_kernel_module); } static inline __attribute__((always_inline)) long -copy_kernel_module(char *args, unsigned long arg) +copy_kernel_module(char *args, unsigned long arg, bool userspace) { const struct module *mod = (struct module *)arg; struct tg_kernel_module *info = (struct tg_kernel_module *)args; memset(info, 0, sizeof(struct tg_kernel_module)); - if (probe_read_str(&info->name, TG_MODULE_NAME_LEN - 1, mod->name) < 0) + if (probe_read_kernel_or_user_str(&info->name, TG_MODULE_NAME_LEN - 1, mod->name, userspace) < 0) return 0; - BPF_CORE_READ_INTO(&info->taints, mod, taints); + if (userspace) + BPF_CORE_READ_USER_INTO(&info->taints, mod, taints); + else + BPF_CORE_READ_INTO(&info->taints, mod, taints); /* * Todo: allow to check if module is signed here too. 
@@ -631,10 +647,11 @@ copy_kernel_module(char *args, unsigned long arg) return sizeof(struct tg_kernel_module); } -#define ARGM_INDEX_MASK 0xf +#define ARGM_INDEX_MASK 0xf #define ARGM_RETURN_COPY BIT(4) -#define ARGM_MAX_DATA BIT(5) +#define ARGM_MAX_DATA BIT(5) #define ARGM_USERSPACE_DATA BIT(6) +#define ARGM_RAW_SYSCALLS BIT(7) static inline __attribute__((always_inline)) bool hasReturnCopy(unsigned long argm) @@ -654,6 +671,12 @@ is_userspace_data(unsigned long argm) return (argm & ARGM_USERSPACE_DATA) != 0; } +static inline __attribute__((always_inline)) bool +is_raw_syscalls(unsigned long argm) +{ + return (argm & ARGM_RAW_SYSCALLS) != 0; +} + static inline __attribute__((always_inline)) unsigned long get_arg_meta(int meta, struct msg_generic_kprobe *e) { @@ -675,7 +698,7 @@ get_arg_meta(int meta, struct msg_generic_kprobe *e) static inline __attribute__((always_inline)) long __copy_char_buf(void *ctx, long off, unsigned long arg, unsigned long bytes, bool max_data, struct msg_generic_kprobe *e, - struct bpf_map_def *data_heap) + struct bpf_map_def *data_heap, bool userspace) { int *s = (int *)args_off(e, off); size_t rd_bytes, extra = 8; @@ -690,7 +713,7 @@ __copy_char_buf(void *ctx, long off, unsigned long arg, unsigned long bytes, s[0] = 1; return data_event_bytes(ctx, (struct data_event_desc *)&s[1], - arg, bytes, data_heap) + + arg, bytes, data_heap, userspace) + 4; } s[0] = 0; @@ -701,9 +724,7 @@ __copy_char_buf(void *ctx, long off, unsigned long arg, unsigned long bytes, /* Bound bytes <4095 to ensure bytes does not read past end of buffer */ rd_bytes = bytes < 0x1000 ? bytes : 0xfff; - asm volatile("%[rd_bytes] &= 0xfff;\n" ::[rd_bytes] "+r"(rd_bytes) - :); - err = probe_read(&s[2], rd_bytes, (char *)arg); + err = probe_read_kernel_or_user_masked(&s[2], rd_bytes, 0xfff, (char *)arg, userspace); if (err < 0) return return_error(s, char_buf_pagefault); s[0] = (int)bytes; @@ -714,7 +735,7 @@ __copy_char_buf(void *ctx, long off, unsigned long arg, unsigned long bytes, static inline __attribute__((always_inline)) long copy_char_buf(void *ctx, long off, unsigned long arg, int argm, struct msg_generic_kprobe *e, - struct bpf_map_def *data_heap) + struct bpf_map_def *data_heap, bool userspace) { int *s = (int *)args_off(e, off); unsigned long meta; @@ -723,12 +744,12 @@ copy_char_buf(void *ctx, long off, unsigned long arg, int argm, if (hasReturnCopy(argm)) { u64 retid = retprobe_map_get_key(ctx); - retprobe_map_set(e->func_id, retid, e->common.ktime, arg); + retprobe_map_set(e->func_id, retid, e->common.ktime, arg, argm); return return_error(s, char_buf_saved_for_retprobe); } meta = get_arg_meta(argm, e); - probe_read(&bytes, sizeof(bytes), &meta); - return __copy_char_buf(ctx, off, arg, bytes, has_max_data(argm), e, data_heap); + bytes = meta; + return __copy_char_buf(ctx, off, arg, bytes, has_max_data(argm), e, data_heap, userspace); } static inline __attribute__((always_inline)) u16 @@ -880,11 +901,11 @@ filter_char_buf_equal(struct selector_arg_filter *filter, char *arg_str, uint or : "i"(STRING_MAPS_HEAP_MASK)); #ifdef __LARGE_BPF_PROG if (index <= 5) - probe_read(&heap[1], len, arg_str); + probe_read_kernel(&heap[1], len, arg_str); else - probe_read(&heap[2], len, arg_str); + probe_read_kernel(&heap[2], len, arg_str); #else - probe_read(&heap[1], len, arg_str); + probe_read_kernel(&heap[1], len, arg_str); #endif // Pad string to multiple of key increment size @@ -894,11 +915,11 @@ filter_char_buf_equal(struct selector_arg_filter *filter, char *arg_str, uint or : 
"i"(STRING_MAPS_HEAP_MASK)); #ifdef __LARGE_BPF_PROG if (index <= 5) - probe_read(heap + len + 1, (padded_len - len) & STRING_MAPS_COPY_MASK, zero_heap); + probe_read_kernel(heap + len + 1, (padded_len - len) & STRING_MAPS_COPY_MASK, zero_heap); else - probe_read(heap + len + 2, (padded_len - len) & STRING_MAPS_COPY_MASK, zero_heap); + probe_read_kernel(heap + len + 2, (padded_len - len) & STRING_MAPS_COPY_MASK, zero_heap); #else - probe_read(heap + len + 1, (padded_len - len) & STRING_MAPS_COPY_MASK, zero_heap); + probe_read_kernel(heap + len + 1, (padded_len - len) & STRING_MAPS_COPY_MASK, zero_heap); #endif } @@ -940,7 +961,7 @@ filter_char_buf_prefix(struct selector_arg_filter *filter, char *arg_str, uint a : [arg_len] "+r"(arg_len) : [mask] "i"(STRING_PREFIX_MAX_LENGTH - 1)); - probe_read(arg->data, arg_len & (STRING_PREFIX_MAX_LENGTH - 1), arg_str); + probe_read_kernel(arg->data, arg_len & (STRING_PREFIX_MAX_LENGTH - 1), arg_str); __u8 *pass = map_lookup_elem(addrmap, arg); @@ -1218,7 +1239,8 @@ filter_inet(struct selector_arg_filter *filter, char *args) static inline __attribute__((always_inline)) long __copy_char_iovec(long off, unsigned long arg, unsigned long cnt, - unsigned long max, struct msg_generic_kprobe *e) + unsigned long max, struct msg_generic_kprobe *e, + bool userspace) { long size, off_orig = off; unsigned long i = 0; @@ -1248,28 +1270,28 @@ copy_char_iovec(void *ctx, long off, unsigned long arg, int argm, if (hasReturnCopy(argm)) { u64 retid = retprobe_map_get_key(ctx); - retprobe_map_set_iovec(e->func_id, retid, e->common.ktime, arg, meta); + retprobe_map_set_iovec(e->func_id, retid, e->common.ktime, arg, meta, argm); return return_error(s, char_buf_saved_for_retprobe); } - return __copy_char_iovec(off, arg, meta, 0, e); + return __copy_char_iovec(off, arg, meta, 0, e, is_userspace_data(argm)); } static inline __attribute__((always_inline)) long -copy_bpf_attr(char *args, unsigned long arg) +copy_bpf_attr(char *args, unsigned long arg, bool userspace) { union bpf_attr *ba = (union bpf_attr *)arg; struct bpf_info_type *bpf_info = (struct bpf_info_type *)args; /* struct values */ - probe_read(&bpf_info->prog_type, sizeof(__u32), _(&ba->prog_type)); - probe_read(&bpf_info->insn_cnt, sizeof(__u32), _(&ba->insn_cnt)); - probe_read(&bpf_info->prog_name, BPF_OBJ_NAME_LEN, _(&ba->prog_name)); + probe_read_kernel_or_user(&bpf_info->prog_type, sizeof(__u32), _(&ba->prog_type), userspace); + probe_read_kernel_or_user(&bpf_info->insn_cnt, sizeof(__u32), _(&ba->insn_cnt), userspace); + probe_read_kernel_or_user(&bpf_info->prog_name, BPF_OBJ_NAME_LEN, _(&ba->prog_name), userspace); return sizeof(struct bpf_info_type); } static inline __attribute__((always_inline)) long -copy_perf_event(char *args, unsigned long arg) +copy_perf_event(char *args, unsigned long arg, bool userspace) { struct perf_event *p_event = (struct perf_event *)arg; struct perf_event_info_type *event_info = @@ -1278,34 +1300,34 @@ copy_perf_event(char *args, unsigned long arg) /* struct values */ __u64 kprobe_func_addr = 0; - probe_read(&kprobe_func_addr, sizeof(__u64), - _(&p_event->attr.kprobe_func)); - probe_read_str(&event_info->kprobe_func, KSYM_NAME_LEN, - (char *)kprobe_func_addr); + probe_read_kernel_or_user(&kprobe_func_addr, sizeof(__u64), + _(&p_event->attr.kprobe_func), userspace); + probe_read_kernel_or_user_str(&event_info->kprobe_func, KSYM_NAME_LEN, + (char *)kprobe_func_addr, userspace); - probe_read(&event_info->type, sizeof(__u32), _(&p_event->attr.type)); - probe_read(&event_info->config, 
sizeof(__u64), - _(&p_event->attr.config)); - probe_read(&event_info->probe_offset, sizeof(__u64), - _(&p_event->attr.probe_offset)); + probe_read_kernel_or_user(&event_info->type, sizeof(__u32), _(&p_event->attr.type), userspace); + probe_read_kernel_or_user(&event_info->config, sizeof(__u64), + _(&p_event->attr.config), userspace); + probe_read_kernel_or_user(&event_info->probe_offset, sizeof(__u64), + _(&p_event->attr.probe_offset), userspace); return sizeof(struct perf_event_info_type); } static inline __attribute__((always_inline)) long -copy_bpf_map(char *args, unsigned long arg) +copy_bpf_map(char *args, unsigned long arg, bool userspace) { struct bpf_map *bpfmap = (struct bpf_map *)arg; struct bpf_map_info_type *map_info = (struct bpf_map_info_type *)args; /* struct values */ - probe_read(&map_info->map_type, sizeof(__u32), _(&bpfmap->map_type)); - probe_read(&map_info->key_size, sizeof(__u32), _(&bpfmap->key_size)); - probe_read(&map_info->value_size, sizeof(__u32), - _(&bpfmap->value_size)); - probe_read(&map_info->max_entries, sizeof(__u32), - _(&bpfmap->max_entries)); - probe_read(&map_info->map_name, BPF_OBJ_NAME_LEN, _(&bpfmap->name)); + probe_read_kernel_or_user(&map_info->map_type, sizeof(__u32), _(&bpfmap->map_type), userspace); + probe_read_kernel_or_user(&map_info->key_size, sizeof(__u32), _(&bpfmap->key_size), userspace); + probe_read_kernel_or_user(&map_info->value_size, sizeof(__u32), + _(&bpfmap->value_size), userspace); + probe_read_kernel_or_user(&map_info->max_entries, sizeof(__u32), + _(&bpfmap->max_entries), userspace); + probe_read_kernel_or_user(&map_info->map_name, BPF_OBJ_NAME_LEN, _(&bpfmap->name), userspace); return sizeof(struct bpf_map_info_type); } @@ -1313,7 +1335,7 @@ copy_bpf_map(char *args, unsigned long arg) #ifdef __LARGE_BPF_PROG static inline __attribute__((always_inline)) long copy_iov_iter(void *ctx, long off, unsigned long arg, int argm, struct msg_generic_kprobe *e, - struct bpf_map_def *data_heap) + struct bpf_map_def *data_heap, bool userspace) { long iter_iovec = -1, iter_ubuf __maybe_unused = -1; struct iov_iter *iov_iter = (struct iov_iter *)arg; @@ -1328,7 +1350,7 @@ copy_iov_iter(void *ctx, long off, unsigned long arg, int argm, struct msg_gener goto nodata; tmp = _(&iov_iter->iter_type); - probe_read(&iter_type, sizeof(iter_type), tmp); + probe_read_kernel_or_user(&iter_type, sizeof(iter_type), tmp, userspace); if (bpf_core_enum_value_exists(enum iter_type, ITER_IOVEC)) iter_iovec = bpf_core_enum_value(enum iter_type, ITER_IOVEC); @@ -1340,28 +1362,28 @@ copy_iov_iter(void *ctx, long off, unsigned long arg, int argm, struct msg_gener if (iter_type == iter_iovec) { tmp = _(&iov_iter->kvec); - probe_read(&kvec, sizeof(kvec), tmp); + probe_read_kernel_or_user(&kvec, sizeof(kvec), tmp, userspace); tmp = _(&kvec->iov_base); - probe_read(&buf, sizeof(buf), tmp); + probe_read_kernel_or_user(&buf, sizeof(buf), tmp, userspace); tmp = _(&kvec->iov_len); - probe_read(&count, sizeof(count), tmp); + probe_read_kernel_or_user(&count, sizeof(count), tmp, userspace); return __copy_char_buf(ctx, off, (unsigned long)buf, count, - has_max_data(argm), e, data_heap); + has_max_data(argm), e, data_heap, userspace); } #ifdef __V61_BPF_PROG if (iter_type == iter_ubuf) { tmp = _(&iov_iter->ubuf); - probe_read(&buf, sizeof(buf), tmp); + probe_read_kernel_or_user(&buf, sizeof(buf), tmp, userspace); tmp = _(&iov_iter->count); - probe_read(&count, sizeof(count), tmp); + probe_read_kernel_or_user(&count, sizeof(count), tmp, userspace); return __copy_char_buf(ctx, 
off, (unsigned long)buf, count, - has_max_data(argm), e, data_heap); + has_max_data(argm), e, data_heap, userspace); } #endif @@ -1372,7 +1394,7 @@ copy_iov_iter(void *ctx, long off, unsigned long arg, int argm, struct msg_gener return 8; } #else -#define copy_iov_iter(ctx, orig_off, arg, argm, e, data_heap) 0 +#define copy_iov_iter(ctx, orig_off, arg, argm, e, data_heap, userspace) 0 #endif /* __LARGE_BPF_PROG */ static inline __attribute__((always_inline)) bool is_signed_type(int type) @@ -1416,7 +1438,6 @@ filter_64ty_selector_val(struct selector_arg_filter *filter, char *args) case op_filter_eq: case op_filter_neq: res = (*(u64 *)args == w); - if (filter->op == op_filter_eq && res) return 1; if (filter->op == op_filter_neq && !res) @@ -1721,7 +1742,7 @@ static inline __attribute__((always_inline)) int match_binaries(__u32 selidx) // prepare the key on the stack to perform lookup in the LPM_TRIE memset(&prefix_key, 0, sizeof(prefix_key)); prefix_key.prefixlen = current->bin.path_length * 8; // prefixlen is in bits - ret = probe_read(prefix_key.data, current->bin.path_length & (STRING_PREFIX_MAX_LENGTH - 1), current->bin.path); + ret = probe_read_kernel(prefix_key.data, current->bin.path_length & (STRING_PREFIX_MAX_LENGTH - 1), current->bin.path); if (ret < 0) return 0; found_key = map_lookup_elem(path_map, &prefix_key); @@ -1854,15 +1875,16 @@ selector_arg_offset(__u8 *f, struct msg_generic_kprobe *e, __u32 selidx, return pass ? seloff : 0; } -static inline __attribute__((always_inline)) int filter_args_reject(u64 id) +static inline __attribute__((always_inline)) int filter_args_reject(void *ctx, u64 id) { - u64 tid = get_current_pid_tgid(); - retprobe_map_clear(id, tid); + u64 retid = retprobe_map_get_key(ctx); + + retprobe_map_clear(id, retid); return 0; } static inline __attribute__((always_inline)) int -filter_args(struct msg_generic_kprobe *e, int selidx, void *filter_map, +filter_args(void *ctx, struct msg_generic_kprobe *e, int selidx, void *filter_map, bool is_entry) { __u8 *f; @@ -1882,7 +1904,7 @@ filter_args(struct msg_generic_kprobe *e, int selidx, void *filter_map, * have their arg filters run. 
*/ if (selidx > SELECTORS_ACTIVE) - return filter_args_reject(e->func_id); + return filter_args_reject(ctx, e->func_id); if (e->sel.active[selidx]) { int pass = selector_arg_offset(f, e, selidx, is_entry); @@ -1952,8 +1974,8 @@ installfd(struct msg_generic_kprobe *e, int fd, int name, bool follow) : [size] "+r"(size) :); - probe_read(&val.file[0], size + 4 /* size */ + 4 /* flags */, - &e->args[nameoff]); + probe_read_kernel(&val.file[0], size + 4 /* size */ + 4 /* flags */, + &e->args[nameoff]); map_update_elem(&fdinstall_map, &key, &val, BPF_ANY); } else { err = map_delete_elem(&fdinstall_map, &key); @@ -2100,7 +2122,7 @@ rate_limit(__u64 ratelimit_interval, __u64 ratelimit_scope, struct msg_generic_k } // Clean the heap - probe_read(key->data, MAX_POSSIBLE_ARGS * KEY_BYTES_PER_ARG, ro_heap); + probe_read_kernel(key->data, MAX_POSSIBLE_ARGS * KEY_BYTES_PER_ARG, ro_heap); dst = key->data; for (i = 0; i < MAX_POSSIBLE_ARGS; i++) { @@ -2117,7 +2139,7 @@ rate_limit(__u64 ratelimit_interval, __u64 ratelimit_scope, struct msg_generic_k asm volatile("%[arg_size] &= 0x3f;\n" // ensure this mask is greater than KEY_BYTES_PER_ARG : [arg_size] "+r"(arg_size) :); - probe_read(&dst[index], arg_size, &e->args[key_index]); + probe_read_kernel(&dst[index], arg_size, &e->args[key_index]); index += arg_size; } } @@ -2381,7 +2403,7 @@ filter_read_arg(void *ctx, struct bpf_map_def *heap, if (!e) return 0; selidx = e->tailcall_index_selector; - pass = filter_args(e, selidx & MAX_SELECTORS_MASK, filter, is_entry); + pass = filter_args(ctx, e, selidx & MAX_SELECTORS_MASK, filter, is_entry); if (!pass) { selidx++; if (selidx <= MAX_SELECTORS && e->sel.active[selidx & MAX_SELECTORS_MASK]) { @@ -2389,7 +2411,7 @@ filter_read_arg(void *ctx, struct bpf_map_def *heap, tail_call(ctx, tailcalls, TAIL_CALL_ARGS); } // reject if we did not attempt to tailcall, or if tailcall failed. 
- return filter_args_reject(e->func_id); + return filter_args_reject(ctx, e->func_id); } // If pass >1 then we need to consult the selector actions @@ -2515,9 +2537,11 @@ read_call_arg(void *ctx, struct msg_generic_kprobe *e, int index, int type, struct bpf_map_def *data_heap) { size_t min_size = type_to_min_size(type, argm); + bool raw_syscalls = is_raw_syscalls(argm); + bool userspace = is_userspace_data(argm); + const struct path *path_arg = 0; char *args = e->args; long size = -1; - const struct path *path_arg = 0; if (orig_off >= 16383 - min_size) { return 0; @@ -2530,50 +2554,46 @@ read_call_arg(void *ctx, struct msg_generic_kprobe *e, int index, int type, switch (type) { case iov_iter_type: - size = copy_iov_iter(ctx, orig_off, arg, argm, e, data_heap); + size = copy_iov_iter(ctx, orig_off, arg, argm, e, data_heap, userspace); break; case kiocb_type: { struct kiocb *kiocb = (struct kiocb *)arg; struct file *file; arg = (unsigned long)_(&kiocb->ki_filp); - probe_read(&file, sizeof(file), (const void *)arg); + probe_read_kernel_or_user(&file, sizeof(file), (const void *)arg, userspace); arg = (unsigned long)file; } // fallthrough to file_ty case file_ty: { - struct file *file; - probe_read(&file, sizeof(file), &arg); + struct file *file = (struct file *)arg; path_arg = _(&file->f_path); goto do_copy_path; } case path_ty: { - probe_read(&path_arg, sizeof(path_arg), &arg); + path_arg = (struct path *)arg; goto do_copy_path; } case fd_ty: { struct fdinstall_key key = { 0 }; struct fdinstall_value *val; - __u32 fd; key.tid = get_current_pid_tgid() >> 32; - probe_read(&fd, sizeof(__u32), &arg); - key.fd = fd; - + key.fd = arg; val = map_lookup_elem(&fdinstall_map, &key); if (val) { __u32 bytes = (__u32)val->file[0]; - probe_read(&args[0], sizeof(__u32), &fd); + *(__u32 *)args = key.fd; asm volatile("%[bytes] &= 0xff;\n" : [bytes] "+r"(bytes) :); - probe_read(&args[4], bytes + 4, (char *)&val->file[0]); + probe_read_kernel(&args[4], bytes + 4, (char *)&val->file[0]); size = bytes + 4 + 4; // flags - probe_read(&args[size], 4, - (char *)&val->file[size - 4]); + probe_read_kernel(&args[size], 4, + (char *)&val->file[size - 4]); size += 4; } else { /* If filter specification is fd type then we @@ -2589,71 +2609,71 @@ read_call_arg(void *ctx, struct msg_generic_kprobe *e, int index, int type, struct file *file; arg = (unsigned long)_(&bprm->file); - probe_read(&file, sizeof(file), (const void *)arg); + probe_read_kernel_or_user(&file, sizeof(file), (const void *)arg, userspace); path_arg = _(&file->f_path); goto do_copy_path; } break; #endif case filename_ty: { - struct filename *file; - probe_read(&file, sizeof(file), &arg); - probe_read(&arg, sizeof(arg), &file->name); + struct filename *file = (struct filename *)arg; + + probe_read_kernel_or_user(&arg, sizeof(arg), &file->name, userspace); } // fallthrough to copy_string case string_type: - size = copy_strings(args, (char *)arg, MAX_STRING); + size = copy_strings(args, (char *)arg, MAX_STRING, userspace); break; case net_dev_ty: { struct net_device *dev = (struct net_device *)arg; - size = copy_strings(args, dev->name, IFNAMSIZ); + size = copy_strings(args, dev->name, IFNAMSIZ, userspace); } break; case data_loc_type: { // data_loc: lower 16 bits is offset from ctx; upper 16 bits is length long dl_len = (arg >> 16) & 0xfff; // masked to 4095 chars char *dl_loc = ctx + (arg & 0xffff); - - size = copy_strings(args, dl_loc, dl_len); + // data_loc will always be a kernel type + size = copy_strings(args, dl_loc, dl_len, false); } break; case 
syscall64_type: case size_type: case s64_ty: case u64_ty: - probe_read(args, sizeof(__u64), &arg); + *(__u64 *)args = arg; size = sizeof(__u64); break; /* Consolidate all the types to save instructions */ case int_type: case s32_ty: case u32_ty: - probe_read(args, sizeof(__u32), &arg); + *(__u32 *)args = arg; size = sizeof(__u32); break; case s16_ty: case u16_ty: /* read 2 bytes, but send 4 to keep alignment */ - probe_read(args, sizeof(__u16), &arg); + *(__u16 *)args = arg; size = sizeof(__u32); break; case s8_ty: case u8_ty: /* read 1 byte, but send 4 to keep alignment */ - probe_read(args, sizeof(__u8), &arg); + *(__u8 *)args = arg; size = sizeof(__u32); break; case skb_type: - size = copy_skb(args, arg); + size = copy_skb(args, arg, userspace); break; case sock_type: - size = copy_sock(args, arg); + size = copy_sock(args, arg, userspace); // Look up socket in our sock->pid_tgid map update_pid_tid_from_sock(e, arg); break; case cred_type: - size = copy_cred(args, arg); + size = copy_cred(args, arg, userspace); break; case char_buf: - size = copy_char_buf(ctx, orig_off, arg, argm, e, data_heap); + size = copy_char_buf(ctx, orig_off, arg, argm, e, data_heap, userspace); break; case char_iovec: size = copy_char_iovec(ctx, orig_off, arg, argm, e); @@ -2662,23 +2682,28 @@ read_call_arg(void *ctx, struct msg_generic_kprobe *e, int index, int type, // for const_buf_type the size is in the upper 16 bits of the meta argument // bound size to 1023 to help the verifier out size = (argm >> 16) & 0x03ff; - probe_read(args, size, (char *)arg); + // the const_buf_type that represents an array of arguments for raw_syscalls + // is special, as the array contents are in kernel memory, but they point + // to userspace memory. In this case, they will be marked as userspace, but + // we actually want to read kernel memory. The raw_syscalls bit of the meta + // value indicates when the arg is special in this way. 
+ probe_read_kernel_or_user_masked(args, size, 0x3ff, (char *)arg, userspace && !raw_syscalls); break; } case bpf_attr_type: { - size = copy_bpf_attr(args, arg); + size = copy_bpf_attr(args, arg, userspace); break; } case perf_event_type: { - size = copy_perf_event(args, arg); + size = copy_perf_event(args, arg, userspace); break; } case bpf_map_type: { - size = copy_bpf_map(args, arg); + size = copy_bpf_map(args, arg, userspace); break; } case user_namespace_type: { - size = copy_user_ns(args, arg); + size = copy_user_ns(args, arg, userspace); break; } case capability_type: { @@ -2686,18 +2711,18 @@ read_call_arg(void *ctx, struct msg_generic_kprobe *e, int index, int type, break; } case load_module_type: { - size = copy_load_module(args, arg); + size = copy_load_module(args, arg, userspace); break; } case kernel_module_type: { - size = copy_kernel_module(args, arg); + size = copy_kernel_module(args, arg, userspace); break; } case kernel_cap_ty: case cap_inh_ty: case cap_prm_ty: case cap_eff_ty: - probe_read(args, sizeof(__u64), (char *)arg); + probe_read_kernel_or_user(args, sizeof(__u64), (char *)arg, userspace); size = sizeof(__u64); break; default: diff --git a/bpf/process/types/probe_read_kernel_or_user.h b/bpf/process/types/probe_read_kernel_or_user.h new file mode 100644 index 00000000000..98c759ca2d7 --- /dev/null +++ b/bpf/process/types/probe_read_kernel_or_user.h @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright Authors of Cilium */ + +#ifndef __PROBE_READ_KERNEL_OR_USER_H__ +#define __PROBE_READ_KERNEL_OR_USER_H__ + +#include "bpf_core_read.h" + +#define bpf_probe_read_kernel probe_read_kernel +#define bpf_probe_read_user probe_read_user + +#ifdef __PROBE_KERNEL +static inline __attribute__((always_inline)) int +probe_read_kernel_or_user(void *dst, uint32_t size, const void *src, bool userspace) +{ + if (userspace) + return probe_read_user(dst, size, src); + return probe_read_kernel(dst, size, src); +} + +static inline __attribute__((always_inline)) int +probe_read_kernel_or_user_masked(void *dst, uint32_t size, uint32_t size_mask, const void *src, bool userspace) +{ + if (userspace) { + asm volatile("%[size] &= %1;\n" + : [size] "+r"(size) + : "i"(size_mask)); + return probe_read_user(dst, size, src); + } + asm volatile("%[size] &= %1;\n" + : [size] "+r"(size) + : "i"(size_mask)); + return probe_read_kernel(dst, size, src); +} + +static inline __attribute__((always_inline)) int +probe_read_kernel_or_user_str(void *dst, int size, const void *src, bool userspace) +{ + if (userspace) + return probe_read_user_str(dst, size, src); + return probe_read_kernel_str(dst, size, src); +} +#else +static inline __attribute__((always_inline)) int +probe_read_kernel_or_user(void *dst, uint32_t size, const void *src, bool userspace) +{ + return probe_read(dst, size, src); +} + +static inline __attribute__((always_inline)) int +probe_read_kernel_or_user_masked(void *dst, uint32_t size, uint32_t size_mask, const void *src, bool userspace) +{ + asm volatile("%[size] &= %1;\n" + : [size] "+r"(size) + : "i"(size_mask)); + return probe_read(dst, size, src); +} + +static inline __attribute__((always_inline)) int +probe_read_kernel_or_user_str(void *dst, int size, const void *src, bool userspace) +{ + return probe_read_str(dst, size, src); +} +#endif // __PROBE_KERNEL + +/* + * bpf_core_read_kernel_or_user() abstracts away bpf_probe_read_kernel_or_user() call and captures offset + * relocation for source address using __builtin_preserve_access_index() + * 
built-in, provided by Clang.
+ */
+#define bpf_core_read_kernel_or_user(userspace, dst, sz, sz_mask, src)		\
+	probe_read_kernel_or_user_masked(dst, sz, sz_mask,			\
+			(const void *)__builtin_preserve_access_index(src),	\
+			userspace)
+
+/*
+ * bpf_core_read_kernel_or_user_str() is a thin wrapper around bpf_probe_read_kernel_or_user_str()
+ * additionally emitting BPF CO-RE field relocation for specified source
+ * argument.
+ */
+#define bpf_core_read_kernel_or_user_str(userspace, dst, sz, src)		\
+	probe_read_kernel_or_user_str(dst, sz,					\
+			(const void *)__builtin_preserve_access_index(src),	\
+			userspace)
+
+/*
+ * BPF_CORE_READ_KERNEL_OR_USER_INTO() is a more performance-conscious variant of
+ * BPF_CORE_READ_KERNEL_OR_USER(), in which final field is read into user-provided storage.
+ * See BPF_CORE_READ_KERNEL_OR_USER() below for more details on general usage.
+ */
+#define BPF_CORE_READ_KERNEL_OR_USER_INTO(userspace, dst, src, a, ...)		\
+	({									\
+		typeof(dst) dst_x = dst;					\
+		typeof(src) src_x = src;					\
+		typeof(a) a_x = a;						\
+		((userspace) ? (___core_read(bpf_core_read_user, dst_x,	\
+					     src_x, a_x, ##__VA_ARGS__))	\
+			     : (___core_read(bpf_core_read, dst_x, src_x, a_x,	\
+					     ##__VA_ARGS__)))			\
+	})
+
+/*
+ * BPF_CORE_READ_KERNEL_OR_USER_STR_INTO() does same "pointer chasing" as
+ * BPF_CORE_READ_KERNEL_OR_USER_STR() for intermediate pointers, but then executes (and returns
+ * corresponding error code) bpf_core_read_kernel_or_user_str() for final string read.
+ */
+#define BPF_CORE_READ_KERNEL_OR_USER_STR_INTO(userspace, dst, src, a, ...)	\
+	({									\
+		typeof(dst) dst_x = dst;					\
+		typeof(src) src_x = src;					\
+		typeof(a) a_x = a;						\
+		if (userspace)							\
+			___core_read(bpf_core_read_user_str, dst_x, src_x,	\
+				     a_x, ##__VA_ARGS__)			\
+		else								\
+			___core_read(bpf_core_read_str, dst_x, src_x, a_x,	\
+				     ##__VA_ARGS__)				\
+	})
+
+/*
+ * BPF_CORE_READ_KERNEL_OR_USER() is used to simplify BPF CO-RE relocatable
+ * read, especially when there are few pointer chasing steps.
+ * E.g., what in non-BPF world (or in BPF w/ BCC) would be something like:
+ *	int x = s->a.b.c->d.e->f->g;
+ * can be succinctly achieved using BPF_CORE_READ_KERNEL_OR_USER as:
+ *	int x = BPF_CORE_READ_KERNEL_OR_USER(userspace, s, a.b.c, d.e, f, g);
+ *
+ * BPF_CORE_READ_KERNEL_OR_USER will decompose above statement into 4
+ * bpf_core_read()/bpf_core_read_user() (BPF CO-RE relocatable probe read
+ * wrapper) calls, logically equivalent to:
+ * 1. const void *__t = s->a.b.c;
+ * 2. __t = __t->d.e;
+ * 3. __t = __t->f;
+ * 4. return __t->g;
+ *
+ * Equivalence is logical, because there is a heavy type casting/preservation
+ * involved, as well as all the reads are happening through the probe read
+ * calls using __builtin_preserve_access_index() to emit CO-RE relocations.
+ *
+ * N.B. Only up to 9 "field accessors" are supported, which should be more
+ * than enough for any practical purpose.
+ */
+#define BPF_CORE_READ_KERNEL_OR_USER(userspace, src, a, ...) 
\ + ({ \ + typeof(src) src_x = src; \ + typeof(a) a_x = a; \ + ___type(src_x, a_x, ##__VA_ARGS__) __r; \ + BPF_CORE_READ_KERNEL_OR_USER_INTO(userspace, &__r, src_x, \ + a_x, ##__VA_ARGS__); \ + __r; \ + }) + +#endif // __PROBE_READ_KERNEL_OR_USER_H__ diff --git a/bpf/process/types/skb.h b/bpf/process/types/skb.h index e26e6400973..8631d5bd536 100644 --- a/bpf/process/types/skb.h +++ b/bpf/process/types/skb.h @@ -5,6 +5,7 @@ #define __SKB_H__ #include "tuple.h" +#include "probe_read_kernel_or_user.h" struct skb_type { struct tuple_type tuple; @@ -57,7 +58,7 @@ struct { static inline __attribute__((always_inline)) u8 get_ip6_protocol(u16 *payload_off, struct ipv6hdr *ip, u16 network_header_off, - void *skb_head) + void *skb_head, bool userspace) { struct ipv6extension *e; int zero = 0; @@ -70,7 +71,7 @@ get_ip6_protocol(u16 *payload_off, struct ipv6hdr *ip, u16 network_header_off, e->ip_off = network_header_off; e->curr = 255; e->len = 0; - if (probe_read(&e->next, sizeof(e->next), _(&ip->nexthdr)) < 0) + if (probe_read_kernel_or_user(&e->next, sizeof(e->next), _(&ip->nexthdr), userspace) < 0) return 0; // Maximum 7 valid extensions. @@ -105,8 +106,8 @@ get_ip6_protocol(u16 *payload_off, struct ipv6hdr *ip, u16 network_header_off, } e->curr = e->next; // Read next header and current length. - if (probe_read(&e->next, 2, - skb_head + e->ip_off) < 0) { + if (probe_read_kernel_or_user(&e->next, 2, + skb_head + e->ip_off, userspace) < 0) { return 0; } } @@ -120,40 +121,40 @@ get_ip6_protocol(u16 *payload_off, struct ipv6hdr *ip, u16 network_header_off, * only supports IPv4 with TCP/UDP. */ static inline __attribute__((unused)) int -set_event_from_skb(struct skb_type *event, struct sk_buff *skb) +set_event_from_skb(struct skb_type *event, struct sk_buff *skb, bool userspace) { unsigned char *skb_head = 0; u16 l3_off; typeof(skb->transport_header) l4_off; u8 protocol; - probe_read(&skb_head, sizeof(skb_head), _(&skb->head)); - probe_read(&l3_off, sizeof(l3_off), _(&skb->network_header)); + probe_read_kernel_or_user(&skb_head, sizeof(skb_head), _(&skb->head), userspace); + probe_read_kernel_or_user(&l3_off, sizeof(l3_off), _(&skb->network_header), userspace); struct iphdr *ip = (struct iphdr *)(skb_head + l3_off); u8 iphdr_byte0; - probe_read(&iphdr_byte0, 1, _(ip)); + probe_read_kernel_or_user(&iphdr_byte0, 1, _(ip), userspace); u8 ip_ver = iphdr_byte0 >> 4; if (ip_ver == 4) { // IPv4 - probe_read(&protocol, 1, _(&ip->protocol)); + probe_read_kernel_or_user(&protocol, 1, _(&ip->protocol), userspace); event->tuple.protocol = protocol; event->tuple.family = AF_INET; event->tuple.saddr[0] = 0; event->tuple.saddr[1] = 0; event->tuple.daddr[0] = 0; event->tuple.daddr[1] = 0; - probe_read(&event->tuple.saddr, IPV4LEN, _(&ip->saddr)); - probe_read(&event->tuple.daddr, IPV4LEN, _(&ip->daddr)); - probe_read(&l4_off, sizeof(l4_off), _(&skb->transport_header)); + probe_read_kernel_or_user(&event->tuple.saddr, IPV4LEN, _(&ip->saddr), userspace); + probe_read_kernel_or_user(&event->tuple.daddr, IPV4LEN, _(&ip->daddr), userspace); + probe_read_kernel_or_user(&l4_off, sizeof(l4_off), _(&skb->transport_header), userspace); } else if (ip_ver == 6) { struct ipv6hdr *ip6 = (struct ipv6hdr *)(skb_head + l3_off); - protocol = get_ip6_protocol(&l4_off, ip6, l3_off, skb_head); + protocol = get_ip6_protocol(&l4_off, ip6, l3_off, skb_head, userspace); event->tuple.protocol = protocol; event->tuple.family = AF_INET6; - probe_read(&event->tuple.saddr, IPV6LEN, _(&ip6->saddr)); - probe_read(&event->tuple.daddr, IPV6LEN, 
_(&ip6->daddr)); + probe_read_kernel_or_user(&event->tuple.saddr, IPV6LEN, _(&ip6->saddr), userspace); + probe_read_kernel_or_user(&event->tuple.daddr, IPV6LEN, _(&ip6->daddr), userspace); } else { // This is not IP, so we don't know how to parse further. return -22; @@ -162,17 +163,17 @@ set_event_from_skb(struct skb_type *event, struct sk_buff *skb) if (protocol == IPPROTO_TCP) { // TCP struct tcphdr *tcp = (struct tcphdr *)(skb_head + l4_off); - probe_read(&event->tuple.sport, sizeof(event->tuple.sport), - _(&tcp->source)); - probe_read(&event->tuple.dport, sizeof(event->tuple.dport), - _(&tcp->dest)); + probe_read_kernel_or_user(&event->tuple.sport, sizeof(event->tuple.sport), + _(&tcp->source), userspace); + probe_read_kernel_or_user(&event->tuple.dport, sizeof(event->tuple.dport), + _(&tcp->dest), userspace); } else if (protocol == IPPROTO_UDP) { // UDP struct udphdr *udp = (struct udphdr *)(skb_head + l4_off); - probe_read(&event->tuple.sport, sizeof(event->tuple.sport), - _(&udp->source)); - probe_read(&event->tuple.dport, sizeof(event->tuple.dport), - _(&udp->dest)); + probe_read_kernel_or_user(&event->tuple.sport, sizeof(event->tuple.sport), + _(&udp->source), userspace); + probe_read_kernel_or_user(&event->tuple.dport, sizeof(event->tuple.dport), + _(&udp->dest), userspace); } else { event->tuple.sport = 0; event->tuple.dport = 0; @@ -186,18 +187,18 @@ set_event_from_skb(struct skb_type *event, struct sk_buff *skb) u64 offset; #define SKB_EXT_SEC_PATH 1 // TBD do this with BTF - probe_read(&ext, sizeof(ext), _(&skb->extensions)); + probe_read_kernel_or_user(&ext, sizeof(ext), _(&skb->extensions), userspace); if (ext) { - probe_read(&offset, sizeof(offset), - _(&ext->offset[SKB_EXT_SEC_PATH])); + probe_read_kernel_or_user(&offset, sizeof(offset), + _(&ext->offset[SKB_EXT_SEC_PATH]), userspace); sp = (void *)ext + (offset << 3); - probe_read(&event->secpath_len, - sizeof(event->secpath_len), - _(&sp->len)); - probe_read(&event->secpath_olen, - sizeof(event->secpath_olen), - _(&sp->olen)); + probe_read_kernel_or_user(&event->secpath_len, + sizeof(event->secpath_len), + _(&sp->len), userspace); + probe_read_kernel_or_user(&event->secpath_olen, + sizeof(event->secpath_olen), + _(&sp->olen), userspace); } } return 0; diff --git a/bpf/process/types/sock.h b/bpf/process/types/sock.h index d11307189b0..bd8878f9cc3 100644 --- a/bpf/process/types/sock.h +++ b/bpf/process/types/sock.h @@ -5,6 +5,7 @@ #define __SOCK_H__ #include "tuple.h" +#include "probe_read_kernel_or_user.h" // The sockaddr field is specifically a __u64 to deter from trying to dereference it. // If an application needs more fields from the sock then they should be added to @@ -24,28 +25,28 @@ struct sk_type { * Populate the event args with the sock info. 
*/ static inline __attribute__((unused)) void -set_event_from_sock(struct sk_type *event, struct sock *sk) +set_event_from_sock(struct sk_type *event, struct sock *sk, bool userspace) { struct sock_common *common = (struct sock_common *)sk; event->sockaddr = (__u64)sk; - probe_read(&event->tuple.family, sizeof(event->tuple.family), - _(&common->skc_family)); - probe_read(&event->state, sizeof(event->state), - _((const void *)&common->skc_state)); - probe_read(&event->type, sizeof(event->type), _(&sk->sk_type)); - probe_read(&event->tuple.protocol, sizeof(event->tuple.protocol), - _(&sk->sk_protocol)); + probe_read_kernel_or_user(&event->tuple.family, sizeof(event->tuple.family), + _(&common->skc_family), userspace); + probe_read_kernel_or_user(&event->state, sizeof(event->state), + _((const void *)&common->skc_state), userspace); + probe_read_kernel_or_user(&event->type, sizeof(event->type), _(&sk->sk_type), userspace); + probe_read_kernel_or_user(&event->tuple.protocol, sizeof(event->tuple.protocol), + _(&sk->sk_protocol), userspace); if (bpf_core_field_size(sk->sk_protocol) == 4) { // In the BTF, the protocol field in kernels tuple.protocol = event->tuple.protocol >> 8; } - probe_read(&event->mark, sizeof(event->mark), _(&sk->sk_mark)); - probe_read(&event->priority, sizeof(event->priority), - _(&sk->sk_priority)); + probe_read_kernel_or_user(&event->mark, sizeof(event->mark), _(&sk->sk_mark), userspace); + probe_read_kernel_or_user(&event->priority, sizeof(event->priority), + _(&sk->sk_priority), userspace); event->tuple.saddr[0] = 0; event->tuple.saddr[1] = 0; @@ -53,16 +54,16 @@ set_event_from_sock(struct sk_type *event, struct sock *sk) event->tuple.daddr[1] = 0; switch (event->tuple.family) { case AF_INET: - probe_read(&event->tuple.saddr, IPV4LEN, _(&common->skc_rcv_saddr)); - probe_read(&event->tuple.daddr, IPV4LEN, _(&common->skc_daddr)); + probe_read_kernel_or_user(&event->tuple.saddr, IPV4LEN, _(&common->skc_rcv_saddr), userspace); + probe_read_kernel_or_user(&event->tuple.daddr, IPV4LEN, _(&common->skc_daddr), userspace); break; case AF_INET6: - probe_read(&event->tuple.saddr, IPV6LEN, _(&common->skc_v6_rcv_saddr)); - probe_read(&event->tuple.daddr, IPV6LEN, _(&common->skc_v6_daddr)); + probe_read_kernel_or_user(&event->tuple.saddr, IPV6LEN, _(&common->skc_v6_rcv_saddr), userspace); + probe_read_kernel_or_user(&event->tuple.daddr, IPV6LEN, _(&common->skc_v6_daddr), userspace); } - probe_read(&event->tuple.sport, sizeof(event->tuple.sport), _(&common->skc_num)); - probe_read(&event->tuple.dport, sizeof(event->tuple.dport), _(&common->skc_dport)); + probe_read_kernel_or_user(&event->tuple.sport, sizeof(event->tuple.sport), _(&common->skc_num), userspace); + probe_read_kernel_or_user(&event->tuple.dport, sizeof(event->tuple.dport), _(&common->skc_dport), userspace); event->tuple.dport = bpf_ntohs(event->tuple.dport); } #endif // __SOCK_H__ diff --git a/pkg/api/tracingapi/client_kprobe.go b/pkg/api/tracingapi/client_kprobe.go index 5651e69e464..ee7f132b0ca 100644 --- a/pkg/api/tracingapi/client_kprobe.go +++ b/pkg/api/tracingapi/client_kprobe.go @@ -586,7 +586,9 @@ type EventConfig struct { ArgTpCtxOff [EventConfigMaxArgs]uint32 `align:"t_arg0_ctx_off"` Syscall uint32 `align:"syscall"` ArgReturnCopy int32 `align:"argreturncopy"` + ArgMReturnCopy uint32 `align:"argmreturncopy"` ArgReturn int32 `align:"argreturn"` + ArgMReturn uint32 `align:"argmreturn"` ArgReturnAction int32 `align:"argreturnaction"` PolicyID uint32 `align:"policy_id"` Flags uint32 `align:"flags"` diff --git 
a/pkg/kernels/kernels.go b/pkg/kernels/kernels.go
index c85070f8d0b..0b0038f034c 100644
--- a/pkg/kernels/kernels.go
+++ b/pkg/kernels/kernels.go
@@ -157,6 +157,8 @@ func GenericKprobeObjs() (string, string) {
 		return "bpf_generic_kprobe_v61.o", "bpf_generic_retkprobe_v61.o"
 	} else if MinKernelVersion("5.11") {
 		return "bpf_generic_kprobe_v511.o", "bpf_generic_retkprobe_v511.o"
+	} else if MinKernelVersion("5.4") {
+		return "bpf_generic_kprobe_v54.o", "bpf_generic_retkprobe_v54.o"
 	} else if EnableLargeProgs() {
 		return "bpf_generic_kprobe_v53.o", "bpf_generic_retkprobe_v53.o"
 	}
diff --git a/pkg/sensors/tracing/args.go b/pkg/sensors/tracing/args.go
index 430a5a04a61..afad59facb0 100644
--- a/pkg/sensors/tracing/args.go
+++ b/pkg/sensors/tracing/args.go
@@ -34,6 +34,7 @@ const (
 	argReturnCopyBit    = 1 << 4
 	argMaxDataBit       = 1 << 5
 	argUserspaceDataBit = 1 << 6
+	argRawSyscallsBit   = 1 << 7
 )
 
 func argReturnCopy(meta int) bool {
@@ -47,9 +48,10 @@ func argReturnCopy(meta int) bool {
 // 4 : ReturnCopy
 // 5 : MaxData
 // 6 : UserspaceData
-// 7-15 : reserved
+// 7 : RawSyscalls
+// 8-15 : reserved
 // 16-31 : size for const_buf
-func getMetaValue(arg *v1alpha1.KProbeArg, userspaceDataDefault bool) (int, error) {
+func getMetaValue(arg *v1alpha1.KProbeArg, userspaceDataDefault bool, rawSyscalls bool) (int, error) {
 	meta := 0
 
 	if arg.SizeArgIndex > 0 {
@@ -75,6 +77,9 @@ func getMetaValue(arg *v1alpha1.KProbeArg, userspaceDataDefault bool) (int, erro
 			meta = meta | argUserspaceDataBit
 		}
 	}
+	if rawSyscalls {
+		meta = meta | argRawSyscallsBit
+	}
 
 	return meta, nil
 }
diff --git a/pkg/sensors/tracing/generickprobe.go b/pkg/sensors/tracing/generickprobe.go
index f154423fead..69d402749bd 100644
--- a/pkg/sensors/tracing/generickprobe.go
+++ b/pkg/sensors/tracing/generickprobe.go
@@ -279,6 +279,9 @@ func createMultiKprobeSensor(sensorPath, policyName string, multiIDs []idtable.E
 	} else if kernels.MinKernelVersion("5.11") {
 		loadProgName = "bpf_multi_kprobe_v511.o"
 		loadProgRetName = "bpf_multi_retkprobe_v511.o"
+	} else if kernels.MinKernelVersion("5.4") {
+		loadProgName = "bpf_multi_kprobe_v54.o"
+		loadProgRetName = "bpf_multi_retkprobe_v54.o"
 	}
 
 	pinPath := multiKprobePinPath(sensorPath)
@@ -663,7 +666,7 @@ func addKprobe(funcName string, f *v1alpha1.KProbeSpec, in *addKprobeIn) (id idt
 			}
 		}
 		// For kprobes, args default to userspace memory for syscalls, and kernel memory otherwise.
-		argMValue, err := getMetaValue(&a, f.Syscall)
+		argMValue, err := getMetaValue(&a, f.Syscall, false)
 		if err != nil {
 			return errFn(err)
 		}
@@ -703,6 +706,11 @@ func addKprobe(funcName string, f *v1alpha1.KProbeSpec, in *addKprobeIn) (id idt
 			return errFn(fmt.Errorf("ReturnArg type '%s' unsupported", f.ReturnArg.Type))
 		}
 		config.ArgReturn = int32(argType)
+		argMValue, err := getMetaValue(f.ReturnArg, f.Syscall, false)
+		if err != nil {
+			return errFn(err)
+		}
+		config.ArgMReturn = uint32(argMValue)
 		argsBTFSet[api.ReturnArgIndex] = true
 		argP := argPrinter{index: api.ReturnArgIndex, ty: argType}
 		argReturnPrinters = append(argReturnPrinters, argP)
@@ -716,6 +724,11 @@ func addKprobe(funcName string, f *v1alpha1.KProbeSpec, in *addKprobeIn) (id idt
 		argType := gt.GenericTypeFromString(argRetprobe.Type)
 
 		config.ArgReturnCopy = int32(argType)
+		argMValue, err := getMetaValue(argRetprobe, f.Syscall, false)
+		if err != nil {
+			return errFn(err)
+		}
+		config.ArgMReturnCopy = uint32(argMValue)
 
 		argP := argPrinter{index: int(argRetprobe.Index), ty: argType, label: argRetprobe.Label}
 		argReturnPrinters = append(argReturnPrinters, argP)
diff --git a/pkg/sensors/tracing/generictracepoint.go b/pkg/sensors/tracing/generictracepoint.go
index ca129dc57db..96d555469e7 100644
--- a/pkg/sensors/tracing/generictracepoint.go
+++ b/pkg/sensors/tracing/generictracepoint.go
@@ -235,7 +235,8 @@ func (out *genericTracepointArg) getGenericTypeId() (int, error) {
 func buildGenericTracepointArgs(info *tracepoint.Tracepoint, specArgs []v1alpha1.KProbeArg) ([]genericTracepointArg, error) {
 	ret := make([]genericTracepointArg, 0, len(specArgs))
 	nfields := uint32(len(info.Format.Fields))
-	syscall := info.Subsys == "syscalls" || info.Subsys == "raw_syscalls"
+	rawSyscalls := info.Subsys == "raw_syscalls"
+	syscall := rawSyscalls || info.Subsys == "syscalls"
 
 	for argIdx := range specArgs {
 		specArg := &specArgs[argIdx]
@@ -244,7 +245,7 @@ func buildGenericTracepointArgs(info *tracepoint.Tracepoint, specArgs []v1alpha1
 		}
 		field := info.Format.Fields[specArg.Index]
 		// Syscall tracepoint arguments are in userspace memory.
-		metaTp, err := getMetaValue(specArg, syscall)
+		metaTp, err := getMetaValue(specArg, syscall, rawSyscalls && specArg.Index == 5 && (specArg.Type == ""))
 		if err != nil {
 			return nil, fmt.Errorf("tracepoint %s/%s getMetaValue error: %w", info.Subsys, info.Event, err)
 		}
diff --git a/pkg/sensors/tracing/genericuprobe.go b/pkg/sensors/tracing/genericuprobe.go
index 23941a26237..6233e4eafea 100644
--- a/pkg/sensors/tracing/genericuprobe.go
+++ b/pkg/sensors/tracing/genericuprobe.go
@@ -245,7 +245,7 @@ func addUprobe(spec *v1alpha1.UProbeSpec, ids []idtable.EntryID, in *addUprobeIn
 			return nil, fmt.Errorf("Arg(%d) type '%s' unsupported", i, a.Type)
 		}
 		// For uprobes, args default to userspace memory.
-		argMValue, err := getMetaValue(&a, true)
+		argMValue, err := getMetaValue(&a, true, false)
 		if err != nil {
 			return nil, err
 		}
diff --git a/pkg/sensors/tracing/kprobe_test.go b/pkg/sensors/tracing/kprobe_test.go
index ad91d7fb009..141f512e6fb 100644
--- a/pkg/sensors/tracing/kprobe_test.go
+++ b/pkg/sensors/tracing/kprobe_test.go
@@ -4243,20 +4243,21 @@ func TestLoadKprobeSensor(t *testing.T) {
 		6: tus.SensorProg{Name: "generic_kprobe_output", Type: ebpf.Kprobe},
 		// retkprobe
 		7: tus.SensorProg{Name: "generic_retkprobe_event", Type: ebpf.Kprobe},
-		8: tus.SensorProg{Name: "generic_retkprobe_filter_arg", Type: ebpf.Kprobe},
-		9: tus.SensorProg{Name: "generic_retkprobe_actions", Type: ebpf.Kprobe},
-		10: tus.SensorProg{Name: "generic_retkprobe_output", Type: ebpf.Kprobe},
+		8: tus.SensorProg{Name: "generic_retkprobe_copy_arg", Type: ebpf.Kprobe},
+		9: tus.SensorProg{Name: "generic_retkprobe_filter_arg", Type: ebpf.Kprobe},
+		10: tus.SensorProg{Name: "generic_retkprobe_actions", Type: ebpf.Kprobe},
+		11: tus.SensorProg{Name: "generic_retkprobe_output", Type: ebpf.Kprobe},
 	}
 
 	var sensorMaps = []tus.SensorMap{
 		// all kprobe programs
-		tus.SensorMap{Name: "process_call_heap", Progs: []uint{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}},
+		tus.SensorMap{Name: "process_call_heap", Progs: []uint{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}},
 
 		// all but generic_kprobe_output
 		tus.SensorMap{Name: "kprobe_calls", Progs: []uint{0, 1, 2, 3, 4, 5}},
 
 		// generic_retkprobe_event
-		tus.SensorMap{Name: "retkprobe_calls", Progs: []uint{7, 8, 9}},
+		tus.SensorMap{Name: "retkprobe_calls", Progs: []uint{7, 8, 9, 10}},
 
 		// generic_kprobe_process_filter,generic_kprobe_filter_arg,
 		// generic_kprobe_actions,generic_kprobe_output
@@ -4269,7 +4270,7 @@ func TestLoadKprobeSensor(t *testing.T) {
 		tus.SensorMap{Name: "config_map", Progs: []uint{0, 1, 2}},
 
 		// generic_kprobe_process_event*,generic_kprobe_actions,retkprobe
-		tus.SensorMap{Name: "fdinstall_map", Progs: []uint{1, 2, 5, 7, 9}},
+		tus.SensorMap{Name: "fdinstall_map", Progs: []uint{1, 2, 5, 7, 10}},
 
 		// generic_kprobe_event
 		tus.SensorMap{Name: "tg_conf_map", Progs: []uint{0}},
@@ -4277,19 +4278,19 @@ func TestLoadKprobeSensor(t *testing.T) {
 
 	if kernels.EnableLargeProgs() {
 		// shared with base sensor
-		sensorMaps = append(sensorMaps, tus.SensorMap{Name: "execve_map", Progs: []uint{4, 5, 6, 7, 9}})
+		sensorMaps = append(sensorMaps, tus.SensorMap{Name: "execve_map", Progs: []uint{4, 5, 6, 8, 10}})
 
 		// generic_kprobe_process_event*,generic_kprobe_output,generic_retkprobe_output
-		sensorMaps = append(sensorMaps, tus.SensorMap{Name: "tcpmon_map", Progs: []uint{1, 2, 6, 10}})
+		sensorMaps = append(sensorMaps, tus.SensorMap{Name: "tcpmon_map", Progs: []uint{1, 2, 6, 7, 11}})
 
 		// generic_kprobe_process_event*,generic_kprobe_actions,retkprobe
-		sensorMaps = append(sensorMaps, tus.SensorMap{Name: "socktrack_map", Progs: []uint{1, 2, 5, 7, 9}})
+		sensorMaps = append(sensorMaps, tus.SensorMap{Name: "socktrack_map", Progs: []uint{1, 2, 5, 7, 10}})
 	} else {
 		// shared with base sensor
-		sensorMaps = append(sensorMaps, tus.SensorMap{Name: "execve_map", Progs: []uint{4, 7}})
+		sensorMaps = append(sensorMaps, tus.SensorMap{Name: "execve_map", Progs: []uint{4, 8}})
 
 		// generic_kprobe_output,generic_retkprobe_output
-		sensorMaps = append(sensorMaps, tus.SensorMap{Name: "tcpmon_map", Progs: []uint{6, 10}})
+		sensorMaps = append(sensorMaps, tus.SensorMap{Name: "tcpmon_map", Progs: []uint{6, 11}})
 	}
 
 	readHook := `
diff --git a/pkg/sensors/tracing/tracepoint_amd64_test.go b/pkg/sensors/tracing/tracepoint_amd64_test.go
index 82f60e6ff73..09da3c91139 100644
--- a/pkg/sensors/tracing/tracepoint_amd64_test.go
+++ b/pkg/sensors/tracing/tracepoint_amd64_test.go
@@ -51,9 +51,9 @@ func testListSyscallsDups(t *testing.T, checker *eventchecker.UnorderedEventChec
 	assert.NoError(t, err)
 }
 
-func TestTracepointListSyscallDups(t *testing.T) {
+func TestTracepointListSyscallDupsEqual(t *testing.T) {
 	if !kernels.MinKernelVersion("5.3.0") {
-		t.Skip("TestCopyFd requires at least 5.3.0 version")
+		t.Skip("TestTracepointListSyscallDupsEqual requires at least 5.3.0 version")
 	}
 
 	myPid := observertesthelper.GetMyPid()
@@ -87,11 +87,11 @@ spec:
       values:
       - ` + pidStr + `
     matchArgs:
-    - index: 0
+    - index: 4
       operator: "InMap"
      values:
      - "list:test"
-    - index: 1
+    - index: 5
       operator: "Equal"
      values:
      - 9999
diff --git a/pkg/sensors/tracing/tracepoint_test.go b/pkg/sensors/tracing/tracepoint_test.go
index d49e1462ff7..2b754028382 100644
--- a/pkg/sensors/tracing/tracepoint_test.go
+++ b/pkg/sensors/tracing/tracepoint_test.go
@@ -824,11 +824,11 @@ spec:
       values:
       - ` + pidStr + `
     matchArgs:
-    - index: 0
+    - index: 4
       operator: "InMap"
      values:
      - "list:test"
-    - index: 1
+    - index: 5
       operator: "InMap"
      values:
      - 9910:9920
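Note on the args.go hunk above: the per-argument meta value packs SizeArgIndex into bits 0-3, flag bits at 4 (ReturnCopy), 5 (MaxData), 6 (UserspaceData) and now 7 (RawSyscalls), with bits 16-31 carrying a const_buf size. The sketch below only restates that layout to show how such a value composes and decodes; composeMeta is a hypothetical stand-in for getMetaValue and is not code from this patch.

    package main

    import "fmt"

    // Bit layout mirrored from the patched args.go comment; the names are
    // illustrative only.
    const (
        argReturnCopyBit    = 1 << 4
        argMaxDataBit       = 1 << 5
        argUserspaceDataBit = 1 << 6
        argRawSyscallsBit   = 1 << 7
    )

    // composeMeta: SizeArgIndex occupies the low bits, the flag bits are
    // OR'ed on top, exactly as getMetaValue does for each flag it accepts.
    func composeMeta(sizeArgIndex int, returnCopy, userspace, rawSyscalls bool) int {
        meta := sizeArgIndex // 0-3 : SizeArgIndex
        if returnCopy {
            meta |= argReturnCopyBit
        }
        if userspace {
            meta |= argUserspaceDataBit
        }
        if rawSyscalls {
            meta |= argRawSyscallsBit
        }
        return meta
    }

    func main() {
        m := composeMeta(2, true, false, true)
        fmt.Printf("meta=0x%x returnCopy=%v rawSyscalls=%v\n",
            m, m&argReturnCopyBit != 0, m&argRawSyscallsBit != 0)
        // prints: meta=0x92 returnCopy=true rawSyscalls=true
    }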
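Note on the generictracepoint.go and tracepoint test hunks above: the rawSyscalls flag is only passed for a raw_syscalls tracepoint argument at index 5 with no explicit type. Assuming the usual raw_syscalls/sys_enter format (four common_* fields, then id, then args[6]), index 4 is the syscall id and index 5 is the raw argument array, which is consistent with the matchArgs indices in the tests moving from 0 and 1 to 4 and 5. The constants below merely restate that assumed field order for illustration; they are not Tetragon identifiers.

    package main

    import "fmt"

    // Assumed field order of raw_syscalls/sys_enter (see the tracefs file
    // events/raw_syscalls/sys_enter/format): indices 0-3 are the common_*
    // fields, then id, then args[6].
    const (
        fieldCommonType    = iota // 0: common_type
        fieldCommonFlags          // 1: common_flags
        fieldCommonPreempt        // 2: common_preempt_count
        fieldCommonPid            // 3: common_pid
        fieldSyscallID            // 4: id   -> matchArgs "index: 4" in the tests
        fieldSyscallArgs          // 5: args -> matchArgs "index: 5", raw-syscalls handling
    )

    func main() {
        fmt.Println("id index:", fieldSyscallID, "args index:", fieldSyscallArgs)
    }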
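Note on the object-file selection above: GenericKprobeObjs and createMultiKprobeSensor walk kernel versions newest first, so the new 5.4 branch has to sit between the 5.11 check and the 5.3 fallback; placed any later it would never be reached, placed earlier it would shadow the newer objects. A minimal sketch of that cascade, assuming a hypothetical minKernel helper in place of kernels.MinKernelVersion and ignoring the EnableLargeProgs gate on the 5.3 fallback:

    package main

    import (
        "fmt"
        "strconv"
        "strings"
    )

    // minKernel is a hypothetical stand-in for kernels.MinKernelVersion: it
    // reports whether the running major.minor version is >= the required one.
    func minKernel(running, required string) bool {
        parse := func(s string) (int, int) {
            parts := strings.SplitN(s, ".", 3)
            major, _ := strconv.Atoi(parts[0])
            minor := 0
            if len(parts) > 1 {
                minor, _ = strconv.Atoi(parts[1])
            }
            return major, minor
        }
        rMaj, rMin := parse(running)
        qMaj, qMin := parse(required)
        return rMaj > qMaj || (rMaj == qMaj && rMin >= qMin)
    }

    // pickObj mirrors the newest-first cascade used in the patch: 6.1, 5.11,
    // the new 5.4 variant, then the 5.3 fallback.
    func pickObj(running string) string {
        switch {
        case minKernel(running, "6.1"):
            return "bpf_generic_kprobe_v61.o"
        case minKernel(running, "5.11"):
            return "bpf_generic_kprobe_v511.o"
        case minKernel(running, "5.4"):
            return "bpf_generic_kprobe_v54.o"
        default:
            return "bpf_generic_kprobe_v53.o"
        }
    }

    func main() {
        for _, v := range []string{"6.2", "5.15", "5.4", "5.3"} {
            fmt.Println(v, "->", pickObj(v))
        }
    }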