diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6792a7940e1e6..e64ff1e89fb2a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1617,8 +1617,12 @@ struct bpf_array { #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ #define MAX_TAIL_CALL_CNT 33 -/* Maximum number of loops for bpf_loop */ -#define BPF_MAX_LOOPS BIT(23) +/* Maximum number of loops for bpf_loop and bpf_iter_num. + * It's enum to expose it (and thus make it discoverable) through BTF. + */ +enum { + BPF_MAX_LOOPS = 8 * 1024 * 1024, +}; #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ BPF_F_RDONLY_PROG | \ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 18538bad2b8c7..0c052bc799401 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -59,6 +59,14 @@ struct bpf_active_lock { u32 id; }; +#define ITER_PREFIX "bpf_iter_" + +enum bpf_iter_state { + BPF_ITER_STATE_INVALID, /* for non-first slot */ + BPF_ITER_STATE_ACTIVE, + BPF_ITER_STATE_DRAINED, +}; + struct bpf_reg_state { /* Ordering of fields matters. See states_equal() */ enum bpf_reg_type type; @@ -103,6 +111,18 @@ struct bpf_reg_state { bool first_slot; } dynptr; + /* For bpf_iter stack slots */ + struct { + /* BTF container and BTF type ID describing + * struct bpf_iter_ of an iterator state + */ + struct btf *btf; + u32 btf_id; + /* packing following two fields to fit iter state into 16 bytes */ + enum bpf_iter_state state:2; + int depth:30; + } iter; + /* Max size from any of the above. */ struct { unsigned long raw1; @@ -141,6 +161,8 @@ struct bpf_reg_state { * same reference to the socket, to determine proper reference freeing. * For stack slots that are dynptrs, this is used to track references to * the dynptr to determine proper reference freeing. + * Similarly to dynptrs, we use ID to track "belonging" of a reference + * to a specific instance of bpf_iter. */ u32 id; /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned @@ -211,9 +233,11 @@ enum bpf_stack_slot_type { * is stored in bpf_stack_state->spilled_ptr.dynptr.type */ STACK_DYNPTR, + STACK_ITER, }; #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ + #define BPF_DYNPTR_SIZE sizeof(struct bpf_dynptr_kern) #define BPF_DYNPTR_NR_SLOTS (BPF_DYNPTR_SIZE / BPF_REG_SIZE) @@ -448,6 +472,7 @@ struct bpf_insn_aux_data { bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ bool zext_dst; /* this insn zero extends dst reg */ bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ + bool is_iter_next; /* bpf_iter__next() kfunc call */ u8 alu_state; /* used in combination with alu_limit */ /* below fields are initialized once */ diff --git a/include/linux/btf.h b/include/linux/btf.h index 556b3e2e7471e..1bba0827e8c46 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -71,6 +71,10 @@ #define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ #define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */ #define KF_RCU (1 << 7) /* kfunc takes either rcu or trusted pointer arguments */ +/* only one of KF_ITER_{NEW,NEXT,DESTROY} could be specified per kfunc */ +#define KF_ITER_NEW (1 << 8) /* kfunc implements BPF iter constructor */ +#define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */ +#define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */ /* * Tag marking a kernel function as a kfunc. 
This is meant to minimize the diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 976b194eb7754..4abddb668a107 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7112,4 +7112,12 @@ enum { BPF_F_TIMER_ABS = (1ULL << 0), }; +/* BPF numbers iterator state */ +struct bpf_iter_num { + /* opaque iterator state; having __u64 here allows to preserve correct + * alignment requirements in vmlinux.h, generated from BTF + */ + __u64 __opaque[1]; +} __attribute__((aligned(8))); + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 5dc307bdeaebc..96856f130cbff 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -776,3 +776,73 @@ const struct bpf_func_proto bpf_loop_proto = { .arg3_type = ARG_PTR_TO_STACK_OR_NULL, .arg4_type = ARG_ANYTHING, }; + +struct bpf_iter_num_kern { + int cur; /* current value, inclusive */ + int end; /* final value, exclusive */ +} __aligned(8); + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in vmlinux BTF"); + +__bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) +{ + struct bpf_iter_num_kern *s = (void *)it; + + BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num)); + BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num)); + + BTF_TYPE_EMIT(struct btf_iter_num); + + /* start == end is legit, it's an empty range and we'll just get NULL + * on first (and any subsequent) bpf_iter_num_next() call + */ + if (start > end) { + s->cur = s->end = 0; + return -EINVAL; + } + + /* avoid overflows, e.g., if start == INT_MIN and end == INT_MAX */ + if ((s64)end - (s64)start > BPF_MAX_LOOPS) { + s->cur = s->end = 0; + return -E2BIG; + } + + /* user will call bpf_iter_num_next() first, + * which will set s->cur to exactly start value; + * underflow shouldn't matter + */ + s->cur = start - 1; + s->end = end; + + return 0; +} + +__bpf_kfunc int *bpf_iter_num_next(struct bpf_iter_num* it) +{ + struct bpf_iter_num_kern *s = (void *)it; + + /* check failed initialization or if we are done (same behavior); + * need to be careful about overflow, so convert to s64 for checks, + * e.g., if s->cur == s->end == INT_MAX, we can't just do + * s->cur + 1 >= s->end + */ + if ((s64)(s->cur + 1) >= s->end) { + s->cur = s->end = 0; + return NULL; + } + + s->cur++; + + return &s->cur; +} + +__bpf_kfunc void bpf_iter_num_destroy(struct bpf_iter_num *it) +{ + struct bpf_iter_num_kern *s = (void *)it; + + s->cur = s->end = 0; +} + +__diag_pop(); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index a8cb09e5973b7..71758cd15b070 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -7596,6 +7596,108 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE) BTF_TRACING_TYPE_xxx #undef BTF_TRACING_TYPE +static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name, + const struct btf_type *func, u32 func_flags) +{ + u32 flags = func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY); + const char *name, *sfx, *iter_name; + const struct btf_param *arg; + const struct btf_type *t; + char exp_name[128]; + u32 nr_args; + + /* exactly one of KF_ITER_{NEW,NEXT,DESTROY} can be set */ + if (!flags || (flags & (flags - 1))) + return -EINVAL; + + /* any BPF iter kfunc should have `struct bpf_iter_ *` first arg */ + nr_args = btf_type_vlen(func); + if (nr_args < 1) + return -EINVAL; + + arg = &btf_params(func)[0]; + t = btf_type_skip_modifiers(btf, arg->type, NULL); + 
if (!t || !btf_type_is_ptr(t)) + return -EINVAL; + t = btf_type_skip_modifiers(btf, t->type, NULL); + if (!t || !__btf_type_is_struct(t)) + return -EINVAL; + + name = btf_name_by_offset(btf, t->name_off); + if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1)) + return -EINVAL; + + /* sizeof(struct bpf_iter_) should be a multiple of 8 to + * fit nicely in stack slots + */ + if (t->size == 0 || (t->size % 8)) + return -EINVAL; + + /* validate bpf_iter__{new,next,destroy}(struct bpf_iter_ *) + * naming pattern + */ + iter_name = name + sizeof(ITER_PREFIX) - 1; + if (flags & KF_ITER_NEW) + sfx = "new"; + else if (flags & KF_ITER_NEXT) + sfx = "next"; + else /* (flags & KF_ITER_DESTROY) */ + sfx = "destroy"; + + snprintf(exp_name, sizeof(exp_name), "bpf_iter_%s_%s", iter_name, sfx); + if (strcmp(func_name, exp_name)) + return -EINVAL; + + /* only iter constructor should have extra arguments */ + if (!(flags & KF_ITER_NEW) && nr_args != 1) + return -EINVAL; + + if (flags & KF_ITER_NEXT) { + /* bpf_iter__next() should return pointer */ + t = btf_type_skip_modifiers(btf, func->type, NULL); + if (!t || !btf_type_is_ptr(t)) + return -EINVAL; + } + + if (flags & KF_ITER_DESTROY) { + /* bpf_iter__destroy() should return void */ + t = btf_type_by_id(btf, func->type); + if (!t || !btf_type_is_void(t)) + return -EINVAL; + } + + return 0; +} + +static int btf_check_kfunc_protos(struct btf *btf, u32 func_id, u32 func_flags) +{ + const struct btf_type *func; + const char *func_name; + int err; + + /* any kfunc should be FUNC -> FUNC_PROTO */ + func = btf_type_by_id(btf, func_id); + if (!func || !btf_type_is_func(func)) + return -EINVAL; + + /* sanity check kfunc name */ + func_name = btf_name_by_offset(btf, func->name_off); + if (!func_name || !func_name[0]) + return -EINVAL; + + func = btf_type_by_id(btf, func->type); + if (!func || !btf_type_is_func_proto(func)) + return -EINVAL; + + if (func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY)) { + err = btf_check_iter_kfuncs(btf, func_name, func, func_flags); + if (err) + return err; + } + + return 0; +} + /* Kernel Function (kfunc) BTF ID set registration API */ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, @@ -7772,7 +7874,7 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook, const struct btf_kfunc_id_set *kset) { struct btf *btf; - int ret; + int ret, i; btf = btf_get_module_btf(kset->owner); if (!btf) { @@ -7789,7 +7891,15 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook, if (IS_ERR(btf)) return PTR_ERR(btf); + for (i = 0; i < kset->set->cnt; i++) { + ret = btf_check_kfunc_protos(btf, kset->set->pairs[i].id, + kset->set->pairs[i].flags); + if (ret) + goto err_out; + } + ret = btf_populate_kfunc_set(btf, hook, kset->set); +err_out: btf_put(btf); return ret; } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 637ac4e92e756..f9b7eeedce08b 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2411,6 +2411,9 @@ BTF_ID_FLAGS(func, bpf_rcu_read_lock) BTF_ID_FLAGS(func, bpf_rcu_read_unlock) BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW) +BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) BTF_SET8_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b2116ca78d9a3..45a0822844648 100644 --- 
a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -302,6 +302,10 @@ struct bpf_kfunc_call_arg_meta { enum bpf_dynptr_type type; u32 id; } initialized_dynptr; + struct { + u8 spi; + u8 frameno; + } iter; u64 mem_size; }; @@ -668,6 +672,7 @@ static char slot_type_char[] = { [STACK_MISC] = 'm', [STACK_ZERO] = '0', [STACK_DYNPTR] = 'd', + [STACK_ITER] = 'i', }; static void print_liveness(struct bpf_verifier_env *env, @@ -742,6 +747,11 @@ static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *re return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS); } +static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots) +{ + return stack_slot_obj_get_spi(env, reg, "iter", nr_slots); +} + static const char *kernel_type_name(const struct btf* btf, u32 id) { return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); @@ -766,6 +776,30 @@ static const char *dynptr_type_str(enum bpf_dynptr_type type) } } +static const char *iter_type_str(const struct btf *btf, u32 btf_id) +{ + if (!btf || btf_id == 0) + return ""; + + /* we already validated that type is valid and has conforming name */ + return kernel_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1; +} + +static const char *iter_state_str(enum bpf_iter_state state) +{ + switch (state) { + case BPF_ITER_STATE_ACTIVE: + return "active"; + case BPF_ITER_STATE_DRAINED: + return "drained"; + case BPF_ITER_STATE_INVALID: + return ""; + default: + WARN_ONCE(1, "unknown iter state %d\n", state); + return ""; + } +} + static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) { env->scratched_regs |= 1U << regno; @@ -1118,6 +1152,157 @@ static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg } } +static void __mark_reg_known_zero(struct bpf_reg_state *reg); + +static int mark_stack_slots_iter(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int insn_idx, + struct btf *btf, u32 btf_id, int nr_slots) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i, j, id; + + spi = iter_get_spi(env, reg, nr_slots); + if (spi < 0) + return spi; + + id = acquire_reference_state(env, insn_idx); + if (id < 0) + return id; + + for (i = 0; i < nr_slots; i++) { + struct bpf_stack_state *slot = &state->stack[spi - i]; + struct bpf_reg_state *st = &slot->spilled_ptr; + + __mark_reg_known_zero(st); + st->type = PTR_TO_STACK; /* we don't have dedicated reg type */ + st->live |= REG_LIVE_WRITTEN; + st->ref_obj_id = i == 0 ? 
id : 0; + st->iter.btf = btf; + st->iter.btf_id = btf_id; + st->iter.state = BPF_ITER_STATE_ACTIVE; + st->iter.depth = 0; + + for (j = 0; j < BPF_REG_SIZE; j++) + slot->slot_type[j] = STACK_ITER; + + mark_stack_slot_scratched(env, spi - i); + } + + return 0; +} + +static int unmark_stack_slots_iter(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int nr_slots) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i, j; + + spi = iter_get_spi(env, reg, nr_slots); + if (spi < 0) + return spi; + + for (i = 0; i < nr_slots; i++) { + struct bpf_stack_state *slot = &state->stack[spi - i]; + struct bpf_reg_state *st = &slot->spilled_ptr; + + if (i == 0) + WARN_ON_ONCE(release_reference(env, st->ref_obj_id)); + + __mark_reg_not_init(env, st); + + /* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */ + st->live |= REG_LIVE_WRITTEN; + + for (j = 0; j < BPF_REG_SIZE; j++) + slot->slot_type[j] = STACK_INVALID; + + mark_stack_slot_scratched(env, spi - i); + } + + return 0; +} + +static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int nr_slots) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i, j; + + /* For -ERANGE (i.e. spi not falling into allocated stack slots), we + * will do check_mem_access to check and update stack bounds later, so + * return true for that case. + */ + spi = iter_get_spi(env, reg, nr_slots); + if (spi == -ERANGE) + return true; + if (spi < 0) + return false; + + for (i = 0; i < nr_slots; i++) { + struct bpf_stack_state *slot = &state->stack[spi - i]; + + for (j = 0; j < BPF_REG_SIZE; j++) + if (slot->slot_type[j] == STACK_ITER) + return false; + } + + return true; +} + +static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + struct btf *btf, u32 btf_id, int nr_slots) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i, j; + + spi = iter_get_spi(env, reg, nr_slots); + if (spi < 0) + return false; + + for (i = 0; i < nr_slots; i++) { + struct bpf_stack_state *slot = &state->stack[spi - i]; + struct bpf_reg_state *st = &slot->spilled_ptr; + + /* only main (first) slot has ref_obj_id set */ + if (i == 0 && !st->ref_obj_id) + return false; + if (i != 0 && st->ref_obj_id) + return false; + if (st->iter.btf != btf || st->iter.btf_id != btf_id) + return false; + + for (j = 0; j < BPF_REG_SIZE; j++) + if (slot->slot_type[j] != STACK_ITER) + return false; + } + + return true; +} + +/* Check if given stack slot is "special": + * - spilled register state (STACK_SPILL); + * - dynptr state (STACK_DYNPTR); + * - iter state (STACK_ITER). + */ +static bool is_stack_slot_special(const struct bpf_stack_state *stack) +{ + enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1]; + + switch (type) { + case STACK_SPILL: + case STACK_DYNPTR: + case STACK_ITER: + return true; + case STACK_INVALID: + case STACK_MISC: + case STACK_ZERO: + return false; + default: + WARN_ONCE(1, "unknown stack slot type %d\n", type); + return true; + } +} + /* The reg state of a pointer or a bounded scalar was saved when * it was spilled to the stack. 
*/ @@ -1267,6 +1452,19 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (reg->ref_obj_id) verbose(env, "(ref_id=%d)", reg->ref_obj_id); break; + case STACK_ITER: + /* only main slot has ref_obj_id set; skip others */ + reg = &state->stack[i].spilled_ptr; + if (!reg->ref_obj_id) + continue; + + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, reg->live); + verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)", + iter_type_str(reg->iter.btf, reg->iter.btf_id), + reg->ref_obj_id, iter_state_str(reg->iter.state), + reg->iter.depth); + break; case STACK_MISC: case STACK_ZERO: default: @@ -2710,6 +2908,25 @@ static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state * state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64); } +static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + int spi, int nr_slots) +{ + struct bpf_func_state *state = func(env, reg); + int err, i; + + for (i = 0; i < nr_slots; i++) { + struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr; + + err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64); + if (err) + return err; + + mark_stack_slot_scratched(env, spi - i); + } + + return 0; +} + /* This function is supposed to be used by the following 32-bit optimization * code only. It returns TRUE if the source or destination register operates * on 64-bit, otherwise return FALSE. @@ -3691,8 +3908,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, /* regular write of data into stack destroys any spilled ptr */ state->stack[spi].spilled_ptr.type = NOT_INIT; - /* Mark slots as STACK_MISC if they belonged to spilled ptr. */ - if (is_spilled_reg(&state->stack[spi])) + /* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. 
*/ + if (is_stack_slot_special(&state->stack[spi])) for (i = 0; i < BPF_REG_SIZE; i++) scrub_spilled_slot(&state->stack[spi].slot_type[i]); @@ -6506,6 +6723,203 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn return err; } +static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi) +{ + struct bpf_func_state *state = func(env, reg); + + return state->stack[spi].spilled_ptr.ref_obj_id; +} + +static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY); +} + +static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_ITER_NEW; +} + +static bool is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_ITER_NEXT; +} + +static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_ITER_DESTROY; +} + +static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg) +{ + /* btf_check_iter_kfuncs() guarantees that first argument of any iter + * kfunc is iter state pointer + */ + return arg == 0 && is_iter_kfunc(meta); +} + +static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx, + struct bpf_kfunc_call_arg_meta *meta) +{ + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + const struct btf_type *t; + const struct btf_param *arg; + int spi, err, i, nr_slots; + u32 btf_id; + + /* btf_check_iter_kfuncs() ensures we don't need to validate anything here */ + arg = &btf_params(meta->func_proto)[0]; + t = btf_type_skip_modifiers(meta->btf, arg->type, NULL); /* PTR */ + t = btf_type_skip_modifiers(meta->btf, t->type, &btf_id); /* STRUCT */ + nr_slots = t->size / BPF_REG_SIZE; + + spi = iter_get_spi(env, reg, nr_slots); + if (spi < 0 && spi != -ERANGE) + return spi; + + meta->iter.spi = spi; + meta->iter.frameno = reg->frameno; + + if (is_iter_new_kfunc(meta)) { + /* bpf_iter__new() expects pointer to uninit iter state */ + if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) { + verbose(env, "expected uninitialized iter_%s as arg #%d\n", + iter_type_str(meta->btf, btf_id), regno); + return -EINVAL; + } + + for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) { + err = check_mem_access(env, insn_idx, regno, + i, BPF_DW, BPF_WRITE, -1, false); + if (err) + return err; + } + + err = mark_stack_slots_iter(env, reg, insn_idx, meta->btf, btf_id, nr_slots); + if (err) + return err; + } else { + /* iter_next() or iter_destroy() expect initialized iter state*/ + if (!is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots)) { + verbose(env, "expected an initialized iter_%s as arg #%d\n", + iter_type_str(meta->btf, btf_id), regno); + return -EINVAL; + } + + err = mark_iter_read(env, reg, spi, nr_slots); + if (err) + return err; + + meta->ref_obj_id = iter_ref_obj_id(env, reg, spi); + + if (is_iter_destroy_kfunc(meta)) { + err = unmark_stack_slots_iter(env, reg, nr_slots); + if (err) + return err; + } + } + + return 0; +} + +/* process_iter_next_call() is called when verifier gets to iterator's next + * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer + * to it as just "iter_next()" in comments below. + * + * BPF verifier relies on a crucial contract for any iter_next() + * implementation: it should *eventually* return NULL, and once that happens + * it should keep returning NULL. 
That is, once iterator exhausts elements to + * iterate, it should never reset or spuriously return new elements. + * + * With the assumption of such contract, process_iter_next_call() simulates + * a fork in the verifier state to validate loop logic correctness and safety + * without having to simulate infinite amount of iterations. + * + * In current state, we first assume that iter_next() returned NULL and + * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such + * conditions we should not form an infinite loop and should eventually reach + * exit. + * + * Besides that, we also fork current state and enqueue it for later + * verification. In a forked state we keep iterator state as ACTIVE + * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We + * also bump iteration depth to prevent erroneous infinite loop detection + * later on (see iter_active_depths_differ() comment for details). In this + * state we assume that we'll eventually loop back to another iter_next() + * calls (it could be in exactly same location or in some other instruction, + * it doesn't matter, we don't make any unnecessary assumptions about this, + * everything revolves around iterator state in a stack slot, not which + * instruction is calling iter_next()). When that happens, we either will come + * to iter_next() with equivalent state and can conclude that next iteration + * will proceed in exactly the same way as we just verified, so it's safe to + * assume that loop converges. If not, we'll go on another iteration + * simulation with a different input state, until all possible starting states + * are validated or we reach maximum number of instructions limit. + * + * This way, we will either exhaustively discover all possible input states + * that iterator loop can start with and eventually will converge, or we'll + * effectively regress into bounded loop simulation logic and either reach + * maximum number of instructions if loop is not provably convergent, or there + * is some statically known limit on number of iterations (e.g., if there is + * an explicit `if n > 100 then break;` statement somewhere in the loop). + * + * One very subtle but very important aspect is that we *always* simulate NULL + * condition first (as the current state) before we simulate non-NULL case. + * This has to do with intricacies of scalar precision tracking. By simulating + * "exit condition" of iter_next() returning NULL first, we make sure all the + * relevant precision marks *that will be set **after** we exit iterator loop* + * are propagated backwards to common parent state of NULL and non-NULL + * branches. Thanks to that, state equivalence checks done later in forked + * state, when reaching iter_next() for ACTIVE iterator, can assume that + * precision marks are finalized and won't change. Because simulating another + * ACTIVE iterator iteration won't change them (because given same input + * states we'll end up with exactly same output states which we are currently + * comparing; and verification after the loop already propagated back what + * needs to be **additionally** tracked as precise). It's subtle, grok + * precision tracking for more intuitive understanding. 
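+ *
+ * As a concrete (purely illustrative) sketch, for the numbers iterator a
+ * loop like
+ *
+ *   bpf_iter_num_new(&it, 0, n);
+ *   while ((p = bpf_iter_num_next(&it))) { ... }
+ *   bpf_iter_num_destroy(&it);
+ *
+ * is explored as follows: at bpf_iter_num_next() the current state
+ * continues with r0 == NULL and the iterator slot switched to DRAINED,
+ * covering the loop exit path, while (as long as the slot is still
+ * ACTIVE) a forked state with r0 != NULL, the slot kept ACTIVE, and
+ * iter.depth incremented is queued to validate the loop body.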
+ */ +static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, + struct bpf_kfunc_call_arg_meta *meta) +{ + struct bpf_verifier_state *cur_st = env->cur_state, *queued_st; + struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr; + struct bpf_reg_state *cur_iter, *queued_iter; + int iter_frameno = meta->iter.frameno; + int iter_spi = meta->iter.spi; + + BTF_TYPE_EMIT(struct bpf_iter); + + cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr; + + if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE && + cur_iter->iter.state != BPF_ITER_STATE_DRAINED) { + verbose(env, "verifier internal error: unexpected iterator state %d (%s)\n", + cur_iter->iter.state, iter_state_str(cur_iter->iter.state)); + return -EFAULT; + } + + if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) { + /* branch out active iter state */ + queued_st = push_stack(env, insn_idx + 1, insn_idx, false); + if (!queued_st) + return -ENOMEM; + + queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr; + queued_iter->iter.state = BPF_ITER_STATE_ACTIVE; + queued_iter->iter.depth++; + + queued_fr = queued_st->frame[queued_st->curframe]; + mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]); + } + + /* switch to DRAINED state, but keep the depth unchanged */ + /* mark current iter state as drained and assume returned NULL */ + cur_iter->iter.state = BPF_ITER_STATE_DRAINED; + __mark_reg_const_zero(&cur_fr->regs[BPF_REG_0]); + + return 0; +} + static bool arg_type_is_mem_size(enum bpf_arg_type type) { return type == ARG_CONST_SIZE || @@ -9099,6 +9513,7 @@ enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */ KF_ARG_PTR_TO_KPTR, /* PTR_TO_KPTR but type specific */ KF_ARG_PTR_TO_DYNPTR, + KF_ARG_PTR_TO_ITER, KF_ARG_PTR_TO_LIST_HEAD, KF_ARG_PTR_TO_LIST_NODE, KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */ @@ -9220,6 +9635,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_dynptr(meta->btf, &args[argno])) return KF_ARG_PTR_TO_DYNPTR; + if (is_kfunc_arg_iter(meta, argno)) + return KF_ARG_PTR_TO_ITER; + if (is_kfunc_arg_list_head(meta->btf, &args[argno])) return KF_ARG_PTR_TO_LIST_HEAD; @@ -9848,6 +10266,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ break; case KF_ARG_PTR_TO_KPTR: case KF_ARG_PTR_TO_DYNPTR: + case KF_ARG_PTR_TO_ITER: case KF_ARG_PTR_TO_LIST_HEAD: case KF_ARG_PTR_TO_LIST_NODE: case KF_ARG_PTR_TO_RB_ROOT: @@ -9944,6 +10363,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ break; } + case KF_ARG_PTR_TO_ITER: + ret = process_iter_arg(env, regno, insn_idx, meta); + if (ret < 0) + return ret; + break; case KF_ARG_PTR_TO_LIST_HEAD: if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { @@ -10079,24 +10503,21 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return 0; } -static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, - int *insn_idx_p) +static int fetch_kfunc_meta(struct bpf_verifier_env *env, + struct bpf_insn *insn, + struct bpf_kfunc_call_arg_meta *meta, + const char **kfunc_name) { - const struct btf_type *t, *func, *func_proto, *ptr_type; - u32 i, nargs, func_id, ptr_type_id, release_ref_obj_id; - struct bpf_reg_state *regs = cur_regs(env); - const char *func_name, *ptr_type_name; - bool sleepable, rcu_lock, rcu_unlock; - struct bpf_kfunc_call_arg_meta meta; - int err, insn_idx = *insn_idx_p; - const struct btf_param 
*args; - const struct btf_type *ret_t; + const struct btf_type *func, *func_proto; + u32 func_id, *kfunc_flags; + const char *func_name; struct btf *desc_btf; - u32 *kfunc_flags; - /* skip for now, but return error when we find this in fixup_kfunc_call */ + if (kfunc_name) + *kfunc_name = NULL; + if (!insn->imm) - return 0; + return -EINVAL; desc_btf = find_kfunc_desc_btf(env, insn->off); if (IS_ERR(desc_btf)) @@ -10105,22 +10526,53 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, func_id = insn->imm; func = btf_type_by_id(desc_btf, func_id); func_name = btf_name_by_offset(desc_btf, func->name_off); + if (kfunc_name) + *kfunc_name = func_name; func_proto = btf_type_by_id(desc_btf, func->type); kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id); if (!kfunc_flags) { - verbose(env, "calling kernel function %s is not allowed\n", - func_name); return -EACCES; } - /* Prepare kfunc call metadata */ - memset(&meta, 0, sizeof(meta)); - meta.btf = desc_btf; - meta.func_id = func_id; - meta.kfunc_flags = *kfunc_flags; - meta.func_proto = func_proto; - meta.func_name = func_name; + memset(meta, 0, sizeof(*meta)); + meta->btf = desc_btf; + meta->func_id = func_id; + meta->kfunc_flags = *kfunc_flags; + meta->func_proto = func_proto; + meta->func_name = func_name; + + return 0; +} + +static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx_p) +{ + const struct btf_type *t, *ptr_type; + u32 i, nargs, ptr_type_id, release_ref_obj_id; + struct bpf_reg_state *regs = cur_regs(env); + const char *func_name, *ptr_type_name; + bool sleepable, rcu_lock, rcu_unlock; + struct bpf_kfunc_call_arg_meta meta; + struct bpf_insn_aux_data *insn_aux; + int err, insn_idx = *insn_idx_p; + const struct btf_param *args; + const struct btf_type *ret_t; + struct btf *desc_btf; + + /* skip for now, but return error when we find this in fixup_kfunc_call */ + if (!insn->imm) + return 0; + + err = fetch_kfunc_meta(env, insn, &meta, &func_name); + if (err == -EACCES && func_name) + verbose(env, "calling kernel function %s is not allowed\n", func_name); + if (err) + return err; + desc_btf = meta.btf; + insn_aux = &env->insn_aux_data[insn_idx]; + + insn_aux->is_iter_next = is_iter_next_kfunc(&meta); if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) { verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n"); @@ -10173,7 +10625,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, err = release_reference(env, regs[meta.release_regno].ref_obj_id); if (err) { verbose(env, "kfunc %s#%d reference has not been acquired before\n", - func_name, func_id); + func_name, meta.func_id); return err; } } @@ -10185,14 +10637,14 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, err = ref_convert_owning_non_owning(env, release_ref_obj_id); if (err) { verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n", - func_name, func_id); + func_name, meta.func_id); return err; } err = release_reference(env, release_ref_obj_id); if (err) { verbose(env, "kfunc %s#%d reference has not been acquired before\n", - func_name, func_id); + func_name, meta.func_id); return err; } } @@ -10202,7 +10654,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, set_rbtree_add_callback_state); if (err) { verbose(env, "kfunc %s#%d failed callback verification\n", - func_name, func_id); + func_name, meta.func_id); return err; } } @@ 
-10211,7 +10663,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, mark_reg_not_init(env, regs, caller_saved[i]); /* Check return type */ - t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); + t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL); if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) { /* Only exception is bpf_obj_new_impl */ @@ -10260,11 +10712,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].btf = ret_btf; regs[BPF_REG_0].btf_id = ret_btf_id; - env->insn_aux_data[insn_idx].obj_new_size = ret_t->size; - env->insn_aux_data[insn_idx].kptr_struct_meta = + insn_aux->obj_new_size = ret_t->size; + insn_aux->kptr_struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id); } else if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { - env->insn_aux_data[insn_idx].kptr_struct_meta = + insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_obj_drop.btf, meta.arg_obj_drop.btf_id); } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] || @@ -10397,8 +10849,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].id = ++env->id_gen; } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ - nargs = btf_type_vlen(func_proto); - args = (const struct btf_param *)(func_proto + 1); + nargs = btf_type_vlen(meta.func_proto); + args = (const struct btf_param *)(meta.func_proto + 1); for (i = 0; i < nargs; i++) { u32 regno = i + 1; @@ -10410,6 +10862,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, mark_btf_func_reg_size(env, regno, t->size); } + if (is_iter_next_kfunc(&meta)) { + err = process_iter_next_call(env, insn_idx, &meta); + if (err) + return err; + } + return 0; } @@ -13522,6 +13980,13 @@ static int visit_insn(int t, struct bpf_verifier_env *env) * async state will be pushed for further exploration. 
*/ mark_prune_point(env, t); + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { + struct bpf_kfunc_call_arg_meta meta; + + ret = fetch_kfunc_meta(env, insn, &meta, NULL); + if (ret == 0 && is_iter_next_kfunc(&meta)) + mark_prune_point(env, t); + } return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL); case BPF_JA: @@ -14275,6 +14740,8 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, * didn't use them */ for (i = 0; i < old->allocated_stack; i++) { + struct bpf_reg_state *old_reg, *cur_reg; + spi = i / BPF_REG_SIZE; if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) { @@ -14331,9 +14798,6 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, return false; break; case STACK_DYNPTR: - { - const struct bpf_reg_state *old_reg, *cur_reg; - old_reg = &old->stack[spi].spilled_ptr; cur_reg = &cur->stack[spi].spilled_ptr; if (old_reg->dynptr.type != cur_reg->dynptr.type || @@ -14341,7 +14805,22 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) return false; break; - } + case STACK_ITER: + old_reg = &old->stack[spi].spilled_ptr; + cur_reg = &cur->stack[spi].spilled_ptr; + /* iter.depth is not compared between states as it + * doesn't matter for correctness and would otherwise + * prevent convergence; we maintain it only to prevent + * infinite loop check triggering, see + * iter_active_depths_differ() + */ + if (old_reg->iter.btf != cur_reg->iter.btf || + old_reg->iter.btf_id != cur_reg->iter.btf_id || + old_reg->iter.state != cur_reg->iter.state || + /* ignore {old_reg,cur_reg}->iter.depth, see above */ + !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) + return false; + break; case STACK_MISC: case STACK_ZERO: case STACK_INVALID: @@ -14600,6 +15079,92 @@ static bool states_maybe_looping(struct bpf_verifier_state *old, return true; } +static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx) +{ + return env->insn_aux_data[insn_idx].is_iter_next; +} + +/* is_state_visited() handles iter_next() (see process_iter_next_call() for + * terminology) calls specially: as opposed to bounded BPF loops, it *expects* + * states to match, which otherwise would look like an infinite loop. So while + * iter_next() calls are taken care of, we still need to be careful and + * prevent erroneous and too eager declaration of "ininite loop", when + * iterators are involved. + * + * Here's a situation in pseudo-BPF assembly form: + * + * 0: again: ; set up iter_next() call args + * 1: r1 = &it ; + * 2: call bpf_iter_num_next ; this is iter_next() call + * 3: if r0 == 0 goto done + * 4: ... something useful here ... + * 5: goto again ; another iteration + * 6: done: + * 7: r1 = &it + * 8: call bpf_iter_num_destroy ; clean up iter state + * 9: exit + * + * This is a typical loop. Let's assume that we have a prune point at 1:, + * before we get to `call bpf_iter_num_next` (e.g., because of that `goto + * again`, assuming other heuristics don't get in a way). + * + * When we first time come to 1:, let's say we have some state X. We proceed + * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit. + * Now we come back to validate that forked ACTIVE state. We proceed through + * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we + * are converging. But the problem is that we don't know that yet, as this + * convergence has to happen at iter_next() call site only. 
So if nothing is + * done, at 1: verifier will use bounded loop logic and declare infinite + * looping (and would be *technically* correct, if not for iterator's + * "eventual sticky NULL" contract, see process_iter_next_call()). But we + * don't want that. So, in process_iter_next_call(), when we go on + * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's + * a different iteration. So when we suspect an infinite loop, we additionally + * check if the depths of any of the *ACTIVE* iterator states differ. If yes, we + * pretend we are not looping and wait for the next iter_next() call. + * + * This only applies to ACTIVE state. In DRAINED state we don't expect to + * loop, because that would actually mean infinite loop, as DRAINED state is + * "sticky", and so we'll keep returning into the same instruction with the + * same state (at least in one of the possible code paths). + * + * This approach allows us to keep the infinite loop heuristic even in the face of + * an active iterator. E.g., the C snippet below is, and will be, detected as + * infinitely looping: + * + * struct bpf_iter_num it; + * int *p, x; + * + * bpf_iter_num_new(&it, 0, 10); + * while ((p = bpf_iter_num_next(&it))) { + * x = *p; + * while (x--) {} // <<-- infinite loop here + * } + * + */ +static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur) +{ + struct bpf_reg_state *slot, *cur_slot; + struct bpf_func_state *state; + int i, fr; + + for (fr = old->curframe; fr >= 0; fr--) { + state = old->frame[fr]; + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { + if (state->stack[i].slot_type[0] != STACK_ITER) + continue; + + slot = &state->stack[i].spilled_ptr; + if (slot->iter.state != BPF_ITER_STATE_ACTIVE) + continue; + + cur_slot = &cur->frame[fr]->stack[i].spilled_ptr; + if (cur_slot->iter.depth != slot->iter.depth) + return true; + } + } + return false; +} static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { @@ -14647,8 +15212,46 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * Since the verifier still needs to catch infinite loops * inside async callbacks. */ - } else if (states_maybe_looping(&sl->state, cur) && - states_equal(env, &sl->state, cur)) { + goto skip_inf_loop_check; + } + /* BPF open-coded iterator loop detection is special. + * states_maybe_looping() logic is too simplistic in detecting + * states that *might* be equivalent, because it doesn't know + * about ID remapping, so don't even perform it. + * See process_iter_next_call() and iter_active_depths_differ() + * for an overview of the logic. When the current and one of the parent + * states are detected as equivalent, it's a good thing: we prove + * convergence and can stop simulating further iterations. + * It's safe to assume that the iterator loop will finish, taking into + * account iter_next()'s contract of eventually returning a + * sticky NULL result.
+ */ + if (is_iter_next_insn(env, insn_idx)) { + if (states_equal(env, &sl->state, cur)) { + struct bpf_func_state *cur_frame; + struct bpf_reg_state *iter_state, *iter_reg; + int spi; + + cur_frame = cur->frame[cur->curframe]; + /* btf_check_iter_kfuncs() enforces that + * iter state pointer is always the first arg + */ + iter_reg = &cur_frame->regs[BPF_REG_1]; + /* current state is valid due to states_equal(), + * so we can assume valid iter and reg state, + * no need for extra (re-)validations + */ + spi = __get_spi(iter_reg->off + iter_reg->var_off.value); + iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr; + if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) + goto hit; + } + goto skip_inf_loop_check; + } + /* attempt to detect infinite loop to avoid unnecessary doomed work */ + if (states_maybe_looping(&sl->state, cur) && + states_equal(env, &sl->state, cur) && + !iter_active_depths_differ(&sl->state, cur)) { verbose_linfo(env, insn_idx, "; "); verbose(env, "infinite loop detected at insn %d\n", insn_idx); return -EINVAL; @@ -14665,6 +15268,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * This threshold shouldn't be too high either, since states * at the end of the loop are likely to be useful in pruning. */ +skip_inf_loop_check: if (!env->test_state_freq && env->jmps_processed - env->prev_jmps_processed < 20 && env->insn_processed - env->prev_insn_processed < 100) @@ -14672,6 +15276,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) goto miss; } if (states_equal(env, &sl->state, cur)) { +hit: sl->hit_cnt++; /* reached equivalent register/stack state, * prune the search. diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 976b194eb7754..4abddb668a107 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7112,4 +7112,12 @@ enum { BPF_F_TIMER_ABS = (1ULL << 0), }; +/* BPF numbers iterator state */ +struct bpf_iter_num { + /* opaque iterator state; having __u64 here allows to preserve correct + * alignment requirements in vmlinux.h, generated from BTF + */ + __u64 __opaque[1]; +} __attribute__((aligned(8))); + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index a02a085e7f32b..34cb8b2de8ca3 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -8,6 +8,7 @@ dynptr/test_dynptr_skb_data dynptr/test_skb_readonly fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) +iters/testmod_seq* # s390x doesn't support kfuncs in modules yet kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 kprobe_multi_test # relies on fentry ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?) 
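For orientation before the selftest changes below, here is a minimal, purely illustrative sketch of the usage pattern the verifier-side changes above are meant to accept (program and section names are made up; the iters.c selftests added later in this patch are the authoritative, tested examples). It assumes the bpf_iter_num_*() __ksym declarations that this patch adds to progs/bpf_misc.h:

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include "bpf_misc.h"	/* extern declarations of bpf_iter_num_{new,next,destroy}() */

	char _license[] SEC("license") = "GPL";

	SEC("raw_tp")
	int num_iter_usage_sketch(const void *ctx)
	{
		struct bpf_iter_num it;
		int *v, sum = 0;

		/* the constructor may fail (invalid or too large range), but it
		 * always leaves the state safe to pass to next()/destroy()
		 */
		bpf_iter_num_new(&it, 0, 10);
		while ((v = bpf_iter_num_next(&it)))
			sum += *v;
		/* destroying the iterator is mandatory on every code path once
		 * initialization was attempted; otherwise the verifier rejects
		 * the program due to an unreleased reference
		 */
		bpf_iter_num_destroy(&it);

		return sum;
	}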
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 46500636d8cd9..5e6e85c8d77de 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -65,6 +65,34 @@ bpf_testmod_test_mod_kfunc(int i) *(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i; } +__bpf_kfunc int bpf_iter_testmod_seq_new(struct bpf_iter_testmod_seq *it, s64 value, int cnt) +{ + if (cnt < 0) { + it->cnt = 0; + return -EINVAL; + } + + it->value = value; + it->cnt = cnt; + + return 0; +} + +__bpf_kfunc s64 *bpf_iter_testmod_seq_next(struct bpf_iter_testmod_seq* it) +{ + if (it->cnt <= 0) + return NULL; + + it->cnt--; + + return &it->value; +} + +__bpf_kfunc void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it) +{ + it->cnt = 0; +} + struct bpf_testmod_btf_type_tag_1 { int a; }; @@ -220,6 +248,17 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; +BTF_SET8_START(bpf_testmod_common_kfunc_ids) +BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW) +BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY) +BTF_SET8_END(bpf_testmod_common_kfunc_ids) + +static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { + .owner = THIS_MODULE, + .set = &bpf_testmod_common_kfunc_ids, +}; + BTF_SET8_START(bpf_testmod_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) BTF_SET8_END(bpf_testmod_check_kfunc_ids) @@ -235,7 +274,8 @@ static int bpf_testmod_init(void) { int ret; - ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); if (ret < 0) return ret; if (bpf_fentry_test1(0) < 0) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index 0d71e2607832e..f32793efe095b 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -22,4 +22,10 @@ struct bpf_testmod_test_writable_ctx { int val; }; +/* BPF iter that returns *value* *n* times in a row */ +struct bpf_iter_testmod_seq { + s64 value; + int cnt; +}; + #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 5ca252823294b..731c343897d8f 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -144,6 +144,12 @@ void test_verif_scale_pyperf600_nounroll() scale_test("pyperf600_nounroll.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); } +void test_verif_scale_pyperf600_iter() +{ + /* open-coded BPF iterator version */ + scale_test("pyperf600_iter.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + void test_verif_scale_loop1() { scale_test("loop1.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c new file mode 100644 index 0000000000000..10804ae5ae972 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include + +#include "iters.skel.h" +#include "iters_state_safety.skel.h" +#include "iters_looping.skel.h" +#include "iters_num.skel.h" +#include "iters_testmod_seq.skel.h" + +static void subtest_num_iters(void) +{ + struct iters_num *skel; + int err; + + skel = iters_num__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = iters_num__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + usleep(1); + iters_num__detach(skel); + +#define VALIDATE_CASE(case_name) \ + ASSERT_EQ(skel->bss->res_##case_name, \ + skel->rodata->exp_##case_name, \ + #case_name) + + VALIDATE_CASE(empty_zero); + VALIDATE_CASE(empty_int_min); + VALIDATE_CASE(empty_int_max); + VALIDATE_CASE(empty_minus_one); + + VALIDATE_CASE(simple_sum); + VALIDATE_CASE(neg_sum); + VALIDATE_CASE(very_neg_sum); + VALIDATE_CASE(neg_pos_sum); + + VALIDATE_CASE(invalid_range); + VALIDATE_CASE(max_range); + VALIDATE_CASE(e2big_range); + + VALIDATE_CASE(succ_elem_cnt); + VALIDATE_CASE(overfetched_elem_cnt); + VALIDATE_CASE(fail_elem_cnt); + +#undef VALIDATE_CASE + +cleanup: + iters_num__destroy(skel); +} + +static void subtest_testmod_seq_iters(void) +{ + struct iters_testmod_seq *skel; + int err; + + if (!env.has_testmod) { + test__skip(); + return; + } + + skel = iters_testmod_seq__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = iters_testmod_seq__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + usleep(1); + iters_testmod_seq__detach(skel); + +#define VALIDATE_CASE(case_name) \ + ASSERT_EQ(skel->bss->res_##case_name, \ + skel->rodata->exp_##case_name, \ + #case_name) + + VALIDATE_CASE(empty); + VALIDATE_CASE(full); + VALIDATE_CASE(truncated); + +#undef VALIDATE_CASE + +cleanup: + iters_testmod_seq__destroy(skel); +} + +void test_iters(void) +{ + RUN_TESTS(iters_state_safety); + RUN_TESTS(iters_looping); + RUN_TESTS(iters); + + if (env.has_testmod) + RUN_TESTS(iters_testmod_seq); + + if (test__start_subtest("num")) + subtest_num_iters(); + if (test__start_subtest("testmod_seq")) + subtest_testmod_seq_iters(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c index 6558c857e6206..d5b3377aa33c7 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c @@ -3,7 +3,6 @@ #include #include "test_uprobe_autoattach.skel.h" -#include "progs/bpf_misc.h" /* uprobe attach point */ static noinline int autoattach_trigger_func(int arg1, int arg2, int arg3, diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index f704885aa5342..43b154a639e78 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -36,6 +36,7 @@ #define __clobber_common "r0", "r1", "r2", "r3", "r4", "r5", "memory" #define __imm(name) [name]"i"(name) #define __imm_addr(name) [name]"i"(&name) +#define __imm_ptr(name) [name]"p"(&name) #if defined(__TARGET_ARCH_x86) #define SYSCALL_WRAPPER 1 @@ -75,5 +76,104 @@ #define FUNC_REG_ARG_CNT 5 #endif +struct bpf_iter_num; + +extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __ksym; +extern int *bpf_iter_num_next(struct bpf_iter_num *it) __ksym; +extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym; + +#ifndef bpf_for_each +/* bpf_for_each(iter_type, cur_elem, args...) 
provides generic construct for + * using BPF open-coded iterators without having to write mundane explicit + * low-level loop logic. Instead, it provides for()-like generic construct + * that can be used pretty naturally. E.g., for some hypothetical cgroup + * iterator, you'd write: + * + * struct cgroup *cg, *parent_cg = <...>; + * + * bpf_for_each(cgroup, cg, parent_cg, CG_ITER_CHILDREN) { + * bpf_printk("Child cgroup id = %d", cg->cgroup_id); + * if (cg->cgroup_id == 123) + * break; + * } + * + * I.e., it looks almost like high-level for each loop in other languages, + * supports continue/break, and is verifiable by BPF verifier. + * + * For iterating integers, the difference betwen bpf_for_each(num, i, N, M) + * and bpf_for(i, N, M) is in that bpf_for() provides additional proof to + * verifier that i is in [N, M) range, and in bpf_for_each() case i is `int + * *`, not just `int`. So for integers bpf_for() is more convenient. + * + * Note: this macro relies on C99 feature of allowing to declare variables + * inside for() loop, bound to for() loop lifetime. It also utilizes GCC + * extension: __attribute__((cleanup())), supported by both GCC and + * Clang. + */ +#define bpf_for_each(type, cur, args...) for ( \ + /* initialize and define destructor */ \ + struct bpf_iter_##type ___it __attribute__((aligned(8), /* enforce, just in case */, \ + cleanup(bpf_iter_##type##_destroy))), \ + /* ___p pointer is just to call bpf_iter_##type##_new() *once* to init ___it */ \ + *___p = (bpf_iter_##type##_new(&___it, ##args), \ + /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \ + /* for bpf_iter_##type##_destroy() when used from cleanup() attribute */ \ + (void)bpf_iter_##type##_destroy, (void *)0); \ + /* iteration and termination check */ \ + (((cur) = bpf_iter_##type##_next(&___it))); \ +) +#endif /* bpf_for_each */ + +#ifndef bpf_for +/* bpf_for(i, start, end) implements a for()-like looping construct that sets + * provided integer variable *i* to values starting from *start* through, + * but not including, *end*. It also proves to BPF verifier that *i* belongs + * to range [start, end), so this can be used for accessing arrays without + * extra checks. + * + * Note: *start* and *end* are assumed to be expressions with no side effects + * and whose values do not change throughout bpf_for() loop execution. They do + * not have to be statically known or constant, though. + * + * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for() + * loop bound variables and cleanup attribute, supported by GCC and Clang. 
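+ *
+ * Example (illustrative only): summing a fixed-size array without any
+ * explicit bounds checks, because the verifier knows i is within [0, 100):
+ *
+ *	int arr[100], i, sum = 0;
+ *
+ *	bpf_for(i, 0, 100)
+ *		sum += arr[i];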
+ */ +#define bpf_for(i, start, end) for ( \ + /* initialize and define destructor */ \ + struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \ + cleanup(bpf_iter_num_destroy))), \ + /* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */ \ + *___p = (bpf_iter_num_new(&___it, (start), (end)), \ + /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \ + /* for bpf_iter_num_destroy() when used from cleanup() attribute */ \ + (void)bpf_iter_num_destroy, (void *)0); \ + ({ \ + /* iteration step */ \ + int *___t = bpf_iter_num_next(&___it); \ + /* termination and bounds check */ \ + (___t && ((i) = *___t, (i) >= (start) && (i) < (end))); \ + }); \ +) +#endif /* bpf_for */ + +#ifndef bpf_repeat +/* bpf_repeat(N) performs N iterations without exposing iteration number + * + * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for() + * loop bound variables and cleanup attribute, supported by GCC and Clang. + */ +#define bpf_repeat(N) for ( \ + /* initialize and define destructor */ \ + struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \ + cleanup(bpf_iter_num_destroy))), \ + /* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */ \ + *___p = (bpf_iter_num_new(&___it, 0, (N)), \ + /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \ + /* for bpf_iter_num_destroy() when used from cleanup() attribute */ \ + (void)bpf_iter_num_destroy, (void *)0); \ + bpf_iter_num_next(&___it); \ + /* nothing here */ \ +) +#endif /* bpf_repeat */ #endif diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c new file mode 100644 index 0000000000000..84e5dc10243ca --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -0,0 +1,720 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include +#include "bpf_misc.h" + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +static volatile int zero = 0; + +int my_pid; +int arr[256]; +int small_arr[16] SEC(".data.small_arr"); + +#ifdef REAL_TEST +#define MY_PID_GUARD() if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0 +#else +#define MY_PID_GUARD() ({ }) +#endif + +SEC("?raw_tp") +__failure __msg("math between map_value pointer and register with unbounded min value is not allowed") +int iter_err_unsafe_c_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i = zero; /* obscure initial value of i */ + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 1000); + while ((v = bpf_iter_num_next(&it))) { + i++; + } + bpf_iter_num_destroy(&it); + + small_arr[i] = 123; /* invalid */ + + return 0; +} + +SEC("?raw_tp") +__failure __msg("unbounded memory access") +int iter_err_unsafe_asm_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i = 0; + + MY_PID_GUARD(); + + asm volatile ( + "r6 = %[zero];" /* iteration counter */ + "r1 = %[it];" /* iterator state */ + "r2 = 0;" + "r3 = 1000;" + "r4 = 1;" + "call %[bpf_iter_num_new];" + "loop:" + "r1 = %[it];" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto out;" + "r6 += 1;" + "goto loop;" + "out:" + "r1 = %[it];" + "call %[bpf_iter_num_destroy];" + "r1 = %[small_arr];" + "r2 = r6;" + "r2 <<= 2;" + "r1 += r2;" + "*(u32 *)(r1 + 0) = r6;" /* invalid */ + : + : [it]"r"(&it), + [small_arr]"p"(small_arr), + [zero]"p"(zero), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_common, "r6" + ); + + return 0; +} + +SEC("raw_tp") +__success +int iter_while_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 3); + while ((v = bpf_iter_num_next(&it))) { + bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v); + } + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_while_loop_auto_cleanup(const void *ctx) +{ + __attribute__((cleanup(bpf_iter_num_destroy))) struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 3); + while ((v = bpf_iter_num_next(&it))) { + bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v); + } + /* (!) no explicit bpf_iter_num_destroy() */ + + return 0; +} + +SEC("raw_tp") +__success +int iter_for_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 5, 10); + for (v = bpf_iter_num_next(&it); v; v = bpf_iter_num_next(&it)) { + bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v); + } + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_bpf_for_each_macro(const void *ctx) +{ + int *v; + + MY_PID_GUARD(); + + bpf_for_each(num, v, 5, 10) { + bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v); + } + + return 0; +} + +SEC("raw_tp") +__success +int iter_bpf_for_macro(const void *ctx) +{ + int i; + + MY_PID_GUARD(); + + bpf_for(i, 5, 10) { + bpf_printk("ITER_BASIC: E2 VAL: v=%d", i); + } + + return 0; +} + +SEC("raw_tp") +__success +int iter_pragma_unroll_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 2); +#pragma nounroll + for (i = 0; i < 3; i++) { + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? 
*v : -1); + } + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_manual_unroll_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 100, 200); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d\n", v ? *v : -1); + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_multiple_sequential_loops(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 3); + while ((v = bpf_iter_num_next(&it))) { + bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v); + } + bpf_iter_num_destroy(&it); + + bpf_iter_num_new(&it, 5, 10); + for (v = bpf_iter_num_next(&it); v; v = bpf_iter_num_next(&it)) { + bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v); + } + bpf_iter_num_destroy(&it); + + bpf_iter_num_new(&it, 0, 2); +#pragma nounroll + for (i = 0; i < 3; i++) { + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1); + } + bpf_iter_num_destroy(&it); + + bpf_iter_num_new(&it, 100, 200); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1); + v = bpf_iter_num_next(&it); + bpf_printk("ITER_BASIC: E4 VAL: v=%d\n", v ? *v : -1); + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_limit_cond_break_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, i = 0, sum = 0; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 10); + while ((v = bpf_iter_num_next(&it))) { + bpf_printk("ITER_SIMPLE: i=%d v=%d", i, *v); + sum += *v; + + i++; + if (i > 3) + break; + } + bpf_iter_num_destroy(&it); + + bpf_printk("ITER_SIMPLE: sum=%d\n", sum); + + return 0; +} + +SEC("raw_tp") +__success +int iter_obfuscate_counter(const void *ctx) +{ + struct bpf_iter_num it; + int *v, sum = 0; + /* Make i's initial value unknowable for verifier to prevent it from + * pruning if/else branch inside the loop body and marking i as precise. + */ + int i = zero; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 10); + while ((v = bpf_iter_num_next(&it))) { + int x; + + i += 1; + + /* If we initialized i as `int i = 0;` above, verifier would + * track that i becomes 1 on first iteration after increment + * above, and here verifier would eagerly prune else branch + * and mark i as precise, ruining open-coded iterator logic + * completely, as each next iteration would have a different + * *precise* value of i, and thus there would be no + * convergence of state. This would result in reaching maximum + * instruction limit, no matter what the limit is. 
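	 *
	 * The same trick is used by other tests in this file: seeding
	 * a loop-carried counter from the volatile global 'zero' keeps it
	 * an unknown (imprecise) scalar, e.g.:
	 *
	 *	int i = zero;	instead of:	int i = 0;
	 *
	 * so per-iteration states can converge.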
+ */ + if (i == 1) + x = 123; + else + x = i * 3 + 1; + + bpf_printk("ITER_OBFUSCATE_COUNTER: i=%d v=%d x=%d", i, *v, x); + + sum += x; + } + bpf_iter_num_destroy(&it); + + bpf_printk("ITER_OBFUSCATE_COUNTER: sum=%d\n", sum); + + return 0; +} + +SEC("raw_tp") +__success +int iter_search_loop(const void *ctx) +{ + struct bpf_iter_num it; + int *v, *elem = NULL; + bool found = false; + + MY_PID_GUARD(); + + bpf_iter_num_new(&it, 0, 10); + + while ((v = bpf_iter_num_next(&it))) { + bpf_printk("ITER_SEARCH_LOOP: v=%d", *v); + + if (*v == 2) { + found = true; + elem = v; + barrier_var(elem); + } + } + + /* should fail to verify if bpf_iter_num_destroy() is here */ + + if (found) + /* here found element will be wrong, we should have copied + * value to a variable, but here we want to make sure we can + * access memory after the loop anyways + */ + bpf_printk("ITER_SEARCH_LOOP: FOUND IT = %d!\n", *elem); + else + bpf_printk("ITER_SEARCH_LOOP: NOT FOUND IT!\n"); + + bpf_iter_num_destroy(&it); + + return 0; +} + +SEC("raw_tp") +__success +int iter_array_fill(const void *ctx) +{ + int sum, i; + + MY_PID_GUARD(); + + bpf_for(i, 0, ARRAY_SIZE(arr)) { + arr[i] = i * 2; + } + + sum = 0; + bpf_for(i, 0, ARRAY_SIZE(arr)) { + sum += arr[i]; + } + + bpf_printk("ITER_ARRAY_FILL: sum=%d (should be %d)\n", sum, 255 * 256); + + return 0; +} + +static int arr2d[4][5]; +static int arr2d_row_sums[4]; +static int arr2d_col_sums[5]; + +SEC("raw_tp") +__success +int iter_nested_iters(const void *ctx) +{ + int sum, row, col; + + MY_PID_GUARD(); + + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + bpf_for( col, 0, ARRAY_SIZE(arr2d[0])) { + arr2d[row][col] = row * col; + } + } + + /* zero-initialize sums */ + sum = 0; + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + arr2d_row_sums[row] = 0; + } + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + arr2d_col_sums[col] = 0; + } + + /* calculate sums */ + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + sum += arr2d[row][col]; + arr2d_row_sums[row] += arr2d[row][col]; + arr2d_col_sums[col] += arr2d[row][col]; + } + } + + bpf_printk("ITER_NESTED_ITERS: total sum=%d", sum); + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + bpf_printk("ITER_NESTED_ITERS: row #%d sum=%d", row, arr2d_row_sums[row]); + } + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + bpf_printk("ITER_NESTED_ITERS: col #%d sum=%d%s", + col, arr2d_col_sums[col], + col == ARRAY_SIZE(arr2d[0]) - 1 ? 
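/* append a newline only after the last column */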
"\n" : ""); + } + + return 0; +} + +SEC("raw_tp") +__success +int iter_nested_deeply_iters(const void *ctx) +{ + int sum = 0; + + MY_PID_GUARD(); + + bpf_repeat(10) { + bpf_repeat(10) { + bpf_repeat(10) { + bpf_repeat(10) { + bpf_repeat(10) { + sum += 1; + } + } + } + } + /* validate that we can break from inside bpf_repeat() */ + break; + } + + return sum; +} + +static __noinline void fill_inner_dimension(int row) +{ + int col; + + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + arr2d[row][col] = row * col; + } +} + +static __noinline int sum_inner_dimension(int row) +{ + int sum = 0, col; + + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + sum += arr2d[row][col]; + arr2d_row_sums[row] += arr2d[row][col]; + arr2d_col_sums[col] += arr2d[row][col]; + } + + return sum; +} + +SEC("raw_tp") +__success +int iter_subprog_iters(const void *ctx) +{ + int sum, row, col; + + MY_PID_GUARD(); + + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + fill_inner_dimension(row); + } + + /* zero-initialize sums */ + sum = 0; + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + arr2d_row_sums[row] = 0; + } + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + arr2d_col_sums[col] = 0; + } + + /* calculate sums */ + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + sum += sum_inner_dimension(row); + } + + bpf_printk("ITER_SUBPROG_ITERS: total sum=%d", sum); + bpf_for(row, 0, ARRAY_SIZE(arr2d)) { + bpf_printk("ITER_SUBPROG_ITERS: row #%d sum=%d", + row, arr2d_row_sums[row]); + } + bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) { + bpf_printk("ITER_SUBPROG_ITERS: col #%d sum=%d%s", + col, arr2d_col_sums[col], + col == ARRAY_SIZE(arr2d[0]) - 1 ? "\n" : ""); + } + + return 0; +} + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, int); + __uint(max_entries, 1000); +} arr_map SEC(".maps"); + +SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") +int iter_err_too_permissive1(const void *ctx) +{ + int *map_val = NULL; + int key = 0; + + MY_PID_GUARD(); + + map_val = bpf_map_lookup_elem(&arr_map, &key); + if (!map_val) + return 0; + + bpf_repeat(1000000) { + map_val = NULL; + } + + *map_val = 123; + + return 0; +} + +SEC("?raw_tp") +__failure __msg("invalid mem access 'map_value_or_null'") +int iter_err_too_permissive2(const void *ctx) +{ + int *map_val = NULL; + int key = 0; + + MY_PID_GUARD(); + + map_val = bpf_map_lookup_elem(&arr_map, &key); + if (!map_val) + return 0; + + bpf_repeat(1000000) { + map_val = bpf_map_lookup_elem(&arr_map, &key); + } + + *map_val = 123; + + return 0; +} + +SEC("?raw_tp") +__failure __msg("invalid mem access 'map_value_or_null'") +int iter_err_too_permissive3(const void *ctx) +{ + int *map_val = NULL; + int key = 0; + bool found = false; + + MY_PID_GUARD(); + + bpf_repeat(1000000) { + map_val = bpf_map_lookup_elem(&arr_map, &key); + found = true; + } + + if (found) + *map_val = 123; + + return 0; +} + +SEC("raw_tp") +__success +int iter_tricky_but_fine(const void *ctx) +{ + int *map_val = NULL; + int key = 0; + bool found = false; + + MY_PID_GUARD(); + + bpf_repeat(1000000) { + map_val = bpf_map_lookup_elem(&arr_map, &key); + if (map_val) { + found = true; + break; + } + } + + if (found) + *map_val = 123; + + return 0; +} + +#define __bpf_memzero(p, sz) bpf_probe_read_kernel((p), (sz), 0) + +SEC("raw_tp") +__success +int iter_stack_array_loop(const void *ctx) +{ + long arr1[16], arr2[16], sum = 0; + int *v, i; + + MY_PID_GUARD(); + + /* zero-init arr1 and arr2 in such a way that verifier doesn't know + * it's all zeros; if we don't do that, we'll make BPF verifier track + * all combination of zero/non-zero 
stack slots for arr1/arr2, which + * will lead to O(2^(ARRAY_SIZE(arr1)+ARRAY_SIZE(arr2))) different + * states + */ + __bpf_memzero(arr1, sizeof(arr1)); + __bpf_memzero(arr2, sizeof(arr1)); + + /* validate that we can break and continue when using bpf_for() */ + bpf_for(i, 0, ARRAY_SIZE(arr1)) { + if (i & 1) { + arr1[i] = i; + continue; + } else { + arr2[i] = i; + break; + } + } + + bpf_for(i, 0, ARRAY_SIZE(arr1)) { + sum += arr1[i] + arr2[i]; + } + + return sum; +} + +static __noinline void fill(struct bpf_iter_num *it, int *arr, __u32 n, int mul) +{ + int *t, i; + + while ((t = bpf_iter_num_next(it))) { + i = *t; + if (i >= n) + break; + arr[i] = i * mul; + } +} + +static __noinline int sum(struct bpf_iter_num *it, int *arr, __u32 n) +{ + int *t, i, sum = 0;; + + while ((t = bpf_iter_num_next(it))) { + i = *t; + if (i >= n) + break; + sum += arr[i]; + } + + return sum; +} + +SEC("raw_tp") +__success +int iter_pass_iter_ptr_to_subprog(const void *ctx) +{ + int arr1[16], arr2[32]; + struct bpf_iter_num it; + int n, sum1, sum2; + + MY_PID_GUARD(); + + /* fill arr1 */ + n = ARRAY_SIZE(arr1); + bpf_iter_num_new(&it, 0, n); + fill(&it, arr1, n, 2); + bpf_iter_num_destroy(&it); + + /* fill arr2 */ + n = ARRAY_SIZE(arr2); + bpf_iter_num_new(&it, 0, n); + fill(&it, arr2, n, 10); + bpf_iter_num_destroy(&it); + + /* sum arr1 */ + n = ARRAY_SIZE(arr1); + bpf_iter_num_new(&it, 0, n); + sum1 = sum(&it, arr1, n); + bpf_iter_num_destroy(&it); + + /* sum arr2 */ + n = ARRAY_SIZE(arr2); + bpf_iter_num_new(&it, 0, n); + sum2 = sum(&it, arr2, n); + bpf_iter_num_destroy(&it); + + bpf_printk("sum1=%d, sum2=%d", sum1, sum2); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters_looping.c b/tools/testing/selftests/bpf/progs/iters_looping.c new file mode 100644 index 0000000000000..05fa5ce7fc594 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_looping.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
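 *
 * Aside on the __bpf_memzero() helper used in iters.c above: it is defined
 * as bpf_probe_read_kernel(p, sz, 0), i.e. a probe-read from a NULL kernel
 * address. At runtime the helper fails and zero-fills the destination
 * buffer, while the verifier only marks the touched stack slots as
 * initialized-but-unknown, which is exactly what that test wants: zeroed
 * arrays whose contents the verifier does not track precisely.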
*/ + +#include +#include +#include +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +#define ITER_HELPERS \ + __imm(bpf_iter_num_new), \ + __imm(bpf_iter_num_next), \ + __imm(bpf_iter_num_destroy) + +SEC("?raw_tp") +__success +int force_clang_to_emit_btf_for_externs(void *ctx) +{ + /* we need this as a workaround to enforce compiler emitting BTF + * information for bpf_iter_num_{new,next,destroy}() kfuncs, + * as, apparently, it doesn't emit it for symbols only referenced from + * assembly (or cleanup attribute, for that matter, as well) + */ + bpf_repeat(0); + + return 0; +} + +SEC("?raw_tp") +__success +int consume_first_item_only(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* consume first item */ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + + "if r0 == 0 goto +1;" + "r0 = *(u32 *)(r0 + 0);" + + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("R0 invalid mem access 'scalar'") +int missing_null_check_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* consume first element */ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + + /* FAIL: deref with no NULL check */ + "r1 = *(u32 *)(r0 + 0);" + + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure +__msg("invalid access to memory, mem_size=4 off=0 size=8") +__msg("R0 min value is outside of the allowed memory range") +int wrong_sized_read_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* consume first element */ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + + "if r0 == 0 goto +1;" + /* FAIL: deref more than available 4 bytes */ + "r0 = *(u64 *)(r0 + 0);" + + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__success __log_level(2) +__flag(BPF_F_TEST_STATE_FREQ) +int simplest_loop(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + "r6 = 0;" /* init sum */ + + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + + "1:" + /* consume next item */ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + + "if r0 == 0 goto 2f;" + "r0 = *(u32 *)(r0 + 0);" + "r6 += r0;" /* accumulate sum */ + "goto 1b;" + + "2:" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common, "r6" + ); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/iters_num.c b/tools/testing/selftests/bpf/progs/iters_num.c new file mode 100644 index 0000000000000..7a77a8daee0dc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_num.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
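 *
 * The inline-asm tests in iters_looping.c above follow the regular BPF
 * calling convention: r1 = &iter, r2 = start, r3 = end for
 * bpf_iter_num_new(), and bpf_iter_num_next() returns in r0 a pointer to
 * a 4-byte int that must be NULL-checked. simplest_loop, for instance, is
 * just the asm form of (a rough C sketch):
 *
 *	int *v, sum = 0;
 *	struct bpf_iter_num it;
 *
 *	bpf_iter_num_new(&it, 0, 10);
 *	while ((v = bpf_iter_num_next(&it)))
 *		sum += *v;
 *	bpf_iter_num_destroy(&it);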
*/ + +#include +#include +#include "vmlinux.h" +#include +#include "bpf_misc.h" + +const volatile __s64 exp_empty_zero = 0 + 1; +__s64 res_empty_zero; + +SEC("raw_tp/sys_enter") +int num_empty_zero(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, 0, 0) sum += i; + res_empty_zero = 1 + sum; + + return 0; +} + +const volatile __s64 exp_empty_int_min = 0 + 2; +__s64 res_empty_int_min; + +SEC("raw_tp/sys_enter") +int num_empty_int_min(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, INT_MIN, INT_MIN) sum += i; + res_empty_int_min = 2 + sum; + + return 0; +} + +const volatile __s64 exp_empty_int_max = 0 + 3; +__s64 res_empty_int_max; + +SEC("raw_tp/sys_enter") +int num_empty_int_max(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, INT_MAX, INT_MAX) sum += i; + res_empty_int_max = 3 + sum; + + return 0; +} + +const volatile __s64 exp_empty_minus_one = 0 + 4; +__s64 res_empty_minus_one; + +SEC("raw_tp/sys_enter") +int num_empty_minus_one(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, -1, -1) sum += i; + res_empty_minus_one = 4 + sum; + + return 0; +} + +const volatile __s64 exp_simple_sum = 9 * 10 / 2; +__s64 res_simple_sum; + +SEC("raw_tp/sys_enter") +int num_simple_sum(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, 0, 10) sum += i; + res_simple_sum = sum; + + return 0; +} + +const volatile __s64 exp_neg_sum = -11 * 10 / 2; +__s64 res_neg_sum; + +SEC("raw_tp/sys_enter") +int num_neg_sum(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, -10, 0) sum += i; + res_neg_sum = sum; + + return 0; +} + +const volatile __s64 exp_very_neg_sum = INT_MIN + (__s64)(INT_MIN + 1); +__s64 res_very_neg_sum; + +SEC("raw_tp/sys_enter") +int num_very_neg_sum(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, INT_MIN, INT_MIN + 2) sum += i; + res_very_neg_sum = sum; + + return 0; +} + +const volatile __s64 exp_very_big_sum = (__s64)(INT_MAX - 1) + (__s64)(INT_MAX - 2); +__s64 res_very_big_sum; + +SEC("raw_tp/sys_enter") +int num_very_big_sum(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, INT_MAX - 2, INT_MAX) sum += i; + res_very_big_sum = sum; + + return 0; +} + +const volatile __s64 exp_neg_pos_sum = -3; +__s64 res_neg_pos_sum; + +SEC("raw_tp/sys_enter") +int num_neg_pos_sum(const void *ctx) +{ + __s64 sum = 0, i; + + bpf_for(i, -3, 3) sum += i; + res_neg_pos_sum = sum; + + return 0; +} + +const volatile __s64 exp_invalid_range = -EINVAL; +__s64 res_invalid_range; + +SEC("raw_tp/sys_enter") +int num_invalid_range(const void *ctx) +{ + struct bpf_iter_num it; + + res_invalid_range = bpf_iter_num_new(&it, 1, 0); + bpf_iter_num_destroy(&it); + + return 0; +} + +const volatile __s64 exp_max_range = 0 + 10; +__s64 res_max_range; + +SEC("raw_tp/sys_enter") +int num_max_range(const void *ctx) +{ + struct bpf_iter_num it; + + res_max_range = 10 + bpf_iter_num_new(&it, 0, BPF_MAX_LOOPS); + bpf_iter_num_destroy(&it); + + return 0; +} + +const volatile __s64 exp_e2big_range = -E2BIG; +__s64 res_e2big_range; + +SEC("raw_tp/sys_enter") +int num_e2big_range(const void *ctx) +{ + struct bpf_iter_num it; + + res_e2big_range = bpf_iter_num_new(&it, -1, BPF_MAX_LOOPS); + bpf_iter_num_destroy(&it); + + return 0; +} + +const volatile __s64 exp_succ_elem_cnt = 10; +__s64 res_succ_elem_cnt; + +SEC("raw_tp/sys_enter") +int num_succ_elem_cnt(const void *ctx) +{ + struct bpf_iter_num it; + int cnt = 0, *v; + + bpf_iter_num_new(&it, 0, 10); + while ((v = bpf_iter_num_next(&it))) { + cnt++; + } + bpf_iter_num_destroy(&it); + + res_succ_elem_cnt = cnt; + + return 0; +} + +const volatile __s64 
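/* over-fetch case below: the range [0, 5) yields five values and the
 * remaining five bpf_iter_num_next() calls just return NULL, so cnt ends
 * up as 5
 */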
exp_overfetched_elem_cnt = 5; +__s64 res_overfetched_elem_cnt; + +SEC("raw_tp/sys_enter") +int num_overfetched_elem_cnt(const void *ctx) +{ + struct bpf_iter_num it; + int cnt = 0, *v, i; + + bpf_iter_num_new(&it, 0, 5); + for (i = 0; i < 10; i++) { + v = bpf_iter_num_next(&it); + if (v) + cnt++; + } + bpf_iter_num_destroy(&it); + + res_overfetched_elem_cnt = cnt; + + return 0; +} + +const volatile __s64 exp_fail_elem_cnt = 20 + 0; +__s64 res_fail_elem_cnt; + +SEC("raw_tp/sys_enter") +int num_fail_elem_cnt(const void *ctx) +{ + struct bpf_iter_num it; + int cnt = 0, *v, i; + + bpf_iter_num_new(&it, 100, 10); + for (i = 0; i < 10; i++) { + v = bpf_iter_num_next(&it); + if (v) + cnt++; + } + bpf_iter_num_destroy(&it); + + res_fail_elem_cnt = 20 + cnt; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c new file mode 100644 index 0000000000000..d47e59aba6de3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c @@ -0,0 +1,426 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Facebook */ + +#include +#include +#include +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +#define ITER_HELPERS \ + __imm(bpf_iter_num_new), \ + __imm(bpf_iter_num_next), \ + __imm(bpf_iter_num_destroy) + +SEC("?raw_tp") +__success +int force_clang_to_emit_btf_for_externs(void *ctx) +{ + /* we need this as a workaround to enforce compiler emitting BTF + * information for bpf_iter_num_{new,next,destroy}() kfuncs, + * as, apparently, it doesn't emit it for symbols only referenced from + * assembly (or cleanup attribute, for that matter, as well) + */ + bpf_repeat(0); + + return 0; +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)") +int create_and_destroy(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("Unreleased reference id=1") +int create_and_forget_to_destroy_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int destroy_without_creating_fail(void *ctx) +{ + /* init with zeros to stop verifier complaining about uninit stack */ + struct bpf_iter_num iter; + + asm volatile ( + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int compromise_iter_w_direct_write_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* directly write over first half of iter state */ + "*(u64 *)(%[iter] + 0) = r0;" + + /* (attempt to) destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("Unreleased reference id=1") +int 
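/* as in the previous test, the direct write clobbers the iterator's stack
 * slots, but here destroy is skipped entirely, so the reference acquired
 * by bpf_iter_num_new() is never released and the verifier reports the
 * leak at program exit
 */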
compromise_iter_w_direct_write_and_skip_destroy_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* directly write over first half of iter state */ + "*(u64 *)(%[iter] + 0) = r0;" + + /* don't destroy iter, leaking ref, which should fail */ + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int compromise_iter_w_helper_write_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* overwrite 8th byte with bpf_probe_read_kernel() */ + "r1 = %[iter];" + "r1 += 7;" + "r2 = 1;" + "r3 = 0;" /* NULL */ + "call %[bpf_probe_read_kernel];" + + /* (attempt to) destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS, __imm(bpf_probe_read_kernel) + : __clobber_common + ); + + return 0; +} + +static __noinline void subprog_with_iter(void) +{ + struct bpf_iter_num iter; + + bpf_iter_num_new(&iter, 0, 1); + + return; +} + +SEC("?raw_tp") +__failure +/* ensure there was a call to subprog, which might happen without __noinline */ +__msg("returning from callee:") +__msg("Unreleased reference id=1") +int leak_iter_from_subprog_fail(void *ctx) +{ + subprog_with_iter(); + + return 0; +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)") +int valid_stack_reuse(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + + /* now reuse same stack slots */ + + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected uninitialized iter_num as arg #1") +int double_create_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* (attempt to) create iterator again */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int double_destroy_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + /* (attempt to) destroy iterator again */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int next_without_new_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* don't create iterator and try to iterate*/ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : 
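/* no output operands; the inputs below bind %[iter] to the on-stack
 * iterator and ITER_HELPERS to the three iterator kfunc symbols
 */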
__imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("expected an initialized iter_num as arg #1") +int next_after_destroy_fail(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + /* don't create iterator and try to iterate*/ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common + ); + + return 0; +} + +SEC("?raw_tp") +__failure __msg("invalid read from stack") +int __naked read_from_iter_slot_fail(void) +{ + asm volatile ( + /* r6 points to struct bpf_iter_num on the stack */ + "r6 = r10;" + "r6 += -24;" + + /* create iterator */ + "r1 = r6;" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* attemp to leak bpf_iter_num state */ + "r7 = *(u64 *)(r6 + 0);" + "r8 = *(u64 *)(r6 + 8);" + + /* destroy iterator */ + "r1 = r6;" + "call %[bpf_iter_num_destroy];" + + /* leak bpf_iter_num state */ + "r0 = r7;" + "if r7 > r8 goto +1;" + "r0 = r8;" + "exit;" + : + : ITER_HELPERS + : __clobber_common, "r6", "r7", "r8" + ); +} + +int zero; + +SEC("?raw_tp") +__failure +__flag(BPF_F_TEST_STATE_FREQ) +__msg("Unreleased reference") +int stacksafe_should_not_conflate_stack_spill_and_iter(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile ( + /* Create a fork in logic, with general setup as follows: + * - fallthrough (first) path is valid; + * - branch (second) path is invalid. + * Then depending on what we do in fallthrough vs branch path, + * we try to detect bugs in func_states_equal(), regsafe(), + * refsafe(), stack_safe(), and similar by tricking verifier + * into believing that branch state is a valid subset of + * a fallthrough state. Verifier should reject overall + * validation, unless there is a bug somewhere in verifier + * logic. + */ + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "call %[bpf_get_prandom_u32];" + "r7 = r0;" + + "if r6 > r7 goto bad;" /* fork */ + + /* spill r6 into stack slot of bpf_iter_num var */ + "*(u64 *)(%[iter] + 0) = r6;" + + "goto skip_bad;" + + "bad:" + /* create iterator in the same stack slot */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 1000;" + "call %[bpf_iter_num_new];" + + /* but then forget about it and overwrite it back to r6 spill */ + "*(u64 *)(%[iter] + 0) = r6;" + + "skip_bad:" + "goto +0;" /* force checkpoint */ + + /* corrupt stack slots, if they are really dynptr */ + "*(u64 *)(%[iter] + 0) = r6;" + : + : __imm_ptr(iter), + __imm_addr(zero), + __imm(bpf_get_prandom_u32), + __imm(bpf_dynptr_from_mem), + ITER_HELPERS + : __clobber_common, "r6", "r7" + ); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c new file mode 100644 index 0000000000000..3873fb6c292a1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
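 *
 * struct bpf_iter_testmod_seq below is an opaque stand-in for the iterator
 * state implemented by the bpf_testmod test module: two anonymous 64-bit
 * bitfields give a 16-byte, 8-byte-aligned blob, and the new/next/destroy
 * kfuncs are declared extern __ksym so that libbpf resolves them against
 * the module's BTF at load time.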
*/ + +#include "vmlinux.h" +#include +#include "bpf_misc.h" + +struct bpf_iter_testmod_seq { + u64 :64; + u64 :64; +}; + +extern int bpf_iter_testmod_seq_new(struct bpf_iter_testmod_seq *it, s64 value, int cnt) __ksym; +extern s64 *bpf_iter_testmod_seq_next(struct bpf_iter_testmod_seq *it) __ksym; +extern void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it) __ksym; + +const volatile __s64 exp_empty = 0 + 1; +__s64 res_empty; + +SEC("raw_tp/sys_enter") +__success __log_level(2) +__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") +__msg("call bpf_iter_testmod_seq_destroy") +int testmod_seq_empty(const void *ctx) +{ + __s64 sum = 0, *i; + + bpf_for_each(testmod_seq, i, 1000, 0) sum += *i; + res_empty = 1 + sum; + + return 0; +} + +const volatile __s64 exp_full = 1000000; +__s64 res_full; + +SEC("raw_tp/sys_enter") +__success __log_level(2) +__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") +__msg("call bpf_iter_testmod_seq_destroy") +int testmod_seq_full(const void *ctx) +{ + __s64 sum = 0, *i; + + bpf_for_each(testmod_seq, i, 1000, 1000) sum += *i; + res_full = sum; + + return 0; +} + +const volatile __s64 exp_truncated = 10 * 1000000; +__s64 res_truncated; + +static volatile int zero = 0; + +SEC("raw_tp/sys_enter") +__success __log_level(2) +__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") +__msg("call bpf_iter_testmod_seq_destroy") +int testmod_seq_truncated(const void *ctx) +{ + __s64 sum = 0, *i; + int cnt = zero; + + bpf_for_each(testmod_seq, i, 10, 2000000) { + sum += *i; + cnt++; + if (cnt >= 1000000) + break; + } + res_truncated = sum; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index dc93887ed34c7..fadfdd98707c4 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -4,12 +4,12 @@ * Copyright 2020 Google LLC. 
*/ -#include "bpf_misc.h" #include "vmlinux.h" +#include #include #include #include -#include +#include "bpf_misc.h" struct { __uint(type, BPF_MAP_TYPE_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 6c7b1fb268d63..f2e7a31c8d751 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -7,6 +7,7 @@ #include #include #include +#include "bpf_misc.h" #define FUNCTION_NAME_LEN 64 #define FILE_NAME_LEN 128 @@ -294,17 +295,22 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx) if (ctx.done) return 0; #else -#ifdef NO_UNROLL +#if defined(USE_ITER) +/* no for loop, no unrolling */ +#elif defined(NO_UNROLL) #pragma clang loop unroll(disable) -#else -#ifdef UNROLL_COUNT +#elif defined(UNROLL_COUNT) #pragma clang loop unroll_count(UNROLL_COUNT) #else #pragma clang loop unroll(full) -#endif #endif /* NO_UNROLL */ /* Unwind python stack */ +#ifdef USE_ITER + int i; + bpf_for(i, 0, STACK_MAX_LEN) { +#else /* !USE_ITER */ for (int i = 0; i < STACK_MAX_LEN; ++i) { +#endif if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) { int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu; int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym); diff --git a/tools/testing/selftests/bpf/progs/pyperf600_iter.c b/tools/testing/selftests/bpf/progs/pyperf600_iter.c new file mode 100644 index 0000000000000..d62e1b200c30e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf600_iter.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2023 Meta Platforms, Inc. and affiliates. +#define STACK_MAX_LEN 600 +#define SUBPROGS +#define NO_UNROLL +#define USE_ITER +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c index 6beff7502f4d7..520b58c4f8db6 100644 --- a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c +++ b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c @@ -2,7 +2,4 @@ // Copyright (c) 2019 Facebook #define STACK_MAX_LEN 600 #define NO_UNROLL -/* clang will not unroll at all. - * Total program size is around 2k insns - */ #include "pyperf.h"