diff --git a/libbpfgo.c b/libbpfgo.c index ef04df66..8efebdfd 100644 --- a/libbpfgo.c +++ b/libbpfgo.c @@ -182,3 +182,56 @@ void cgo_bpf_object_open_opts_free(struct bpf_object_open_opts *opts) { free(opts); } + +struct bpf_map_create_opts *cgo_bpf_map_create_opts_new(__u32 btf_fd, + __u32 btf_key_type_id, + __u32 btf_value_type_id, + __u32 btf_vmlinux_value_type_id, + __u32 inner_map_fd, + __u32 map_flags, + __u64 map_extra, + __u32 numa_node, + __u32 map_ifindex) +{ + struct bpf_map_create_opts *opts; + opts = calloc(1, sizeof(*opts)); + if (!opts) + return NULL; + + opts->sz = sizeof(*opts); + opts->btf_fd = btf_fd; + opts->btf_key_type_id = btf_key_type_id; + opts->btf_value_type_id = btf_value_type_id; + opts->btf_vmlinux_value_type_id = btf_vmlinux_value_type_id; + opts->inner_map_fd = inner_map_fd; + opts->map_flags = map_flags; + opts->map_extra = map_extra; + opts->numa_node = numa_node; + opts->map_ifindex = map_ifindex; + + return opts; +} + +void cgo_bpf_map_create_opts_free(struct bpf_map_create_opts *opts) +{ + free(opts); +} + +struct bpf_map_batch_opts *cgo_bpf_map_batch_opts_new(__u64 elem_flags, __u64 flags) +{ + struct bpf_map_batch_opts *opts; + opts = calloc(1, sizeof(*opts)); + if (!opts) + return NULL; + + opts->sz = sizeof(*opts); + opts->elem_flags = elem_flags; + opts->flags = flags; + + return opts; +} + +void cgo_bpf_map_batch_opts_free(struct bpf_map_batch_opts *opts) +{ + free(opts); +} diff --git a/libbpfgo.go b/libbpfgo.go index f1c39ab1..02d9794d 100644 --- a/libbpfgo.go +++ b/libbpfgo.go @@ -368,7 +368,10 @@ func (m *Module) InitGlobalVariable(name string, value interface{}) error { } // get current value - currMapValue := bpfMap.getInitialValue() + currMapValue, err := bpfMap.InitialValue() + if err != nil { + return err + } // generate new value newMapValue := make([]byte, bpfMap.ValueSize()) @@ -386,24 +389,71 @@ func (m *Module) InitGlobalVariable(name string, value interface{}) error { copy(newMapValue[start:end], varValue) // save new value - err = bpfMap.setInitialValue(unsafe.Pointer(&newMapValue[0])) + err = bpfMap.SetInitialValue(unsafe.Pointer(&newMapValue[0])) return err } func (m *Module) GetMap(mapName string) (*BPFMap, error) { cs := C.CString(mapName) - bpfMap, errno := C.bpf_object__find_map_by_name(m.obj, cs) + bpfMapC, errno := C.bpf_object__find_map_by_name(m.obj, cs) C.free(unsafe.Pointer(cs)) - if bpfMap == nil { + if bpfMapC == nil { return nil, fmt.Errorf("failed to find BPF map %s: %w", mapName, errno) } - return &BPFMap{ - bpfMap: bpfMap, - name: mapName, - fd: C.bpf_map__fd(bpfMap), + bpfMap := &BPFMap{ + bpfMap: bpfMapC, module: m, - }, nil + } + + if !m.loaded { + bpfMap.bpfMapLow = &BPFMapLow{ + fd: -1, + info: &BPFMapInfo{}, + } + + return bpfMap, nil + } + + fd := bpfMap.FileDescriptor() + info, err := GetMapInfoByFD(fd) + if err != nil { + // Compatibility Note: Some older kernels lack BTF (BPF Type Format) + // support for specific BPF map types. In such scenarios, libbpf may + // fail (EPERM) when attempting to retrieve information for these maps. + // Reference: https://elixir.bootlin.com/linux/v5.15.75/source/tools/lib/bpf/gen_loader.c#L401 + // + // However, we can still get some map info from the BPF map high level API. 
+ bpfMap.bpfMapLow = &BPFMapLow{ + fd: fd, + info: &BPFMapInfo{ + Type: bpfMap.Type(), + ID: 0, + KeySize: uint32(bpfMap.KeySize()), + ValueSize: uint32(bpfMap.ValueSize()), + MaxEntries: bpfMap.MaxEntries(), + MapFlags: uint32(bpfMap.MapFlags()), + Name: bpfMap.Name(), + IfIndex: bpfMap.IfIndex(), + BTFVmlinuxValueTypeID: 0, + NetnsDev: 0, + NetnsIno: 0, + BTFID: 0, + BTFKeyTypeID: 0, + BTFValueTypeID: 0, + MapExtra: bpfMap.MapExtra(), + }, + } + + return bpfMap, nil + } + + bpfMap.bpfMapLow = &BPFMapLow{ + fd: fd, + info: info, + } + + return bpfMap, nil } // BPFObjectProgramIterator iterates over maps in a BPF object @@ -431,15 +481,33 @@ func (it *BPFObjectIterator) NextMap() *BPFMap { if m == nil { return nil } - cName := C.bpf_map__name(m) bpfMap := &BPFMap{ - name: C.GoString(cName), bpfMap: m, module: it.m, } - it.prevMap = bpfMap + + if !bpfMap.module.loaded { + bpfMap.bpfMapLow = &BPFMapLow{ + fd: -1, + info: &BPFMapInfo{}, + } + + return bpfMap + } + + fd := bpfMap.FileDescriptor() + info, err := GetMapInfoByFD(fd) + if err != nil { + return nil + } + + bpfMap.bpfMapLow = &BPFMapLow{ + fd: fd, + info: info, + } + return bpfMap } @@ -1150,7 +1218,7 @@ func (m *Module) InitRingBuf(mapName string, eventsChan chan []byte) (*RingBuffe return nil, fmt.Errorf("max ring buffers reached") } - rb := C.cgo_init_ring_buf(bpfMap.fd, C.uintptr_t(slot)) + rb := C.cgo_init_ring_buf(C.int(bpfMap.FileDescriptor()), C.uintptr_t(slot)) if rb == nil { return nil, fmt.Errorf("failed to initialize ring buffer") } @@ -1264,7 +1332,7 @@ func (m *Module) InitPerfBuf(mapName string, eventsChan chan []byte, lostChan ch return nil, fmt.Errorf("max number of ring/perf buffers reached") } - pb := C.cgo_init_perf_buf(bpfMap.fd, C.int(pageCnt), C.uintptr_t(slot)) + pb := C.cgo_init_perf_buf(C.int(bpfMap.FileDescriptor()), C.int(pageCnt), C.uintptr_t(slot)) if pb == nil { eventChannels.remove(uint(slot)) return nil, fmt.Errorf("failed to initialize perf buffer") diff --git a/libbpfgo.h b/libbpfgo.h index 5b0a2906..4bb30c64 100644 --- a/libbpfgo.h +++ b/libbpfgo.h @@ -46,4 +46,18 @@ struct bpf_object_open_opts *cgo_bpf_object_open_opts_new(const char *btf_file_p const char *bpf_obj_name); void cgo_bpf_object_open_opts_free(struct bpf_object_open_opts *opts); +struct bpf_map_create_opts *cgo_bpf_map_create_opts_new(__u32 btf_fd, + __u32 btf_key_type_id, + __u32 btf_value_type_id, + __u32 btf_vmlinux_value_type_id, + __u32 inner_map_fd, + __u32 map_flags, + __u64 map_extra, + __u32 numa_node, + __u32 map_ifindex); +void cgo_bpf_map_create_opts_free(struct bpf_map_create_opts *opts); + +struct bpf_map_batch_opts *cgo_bpf_map_batch_opts_new(__u64 elem_flags, __u64 flags); +void cgo_bpf_map_batch_opts_free(struct bpf_map_batch_opts *opts); + #endif diff --git a/map-common.go b/map-common.go new file mode 100644 index 00000000..d16bb468 --- /dev/null +++ b/map-common.go @@ -0,0 +1,186 @@ +package libbpfgo + +/* +#cgo LDFLAGS: -lelf -lz +#include "libbpfgo.h" +*/ +import "C" + +import ( + "fmt" + "syscall" +) + +// +// MapType +// + +type MapType uint32 + +const ( + MapTypeUnspec MapType = iota + MapTypeHash + MapTypeArray + MapTypeProgArray + MapTypePerfEventArray + MapTypePerCPUHash + MapTypePerCPUArray + MapTypeStackTrace + MapTypeCgroupArray + MapTypeLRUHash + MapTypeLRUPerCPUHash + MapTypeLPMTrie + MapTypeArrayOfMaps + MapTypeHashOfMaps + MapTypeDevMap + MapTypeSockMap + MapTypeCPUMap + MapTypeXSKMap + MapTypeSockHash + MapTypeCgroupStorage + MapTypeReusePortSockArray + MapTypePerCPUCgroupStorage + MapTypeQueue + 
MapTypeStack + MapTypeSKStorage + MapTypeDevmapHash + MapTypeStructOps + MapTypeRingbuf + MapTypeInodeStorage + MapTypeTaskStorage + MapTypeBloomFilter +) + +var mapTypeToString = map[MapType]string{ + MapTypeUnspec: "BPF_MAP_TYPE_UNSPEC", + MapTypeHash: "BPF_MAP_TYPE_HASH", + MapTypeArray: "BPF_MAP_TYPE_ARRAY", + MapTypeProgArray: "BPF_MAP_TYPE_PROG_ARRAY", + MapTypePerfEventArray: "BPF_MAP_TYPE_PERF_EVENT_ARRAY", + MapTypePerCPUHash: "BPF_MAP_TYPE_PERCPU_HASH", + MapTypePerCPUArray: "BPF_MAP_TYPE_PERCPU_ARRAY", + MapTypeStackTrace: "BPF_MAP_TYPE_STACK_TRACE", + MapTypeCgroupArray: "BPF_MAP_TYPE_CGROUP_ARRAY", + MapTypeLRUHash: "BPF_MAP_TYPE_LRU_HASH", + MapTypeLRUPerCPUHash: "BPF_MAP_TYPE_LRU_PERCPU_HASH", + MapTypeLPMTrie: "BPF_MAP_TYPE_LPM_TRIE", + MapTypeArrayOfMaps: "BPF_MAP_TYPE_ARRAY_OF_MAPS", + MapTypeHashOfMaps: "BPF_MAP_TYPE_HASH_OF_MAPS", + MapTypeDevMap: "BPF_MAP_TYPE_DEVMAP", + MapTypeSockMap: "BPF_MAP_TYPE_SOCKMAP", + MapTypeCPUMap: "BPF_MAP_TYPE_CPUMAP", + MapTypeXSKMap: "BPF_MAP_TYPE_XSKMAP", + MapTypeSockHash: "BPF_MAP_TYPE_SOCKHASH", + MapTypeCgroupStorage: "BPF_MAP_TYPE_CGROUP_STORAGE", + MapTypeReusePortSockArray: "BPF_MAP_TYPE_REUSEPORT_SOCKARRAY", + MapTypePerCPUCgroupStorage: "BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE", + MapTypeQueue: "BPF_MAP_TYPE_QUEUE", + MapTypeStack: "BPF_MAP_TYPE_STACK", + MapTypeSKStorage: "BPF_MAP_TYPE_SK_STORAGE", + MapTypeDevmapHash: "BPF_MAP_TYPE_DEVMAP_HASH", + MapTypeStructOps: "BPF_MAP_TYPE_STRUCT_OPS", + MapTypeRingbuf: "BPF_MAP_TYPE_RINGBUF", + MapTypeInodeStorage: "BPF_MAP_TYPE_INODE_STORAGE", + MapTypeTaskStorage: "BPF_MAP_TYPE_TASK_STORAGE", + MapTypeBloomFilter: "BPF_MAP_TYPE_BLOOM_FILTER", +} + +func (t MapType) String() string { + return mapTypeToString[t] +} + +// +// MapFlag +// + +type MapFlag uint32 + +const ( + MapFlagUpdateAny MapFlag = iota // create new element or update existing + MapFlagUpdateNoExist // create new element if it didn't exist + MapFlagUpdateExist // update existing element + MapFlagFLock // spin_lock-ed map_lookup/map_update +) + +// +// BPFMapInfo +// + +// BPFMapInfo mirrors the C structure bpf_map_info. +type BPFMapInfo struct { + Type MapType + ID uint32 + KeySize uint32 + ValueSize uint32 + MaxEntries uint32 + MapFlags uint32 + Name string + IfIndex uint32 + BTFVmlinuxValueTypeID uint32 + NetnsDev uint64 + NetnsIno uint64 + BTFID uint32 + BTFKeyTypeID uint32 + BTFValueTypeID uint32 + MapExtra uint64 +} + +// GetMapInfoByFD returns the BPFMapInfo for the map with the given file descriptor. +func GetMapInfoByFD(fd int) (*BPFMapInfo, error) { + var info C.struct_bpf_map_info + var infoLen C.uint = C.uint(C.sizeof_struct_bpf_map_info) + + retC := C.bpf_map_get_info_by_fd(C.int(fd), &info, &infoLen) + if retC < 0 { + return nil, fmt.Errorf("failed to get map info for fd %d: %w", fd, syscall.Errno(-retC)) + } + + return &BPFMapInfo{ + Type: MapType(uint32(info._type)), + ID: uint32(info.id), + KeySize: uint32(info.key_size), + ValueSize: uint32(info.value_size), + MaxEntries: uint32(info.max_entries), + MapFlags: uint32(info.map_flags), + Name: C.GoString(&info.name[0]), + IfIndex: uint32(info.ifindex), + BTFVmlinuxValueTypeID: uint32(info.btf_vmlinux_value_type_id), + NetnsDev: uint64(info.netns_dev), + NetnsIno: uint64(info.netns_ino), + BTFID: uint32(info.btf_id), + BTFKeyTypeID: uint32(info.btf_key_type_id), + BTFValueTypeID: uint32(info.btf_value_type_id), + MapExtra: uint64(info.map_extra), + }, nil +} + +// +// Map misc internal +// + +// calcMapValueSize calculates the size of the value for a map. 
+// For per-CPU maps, it is calculated based on the number of possible CPUs. +func calcMapValueSize(valueSize int, mapType MapType) (int, error) { + if valueSize <= 0 { + return 0, fmt.Errorf("value size must be greater than 0") + } + + switch mapType { + case MapTypePerCPUArray, + MapTypePerCPUHash, + MapTypeLRUPerCPUHash, + MapTypePerCPUCgroupStorage: + // per-CPU maps have a value size calculated using a round-up of the + // element size multiplied by the number of possible CPUs. + elemSize := roundUp(uint64(valueSize), 8) + numCPU, err := NumPossibleCPUs() + if err != nil { + return 0, err + } + + return int(elemSize) * numCPU, nil + default: + // For other maps, the value size does not change. + return valueSize, nil + } +} diff --git a/map-iterator.go b/map-iterator.go new file mode 100644 index 00000000..bf694dad --- /dev/null +++ b/map-iterator.go @@ -0,0 +1,66 @@ +package libbpfgo + +/* +#cgo LDFLAGS: -lelf -lz +#include "libbpfgo.h" +*/ +import "C" + +import ( + "syscall" + "unsafe" +) + +// +// BPFMapIterator (low-level API) +// + +// BPFMapIterator iterates over keys in a BPF map. +type BPFMapIterator struct { + mapFD int + keySize int + err error + prev []byte + next []byte +} + +// Next advances the iterator to the next key in the map. +func (it *BPFMapIterator) Next() bool { + if it.err != nil { + return false + } + + prevPtr := unsafe.Pointer(nil) + if it.next != nil { + prevPtr = unsafe.Pointer(&it.next[0]) + } + + next := make([]byte, it.keySize) + nextPtr := unsafe.Pointer(&next[0]) + + retC := C.bpf_map_get_next_key(C.int(it.mapFD), prevPtr, nextPtr) + if retC < 0 { + if err := syscall.Errno(-retC); err != syscall.ENOENT { + it.err = err + } + + return false + } + + it.prev = it.next + it.next = next + + return true +} + +// Key returns the current key value of the iterator, if the most recent call +// to Next returned true. +// The slice is valid only until the next call to Next. +func (it *BPFMapIterator) Key() []byte { + return it.next +} + +// Err returns the last error that ocurred while table.Iter or iter.Next. +func (it *BPFMapIterator) Err() error { + return it.err +} diff --git a/map-low.go b/map-low.go new file mode 100644 index 00000000..73075328 --- /dev/null +++ b/map-low.go @@ -0,0 +1,358 @@ +package libbpfgo + +/* +#cgo LDFLAGS: -lelf -lz +#include "libbpfgo.h" +*/ +import "C" + +import ( + "fmt" + "syscall" + "unsafe" +) + +// +// BPFMapLow (low-level API) +// + +// BPFMapLow provides a low-level interface to BPF maps. +// Its methods follow the BPFMap naming convention. +type BPFMapLow struct { + fd int + info *BPFMapInfo +} + +// BPFMapCreateOpts mirrors the C structure bpf_map_create_opts. +type BPFMapCreateOpts struct { + BTFFD uint32 + BTFKeyTypeID uint32 + BTFValueTypeID uint32 + BTFVmlinuxValueTypeID uint32 + InnerMapFD uint32 + MapFlags uint32 + MapExtra uint64 + NumaNode uint32 + MapIfIndex uint32 +} + +// CreateMap creates a new BPF map with the given parameters. 
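+// Passing nil for opts uses libbpf's default creation options.
+//
+// For example (map name and sizes are illustrative):
+//
+// m, err := CreateMap(MapTypeHash, "example_hash", 4, 4, 128, nil)
+// if err != nil {
+//     // handle the error
+// }
+// fmt.Println(m.Name(), m.MaxEntries())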
+func CreateMap(mapType MapType, mapName string, keySize, valueSize, maxEntries int, opts *BPFMapCreateOpts) (*BPFMapLow, error) { + mapNameC := C.CString(mapName) + defer C.free(unsafe.Pointer(mapNameC)) + var optsC *C.struct_bpf_map_create_opts + var errno error + + if opts != nil { + optsC, errno = C.cgo_bpf_map_create_opts_new( + C.uint(opts.BTFFD), + C.uint(opts.BTFKeyTypeID), + C.uint(opts.BTFValueTypeID), + C.uint(opts.BTFVmlinuxValueTypeID), + C.uint(opts.InnerMapFD), + C.uint(opts.MapFlags), + C.ulonglong(opts.MapExtra), + C.uint(opts.NumaNode), + C.uint(opts.MapIfIndex), + ) + if optsC == nil { + return nil, fmt.Errorf("failed to create bpf_map_create_opts: %w", errno) + } + defer C.cgo_bpf_map_create_opts_free(optsC) + } + + fdC := C.bpf_map_create(uint32(mapType), mapNameC, C.uint(keySize), C.uint(valueSize), C.uint(maxEntries), optsC) + if fdC < 0 { + return nil, fmt.Errorf("could not create map %s: %w", mapName, syscall.Errno(-fdC)) + } + + info, errInfo := GetMapInfoByFD(int(fdC)) + if errInfo != nil { + if errClose := syscall.Close(int(fdC)); errClose != nil { + return nil, fmt.Errorf("could not create map %s: %w, %w", mapName, errInfo, errClose) + } + + return nil, fmt.Errorf("could not create map %s: %w", mapName, errInfo) + } + + return &BPFMapLow{ + fd: int(fdC), + info: info, + }, nil +} + +// +// BPFMapLow Specs +// + +func (m *BPFMapLow) FileDescriptor() int { + return m.fd +} + +func (m *BPFMapLow) Name() string { + return m.info.Name +} + +func (m *BPFMapLow) Type() MapType { + return MapType(m.info.Type) +} + +func (m *BPFMapLow) MaxEntries() uint32 { + return m.info.MaxEntries +} + +// TODO: implement `bpf_map__map_flags` +// func (m *BPFMapLow) MapFlags() MapFlag { +// } + +// TODO: implement `bpf_map__numa_node` +// func (m *BPFMapLow) NUMANode() uint32 { +// } + +func (m *BPFMapLow) KeySize() int { + return int(m.info.KeySize) +} + +func (m *BPFMapLow) ValueSize() int { + return int(m.info.ValueSize) +} + +// TODO: implement `bpf_map__btf_key_type_id` +// func (m *BPFMapLow) BTFKeyTypeID() uint32 { +// } + +// TODO: implement `bpf_map__btf_value_type_id` +// func (m *BPFMapLow) BTFValueTypeID() uint32 { +// } + +// TODO: implement `bpf_map__ifindex` +// func (m *BPFMapLow) IfIndex() uint32 { +// } + +// TODO: implement `bpf_map__map_extra` +// func (m *BPFMapLow) MapExtra() uint64 { +// } + +// +// BPFMapLow Operations +// + +func (m *BPFMapLow) GetValue(key unsafe.Pointer) ([]byte, error) { + return m.GetValueFlags(key, MapFlagUpdateAny) +} + +func (m *BPFMapLow) GetValueFlags(key unsafe.Pointer, flags MapFlag) ([]byte, error) { + valueSize, err := calcMapValueSize(m.ValueSize(), m.Type()) + if err != nil { + return nil, fmt.Errorf("map %s %w", m.Name(), err) + } + + value := make([]byte, valueSize) + retC := C.bpf_map_lookup_elem_flags( + C.int(m.FileDescriptor()), + key, + unsafe.Pointer(&value[0]), + C.ulonglong(flags), + ) + if retC < 0 { + return nil, fmt.Errorf("failed to lookup value %v in map %s: %w", key, m.Name(), syscall.Errno(-retC)) + } + + return value, nil +} + +// TODO: implement `bpf_map__lookup_and_delete_elem` +// func (m *BPFMapLow) GetValueAndDeleteKey(key unsafe.Pointer) ([]byte, error) { +// } + +func (m *BPFMapLow) Update(key, value unsafe.Pointer) error { + return m.UpdateValueFlags(key, value, MapFlagUpdateAny) +} + +func (m *BPFMapLow) UpdateValueFlags(key, value unsafe.Pointer, flags MapFlag) error { + retC := C.bpf_map_update_elem( + C.int(m.FileDescriptor()), + key, + value, + C.ulonglong(flags), + ) + if retC < 0 { + return 
fmt.Errorf("failed to update map %s: %w", m.Name(), syscall.Errno(-retC)) + } + + return nil +} + +func (m *BPFMapLow) DeleteKey(key unsafe.Pointer) error { + retC := C.bpf_map_delete_elem(C.int(m.FileDescriptor()), key) + if retC < 0 { + return fmt.Errorf("failed to delete key %d in map %s: %w", key, m.Name(), syscall.Errno(-retC)) + } + + return nil +} + +// TODO: implement `bpf_map__get_next_key` +// func (m *BPFMapLow) GetNextKey(key unsafe.Pointer) (unsafe.Pointer, error) { +// } + +// +// BPFMapLow Batch Operations +// + +func (m *BPFMapLow) GetValueBatch(keys unsafe.Pointer, startKey, nextKey unsafe.Pointer, count uint32) ([][]byte, error) { + valueSize, err := calcMapValueSize(m.ValueSize(), m.Type()) + if err != nil { + return nil, fmt.Errorf("map %s %w", m.Name(), err) + } + + var ( + values = make([]byte, valueSize*int(count)) + valuesPtr = unsafe.Pointer(&values[0]) + countC = C.uint(count) + ) + + optsC, errno := C.cgo_bpf_map_batch_opts_new(C.BPF_ANY, C.BPF_ANY) + if optsC == nil { + return nil, fmt.Errorf("failed to create bpf_map_batch_opts: %w", errno) + } + defer C.cgo_bpf_map_batch_opts_free(optsC) + + // The batch APIs are a bit different in which they can return an error, but + // depending on the errno code, it might mean a complete error (nothing was + // done) or a partial success (some elements were processed). + // + // - On complete sucess, it will return 0, and errno won't be set. + // - On partial sucess, it will return -1, and errno will be set to ENOENT. + // - On error, it will return -1, and an errno different to ENOENT. + retC := C.bpf_map_lookup_batch( + C.int(m.FileDescriptor()), + startKey, + nextKey, + keys, + valuesPtr, + &countC, + optsC, + ) + errno = syscall.Errno(-retC) + if retC < 0 && errno != syscall.ENOENT { + return nil, fmt.Errorf("failed to batch get value %v in map %s: %w", keys, m.Name(), errno) + } + + // Either some or all entries were read. + // retC < 0 && errno == syscall.ENOENT indicates a partial read. + return collectBatchValues(values, uint32(countC), valueSize), nil +} + +func (m *BPFMapLow) GetValueAndDeleteBatch(keys, startKey, nextKey unsafe.Pointer, count uint32) ([][]byte, error) { + valueSize, err := calcMapValueSize(m.ValueSize(), m.Type()) + if err != nil { + return nil, fmt.Errorf("map %s %w", m.Name(), err) + } + + var ( + values = make([]byte, valueSize*int(count)) + valuesPtr = unsafe.Pointer(&values[0]) + countC = C.uint(count) + ) + + optsC, errno := C.cgo_bpf_map_batch_opts_new(C.BPF_ANY, C.BPF_ANY) + if optsC == nil { + return nil, fmt.Errorf("failed to create bpf_map_batch_opts: %w", errno) + } + defer C.cgo_bpf_map_batch_opts_free(optsC) + + retC := C.bpf_map_lookup_and_delete_batch( + C.int(m.FileDescriptor()), + startKey, + nextKey, + keys, + valuesPtr, + &countC, + optsC, + ) + errno = syscall.Errno(-retC) + if retC < 0 && errno != syscall.ENOENT { + return nil, fmt.Errorf("failed to batch lookup and delete values %v in map %s: %w", keys, m.Name(), errno) + } + + // Either some or all entries were read and deleted. + // retC < 0 && errno == syscall.ENOENT indicates a partial read and delete. 
+ return collectBatchValues(values, uint32(countC), valueSize), nil +} + +func (m *BPFMapLow) UpdateBatch(keys, values unsafe.Pointer, count uint32) error { + countC := C.uint(count) + + optsC, errno := C.cgo_bpf_map_batch_opts_new(C.BPF_ANY, C.BPF_ANY) + if optsC == nil { + return fmt.Errorf("failed to create bpf_map_batch_opts: %w", errno) + } + defer C.cgo_bpf_map_batch_opts_free(optsC) + + retC := C.bpf_map_update_batch( + C.int(m.FileDescriptor()), + keys, + values, + &countC, + optsC, + ) + errno = syscall.Errno(-retC) + if retC < 0 { + if errno != syscall.EFAULT && uint32(countC) != count { + return fmt.Errorf("failed to update ALL elements in map %s, updated (%d/%d): %w", m.Name(), uint32(countC), count, errno) + } + return fmt.Errorf("failed to batch update elements in map %s: %w", m.Name(), errno) + } + + return nil +} + +func (m *BPFMapLow) DeleteKeyBatch(keys unsafe.Pointer, count uint32) error { + countC := C.uint(count) + + optsC, errno := C.cgo_bpf_map_batch_opts_new(C.BPF_ANY, C.BPF_ANY) + if optsC == nil { + return fmt.Errorf("failed to create bpf_map_batch_opts: %w", errno) + } + defer C.cgo_bpf_map_batch_opts_free(optsC) + + retC := C.bpf_map_delete_batch( + C.int(m.FileDescriptor()), + keys, + &countC, + optsC, + ) + errno = syscall.Errno(-retC) + if retC < 0 && errno != syscall.ENOENT { + return fmt.Errorf("failed to batch delete keys %v in map %s: %w", keys, m.Name(), errno) + } + + // retC < 0 && errno == syscall.ENOENT indicates a partial deletion. + return nil +} + +func collectBatchValues(values []byte, count uint32, valueSize int) [][]byte { + var value []byte + var collected [][]byte + + for i := 0; i < int(count*uint32(valueSize)); i += valueSize { + value = values[i : i+valueSize] + collected = append(collected, value) + } + + return collected +} + +// +// BPFMapLow Iterator +// + +func (m *BPFMapLow) Iterator() *BPFMapIterator { + return &BPFMapIterator{ + mapFD: m.FileDescriptor(), + keySize: m.KeySize(), + prev: nil, + next: nil, + } +} diff --git a/map.go b/map.go index 0249b63f..dccc7a67 100644 --- a/map.go +++ b/map.go @@ -12,542 +12,326 @@ import ( "unsafe" ) -// BPFMapCreateOpts mirrors the C structure bpf_map_create_opts -type BPFMapCreateOpts struct { - Size uint64 - BtfFD uint32 - BtfKeyTypeID uint32 - BtfValueTypeID uint32 - BtfVmlinuxValueTypeID uint32 - InnerMapFD uint32 - MapFlags uint32 - MapExtra uint64 - NumaNode uint32 - MapIfIndex uint32 -} - -func bpfMapCreateOptsToC(createOpts *BPFMapCreateOpts) *C.struct_bpf_map_create_opts { - if createOpts == nil { - return nil - } - opts := C.struct_bpf_map_create_opts{} - opts.sz = C.ulong(createOpts.Size) - opts.btf_fd = C.uint(createOpts.BtfFD) - opts.btf_key_type_id = C.uint(createOpts.BtfKeyTypeID) - opts.btf_value_type_id = C.uint(createOpts.BtfValueTypeID) - opts.btf_vmlinux_value_type_id = C.uint(createOpts.BtfVmlinuxValueTypeID) - opts.inner_map_fd = C.uint(createOpts.InnerMapFD) - opts.map_flags = C.uint(createOpts.MapFlags) - opts.map_extra = C.ulonglong(createOpts.MapExtra) - opts.numa_node = C.uint(createOpts.NumaNode) - opts.map_ifindex = C.uint(createOpts.MapIfIndex) - - return &opts -} - -// CreateMap creates a BPF map from userspace. This can be used for populating -// BPF array of maps or hash of maps. 
However, this function uses a low-level -// libbpf API; maps created in this way do not conform to libbpf map formats, -// and therefore do not have access to libbpf high level bpf_map__* APIS -// which causes different behavior from maps created in the kernel side code -// -// See usage of `bpf_map_create()` in kernel selftests for more info -func CreateMap(mapType MapType, mapName string, keySize, valueSize, maxEntries int, opts *BPFMapCreateOpts) (*BPFMap, error) { - cs := C.CString(mapName) - fdOrError := C.bpf_map_create(uint32(mapType), cs, C.uint(keySize), C.uint(valueSize), C.uint(maxEntries), bpfMapCreateOptsToC(opts)) - C.free(unsafe.Pointer(cs)) - if fdOrError < 0 { - return nil, fmt.Errorf("could not create map: %w", syscall.Errno(-fdOrError)) - } - - return &BPFMap{ - name: mapName, - fd: fdOrError, - module: nil, - bpfMap: nil, - }, nil -} +// +// BPFMap (high-level API - `bpf_map__*`) +// +// BPFMap is a wrapper around a libbpf bpf_map. type BPFMap struct { - name string - bpfMap *C.struct_bpf_map - fd C.int - module *Module -} - -type MapType uint32 - -const ( - MapTypeUnspec MapType = iota - MapTypeHash - MapTypeArray - MapTypeProgArray - MapTypePerfEventArray - MapTypePerCPUHash - MapTypePerCPUArray - MapTypeStackTrace - MapTypeCgroupArray - MapTypeLRUHash - MapTypeLRUPerCPUHash - MapTypeLPMTrie - MapTypeArrayOfMaps - MapTypeHashOfMaps - MapTypeDevMap - MapTypeSockMap - MapTypeCPUMap - MapTypeXSKMap - MapTypeSockHash - MapTypeCgroupStorage - MapTypeReusePortSockArray - MapTypePerCPUCgroupStorage - MapTypeQueue - MapTypeStack - MapTypeSKStorage - MapTypeDevmapHash - MapTypeStructOps - MapTypeRingbuf - MapTypeInodeStorage - MapTypeTaskStorage - MapTypeBloomFilter -) - -type MapFlag uint32 + bpfMap *C.struct_bpf_map + bpfMapLow *BPFMapLow + module *Module +} -const ( - MapFlagUpdateAny MapFlag = iota // create new element or update existing - MapFlagUpdateNoExist // create new element if it didn't exist - MapFlagUpdateExist // update existing element - MapFlagFLock // spin_lock-ed map_lookup/map_update -) +// +// BPFMap Specs +// -func (m MapType) String() string { - x := map[MapType]string{ - MapTypeUnspec: "BPF_MAP_TYPE_UNSPEC", - MapTypeHash: "BPF_MAP_TYPE_HASH", - MapTypeArray: "BPF_MAP_TYPE_ARRAY", - MapTypeProgArray: "BPF_MAP_TYPE_PROG_ARRAY", - MapTypePerfEventArray: "BPF_MAP_TYPE_PERF_EVENT_ARRAY", - MapTypePerCPUHash: "BPF_MAP_TYPE_PERCPU_HASH", - MapTypePerCPUArray: "BPF_MAP_TYPE_PERCPU_ARRAY", - MapTypeStackTrace: "BPF_MAP_TYPE_STACK_TRACE", - MapTypeCgroupArray: "BPF_MAP_TYPE_CGROUP_ARRAY", - MapTypeLRUHash: "BPF_MAP_TYPE_LRU_HASH", - MapTypeLRUPerCPUHash: "BPF_MAP_TYPE_LRU_PERCPU_HASH", - MapTypeLPMTrie: "BPF_MAP_TYPE_LPM_TRIE", - MapTypeArrayOfMaps: "BPF_MAP_TYPE_ARRAY_OF_MAPS", - MapTypeHashOfMaps: "BPF_MAP_TYPE_HASH_OF_MAPS", - MapTypeDevMap: "BPF_MAP_TYPE_DEVMAP", - MapTypeSockMap: "BPF_MAP_TYPE_SOCKMAP", - MapTypeCPUMap: "BPF_MAP_TYPE_CPUMAP", - MapTypeXSKMap: "BPF_MAP_TYPE_XSKMAP", - MapTypeSockHash: "BPF_MAP_TYPE_SOCKHASH", - MapTypeCgroupStorage: "BPF_MAP_TYPE_CGROUP_STORAGE", - MapTypeReusePortSockArray: "BPF_MAP_TYPE_REUSEPORT_SOCKARRAY", - MapTypePerCPUCgroupStorage: "BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE", - MapTypeQueue: "BPF_MAP_TYPE_QUEUE", - MapTypeStack: "BPF_MAP_TYPE_STACK", - MapTypeSKStorage: "BPF_MAP_TYPE_SK_STORAGE", - MapTypeDevmapHash: "BPF_MAP_TYPE_DEVMAP_HASH", - MapTypeStructOps: "BPF_MAP_TYPE_STRUCT_OPS", - MapTypeRingbuf: "BPF_MAP_TYPE_RINGBUF", - MapTypeInodeStorage: "BPF_MAP_TYPE_INODE_STORAGE", - MapTypeTaskStorage: 
"BPF_MAP_TYPE_TASK_STORAGE", - MapTypeBloomFilter: "BPF_MAP_TYPE_BLOOM_FILTER", - } - return x[m] +func (m *BPFMap) Module() *Module { + return m.module } -func (b *BPFMap) Name() string { - cs := C.bpf_map__name(b.bpfMap) - if cs == nil { - return "" - } - s := C.GoString(cs) - return s +// Deprecated: use BPFMap.Module() instead. +func (m *BPFMap) GetModule() *Module { + return m.Module() } -func (b *BPFMap) Type() MapType { - return MapType(C.bpf_map__type(b.bpfMap)) +func (m *BPFMap) FileDescriptor() int { + return int(C.bpf_map__fd(m.bpfMap)) } -// SetType is used to set the type of a bpf map that isn't associated -// with a file descriptor already. If the map is already associated -// with a file descriptor the libbpf API will return error code EBUSY -func (b *BPFMap) SetType(mapType MapType) error { - errC := C.bpf_map__set_type(b.bpfMap, C.enum_bpf_map_type(int(mapType))) - if errC != 0 { - return fmt.Errorf("could not set bpf map type: %w", syscall.Errno(-errC)) - } - return nil +// Deprecated: use BPFMap.FileDescriptor() instead. +func (m *BPFMap) GetFd() int { + return m.FileDescriptor() } -func (b *BPFMap) Pin(pinPath string) error { - path := C.CString(pinPath) - ret := C.bpf_map__pin(b.bpfMap, path) - C.free(unsafe.Pointer(path)) - if ret != 0 { - return fmt.Errorf("failed to pin map %s to path %s: %w", b.name, pinPath, syscall.Errno(-ret)) - } - return nil +// bpf_map__reuse_fd +// func (m *BPFMap) ReuseFD(fd int) error { +// } + +func (m *BPFMap) Name() string { + return C.GoString(C.bpf_map__name(m.bpfMap)) } -func (b *BPFMap) Unpin(pinPath string) error { - path := C.CString(pinPath) - ret := C.bpf_map__unpin(b.bpfMap, path) - C.free(unsafe.Pointer(path)) - if ret != 0 { - return fmt.Errorf("failed to unpin map %s from path %s: %w", b.name, pinPath, syscall.Errno(-ret)) - } - return nil +// Deprecated: use BPFMap.Name() instead. +func (m *BPFMap) GetName() string { + return m.Name() } -func (b *BPFMap) SetPinPath(pinPath string) error { - path := C.CString(pinPath) - ret := C.bpf_map__set_pin_path(b.bpfMap, path) - C.free(unsafe.Pointer(path)) - if ret != 0 { - return fmt.Errorf("failed to set pin for map %s to path %s: %w", b.name, pinPath, syscall.Errno(-ret)) - } - return nil +func (m *BPFMap) Type() MapType { + return MapType(C.bpf_map__type(m.bpfMap)) } -// Resize changes the map's capacity to maxEntries. -// It should be called after the module was initialized but -// prior to it being loaded with BPFLoadObject. -// Note: for ring buffer and perf buffer, maxEntries is the -// capacity in bytes. -func (b *BPFMap) Resize(maxEntries uint32) error { - ret := C.bpf_map__set_max_entries(b.bpfMap, C.uint(maxEntries)) - if ret != 0 { - return fmt.Errorf("failed to resize map %s to %v: %w", b.name, maxEntries, syscall.Errno(-ret)) +// SetType assigns a specific type to a BPFMap instance that is not yet associated +// with a file descriptor. +func (m *BPFMap) SetType(mapType MapType) error { + retC := C.bpf_map__set_type(m.bpfMap, C.enum_bpf_map_type(int(mapType))) + if retC < 0 { + return fmt.Errorf("could not set bpf map type: %w", syscall.Errno(-retC)) } + return nil } -// GetMaxEntries returns the map's capacity. -// Note: for ring buffer and perf buffer, maxEntries is the -// capacity in bytes. -func (b *BPFMap) GetMaxEntries() uint32 { - maxEntries := C.bpf_map__max_entries(b.bpfMap) - return uint32(maxEntries) +// MaxEntries returns the capacity of the BPFMap. +// +// For ring and perf buffer types, this returns the capacity in bytes. 
+func (m *BPFMap) MaxEntries() uint32 { + return uint32(C.bpf_map__max_entries(m.bpfMap)) } -func (b *BPFMap) FileDescriptor() int { - return int(C.bpf_map__fd(b.bpfMap)) +// Deprecated: use BPFMap.MaxEntries() instead. +func (m *BPFMap) GetMaxEntries() uint32 { + return m.MaxEntries() } -// Deprecated: use BPFMap.FileDescriptor() instead. -func (b *BPFMap) GetFd() int { - return b.FileDescriptor() -} +// SetMaxEntries sets the capacity of the BPFMap to the given maxEntries value. +// +// This function must be called after BPF module initialization and before loading +// the module with BPFLoadObject, enabling customization of the map capacity. +// +// For ring and perf buffer types, maxEntries represents the capacity in bytes. +func (m *BPFMap) SetMaxEntries(maxEntries uint32) error { + retC := C.bpf_map__set_max_entries(m.bpfMap, C.uint(maxEntries)) + if retC < 0 { + return fmt.Errorf("failed to set map %s max entries to %v: %w", m.Name(), maxEntries, syscall.Errno(-retC)) + } -// Deprecated: use BPFMap.Name() instead. -func (b *BPFMap) GetName() string { - return b.Name() + return nil } -func (b *BPFMap) GetModule() *Module { - return b.module +// Deprecated: use BPFMap.SetMaxEntries() instead. +func (m *BPFMap) Resize(maxEntries uint32) error { + return m.SetMaxEntries(maxEntries) } -func (b *BPFMap) PinPath() string { - return C.GoString(C.bpf_map__pin_path(b.bpfMap)) +func (m *BPFMap) MapFlags() MapFlag { + return MapFlag(C.bpf_map__map_flags(m.bpfMap)) } -// Deprecated: use BPFMap.PinPath() instead. -func (b *BPFMap) GetPinPath() string { - return b.PinPath() -} +// TODO: implement `bpf_map__set_map_flags` wrapper +// func (m *BPFMap) SetMapFlags(flags MapFlag) error { +// } -func (b *BPFMap) IsPinned() bool { - isPinned := C.bpf_map__is_pinned(b.bpfMap) - return isPinned == C.bool(true) -} +// TODO: implement `bpf_map__numa_node` wrapper +// func (m *BPFMap) NUMANode() uint32 { +// } + +// TODO: implement `bpf_map__set_numa_node` wrapper +// func (m *BPFMap) SetNUMANode(node uint32) error { +// } -func (b *BPFMap) KeySize() int { - return int(C.bpf_map__key_size(b.bpfMap)) +func (m *BPFMap) KeySize() int { + return int(C.bpf_map__key_size(m.bpfMap)) } -func (b *BPFMap) ValueSize() int { - return int(C.bpf_map__value_size(b.bpfMap)) +// TODO: implement `bpf_map__set_key_size` wrapper +// func (m *BPFMap) SetKeySize(size uint32) error { +// } + +func (m *BPFMap) ValueSize() int { + return int(C.bpf_map__value_size(m.bpfMap)) } -func (b *BPFMap) SetValueSize(size uint32) error { - ret := C.bpf_map__set_value_size(b.bpfMap, C.uint(size)) - if ret != 0 { - return fmt.Errorf("could not set map value size: %w", syscall.Errno(-ret)) +// SetValueSize sets the value size to a BPFMap instance that is not yet associated +// with a file descriptor. +func (m *BPFMap) SetValueSize(size uint32) error { + retC := C.bpf_map__set_value_size(m.bpfMap, C.uint(size)) + if retC < 0 { + return fmt.Errorf("could not set map value size: %w", syscall.Errno(-retC)) } + return nil } -// GetValue takes a pointer to the key which is stored in the map. -// It returns the associated value as a slice of bytes. -// All basic types, and structs are supported as keys. -// -// NOTE: Slices and arrays are also supported but special care -// should be taken as to take a reference to the first element -// in the slice or array instead of the slice/array itself, as to -// avoid undefined behavior. 
-func (b *BPFMap) GetValue(key unsafe.Pointer) ([]byte, error) { - value := make([]byte, b.ValueSize()) - valuePtr := unsafe.Pointer(&value[0]) - - ret, errC := C.bpf_map_lookup_elem(b.fd, key, valuePtr) - if ret != 0 { - return nil, fmt.Errorf("failed to lookup value %v in map %s: %w", key, b.name, errC) - } - return value, nil -} +// TODO: implement `bpf_map__btf_key_type_id` wrapper +// func (m *BPFMap) BTFKeyTypeID() uint32 { +// } -func (b *BPFMap) GetValueFlags(key unsafe.Pointer, flags MapFlag) ([]byte, error) { - value := make([]byte, b.ValueSize()) - valuePtr := unsafe.Pointer(&value[0]) +// TODO: implement `bpf_map__btf_value_type_id` wrapper +// func (m *BPFMap) BTFValueTypeID() uint32 { +// } - errC := C.bpf_map_lookup_elem_flags(b.fd, key, valuePtr, C.ulonglong(flags)) - if errC != 0 { - return nil, fmt.Errorf("failed to lookup value %v in map %s: %w", key, b.name, syscall.Errno(-errC)) - } - return value, nil +func (m *BPFMap) IfIndex() uint32 { + return uint32(C.bpf_map__ifindex(m.bpfMap)) } -// GetValueReadInto is like GetValue, except it allows the caller to pass in -// a pointer to the slice of bytes that the value would be read into from the -// map. -// This is useful for reading from maps with variable sizes, especially -// per-cpu arrays and hash maps where the size of each value depends on the -// number of CPUs -func (b *BPFMap) GetValueReadInto(key unsafe.Pointer, value *[]byte) error { - valuePtr := unsafe.Pointer(&(*value)[0]) - ret := C.bpf_map__lookup_elem(b.bpfMap, key, C.ulong(b.KeySize()), valuePtr, C.ulong(len(*value)), 0) - if ret != 0 { - return fmt.Errorf("failed to lookup value %v in map %s: %w", key, b.name, syscall.Errno(-ret)) - } - return nil +// TODO: implement `bpf_map__set_ifindex` wrapper +// func (m *BPFMap) SetIfIndex(ifIndex uint32) error { +// } + +func (m *BPFMap) MapExtra() uint64 { + return uint64(C.bpf_map__map_extra(m.bpfMap)) } -func (b *BPFMap) setInitialValue(value unsafe.Pointer) error { - sz := b.ValueSize() - ret := C.bpf_map__set_initial_value(b.bpfMap, value, C.ulong(sz)) - if ret != 0 { - return fmt.Errorf("failed to set inital value for map %s: %w", b.name, syscall.Errno(-ret)) +// TODO: implement `bpf_map__set_map_extra` wrapper +// func (m *BPFMap) SetMapExtra(extra uint64) error { +// } + +func (m *BPFMap) InitialValue() ([]byte, error) { + valueSize, err := calcMapValueSize(m.ValueSize(), m.Type()) + if err != nil { + return nil, fmt.Errorf("map %s %w", m.Name(), err) } - return nil -} -func (b *BPFMap) getInitialValue() []byte { - value := make([]byte, b.ValueSize()) - valuePtr := unsafe.Pointer(&value[0]) - C.get_internal_map_init_value(b.bpfMap, valuePtr) - return value -} + value := make([]byte, valueSize) + C.cgo_bpf_map__initial_value(m.bpfMap, unsafe.Pointer(&value[0])) -// BPFMapBatchOpts mirrors the C structure bpf_map_batch_opts. 
-type BPFMapBatchOpts struct { - Sz uint64 - ElemFlags uint64 - Flags uint64 + return value, nil } -func bpfMapBatchOptsToC(batchOpts *BPFMapBatchOpts) *C.struct_bpf_map_batch_opts { - if batchOpts == nil { - return nil +func (m *BPFMap) SetInitialValue(value unsafe.Pointer) error { + valueSize, err := calcMapValueSize(m.ValueSize(), m.Type()) + if err != nil { + return fmt.Errorf("map %s %w", m.Name(), err) + } + + retC := C.bpf_map__set_initial_value(m.bpfMap, value, C.ulong(valueSize)) + if retC < 0 { + return fmt.Errorf("failed to set inital value for map %s: %w", m.Name(), syscall.Errno(-retC)) } - opts := C.struct_bpf_map_batch_opts{} - opts.sz = C.ulong(batchOpts.Sz) - opts.elem_flags = C.ulonglong(batchOpts.ElemFlags) - opts.flags = C.ulonglong(batchOpts.Flags) - return &opts + return nil } -// GetValueBatch allows for batch lookups of multiple keys from the map. +// TODO: implement `bpf_map__is_internal` wrapper +// func (m *BPFMap) IsInternal() bool { +// } + // -// The first argument, keys, is a pointer to an array or slice of keys which will be populated with the keys returned from this operation. -// It returns the associated values as a slice of slices of bytes. +// BPFMap Pinning // -// This API allows for batch lookups of multiple keys, potentially in steps over multiple iterations. For example, -// you provide the last key seen (or nil) for the startKey, and the first key to start the next iteration with in nextKey. -// Once the first iteration is complete you can provide the last key seen in the previous iteration as the startKey for the next iteration -// and repeat until nextKey is nil. -// -// The last argument, count, is the number of keys to lookup. The kernel will update it with the count of the elements that were -// retrieved. -// -// The API can return partial results even though an -1 is returned. In this case, errno will be set to `ENOENT` and the values slice and count -// will be filled in with the elements that were read. See the inline comment in `GetValueAndDeleteBatch` for more context. -func (b *BPFMap) GetValueBatch(keys unsafe.Pointer, startKey, nextKey unsafe.Pointer, count uint32) ([][]byte, error) { - var ( - values = make([]byte, b.ValueSize()*int(count)) - valuesPtr = unsafe.Pointer(&values[0]) - countC = C.uint(count) - ) - opts := &BPFMapBatchOpts{ - Sz: uint64(unsafe.Sizeof(BPFMapBatchOpts{})), - ElemFlags: C.BPF_ANY, - Flags: C.BPF_ANY, - } +func (m *BPFMap) PinPath() string { + return C.GoString(C.bpf_map__pin_path(m.bpfMap)) +} - ret, errC := C.bpf_map_lookup_batch(b.fd, startKey, nextKey, keys, valuesPtr, &countC, bpfMapBatchOptsToC(opts)) - processed := uint32(countC) +// Deprecated: use BPFMap.PinPath() instead. +func (m *BPFMap) GetPinPath() string { + return m.PinPath() +} - if ret != 0 && errC != syscall.ENOENT { - return nil, fmt.Errorf("failed to batch get value %v in map %s: ret %d (err: %s)", keys, b.name, ret, errC) +func (m *BPFMap) SetPinPath(pinPath string) error { + pathC := C.CString(pinPath) + defer C.free(unsafe.Pointer(pathC)) + + retC := C.bpf_map__set_pin_path(m.bpfMap, pathC) + if retC < 0 { + return fmt.Errorf("failed to set pin for map %s to path %s: %w", m.Name(), pinPath, syscall.Errno(-retC)) } - // Either some or all entries were read. - // ret = -1 && errno == syscall.ENOENT indicates a partial read. 
- return collectBatchValues(values, processed, b.ValueSize()), nil + return nil } -// GetValueAndDeleteBatch allows for batch lookup and deletion of elements where each element is deleted after being retrieved from the map. -// -// The first argument, keys, is a pointer to an array or slice of keys which will be populated with the keys returned from this operation. -// It returns the associated values as a slice of slices of bytes. -// -// This API allows for batch lookups and deletion of multiple keys, potentially in steps over multiple iterations. For example, -// you provide the last key seen (or nil) for the startKey, and the first key to start the next iteration with in nextKey. -// Once the first iteration is complete you can provide the last key seen in the previous iteration as the startKey for the next iteration -// and repeat until nextKey is nil. -// -// The last argument, count, is the number of keys to lookup and delete. The kernel will update it with the count of the elements that were -// retrieved and deleted. -// -// The API can return partial results even though an -1 is returned. In this case, errno will be set to `ENOENT` and the values slice and count -// will be filled in with the elements that were read. See the comment below for more context. -func (b *BPFMap) GetValueAndDeleteBatch(keys, startKey, nextKey unsafe.Pointer, count uint32) ([][]byte, error) { - var ( - values = make([]byte, b.ValueSize()*int(count)) - valuesPtr = unsafe.Pointer(&values[0]) - countC = C.uint(count) - ) +func (m *BPFMap) IsPinned() bool { + return bool(C.bpf_map__is_pinned(m.bpfMap)) +} - opts := &BPFMapBatchOpts{ - Sz: uint64(unsafe.Sizeof(BPFMapBatchOpts{})), - ElemFlags: C.BPF_ANY, - Flags: C.BPF_ANY, - } +func (m *BPFMap) Pin(pinPath string) error { + pathC := C.CString(pinPath) + defer C.free(unsafe.Pointer(pathC)) - // Before libbpf 1.0 (without LIBBPF_STRICT_DIRECT_ERRS), the return value - // and errno are not modified [1]. On error, we will get a return value of - // -1 and errno will be set accordingly with most BPF calls. - // - // The batch APIs are a bit different in which they can return an error, but - // depending on the errno code, it might mean a complete error (nothing was - // done) or a partial success (some elements were processed). - // - // - On complete sucess, it will return 0, and errno won't be set. - // - On partial sucess, it will return -1, and errno will be set to ENOENT. - // - On error, it will return -1, and an errno different to ENOENT. - // - // [1] https://github.com/libbpf/libbpf/blob/b69f8ee93ef6aa3518f8fbfd9d1df6c2c84fd08f/src/libbpf_internal.h#L496 - ret, errC := C.bpf_map_lookup_and_delete_batch( - b.fd, - startKey, - nextKey, - keys, - valuesPtr, - &countC, - bpfMapBatchOptsToC(opts)) - - processed := uint32(countC) - - if ret != 0 && errC != syscall.ENOENT { - // ret = -1 && errno == syscall.ENOENT indicates a partial read and delete. - return nil, fmt.Errorf("failed to batch lookup and delete values %v in map %s: ret %d (err: %s)", keys, b.name, ret, errC) + retC := C.bpf_map__pin(m.bpfMap, pathC) + if retC < 0 { + return fmt.Errorf("failed to pin map %s to path %s: %w", m.Name(), pinPath, syscall.Errno(-retC)) } - // Either some or all entries were read and deleted. 
- parsedVals := collectBatchValues(values, processed, b.ValueSize()) - return parsedVals, nil + return nil } -func collectBatchValues(values []byte, count uint32, valueSize int) [][]byte { - var value []byte - var collected [][]byte - for i := 0; i < int(count*uint32(valueSize)); i += valueSize { - value = values[i : i+valueSize] - collected = append(collected, value) +func (m *BPFMap) Unpin(pinPath string) error { + pathC := C.CString(pinPath) + defer C.free(unsafe.Pointer(pathC)) + + retC := C.bpf_map__unpin(m.bpfMap, pathC) + if retC < 0 { + return fmt.Errorf("failed to unpin map %s from path %s: %w", m.Name(), pinPath, syscall.Errno(-retC)) } - return collected + + return nil } -// UpdateBatch updates multiple elements in the map by specified keys and their corresponding values. // -// The first argument, keys, is a pointer to an array or slice of keys which will be updated using the second argument, values. -// It returns the associated error if any occurred. +// BPFMap Map of Maps // -// The last argument, count, is the number of keys to update. Passing an argument that greater than the number of keys -// in the map will cause the function to return a syscall.EPERM as an error. -func (b *BPFMap) UpdateBatch(keys, values unsafe.Pointer, count uint32) error { - countC := C.uint(count) - opts := BPFMapBatchOpts{ - Sz: uint64(unsafe.Sizeof(BPFMapBatchOpts{})), - ElemFlags: C.BPF_ANY, - Flags: C.BPF_ANY, - } +// TODO: implement `bpf_map__inner_map` wrapper +// func (m *BPFMap) InnerMap() *BPFMap { +// } - errC := C.bpf_map_update_batch(b.fd, keys, values, &countC, bpfMapBatchOptsToC(&opts)) - if errC != 0 { - sc := syscall.Errno(-errC) - if sc != syscall.EFAULT { - if uint32(countC) != count { - return fmt.Errorf("failed to update ALL elements in map %s, updated (%d/%d): %w", b.name, uint32(countC), count, sc) - } - } - return fmt.Errorf("failed to batch update elements in map %s: %w", b.name, syscall.Errno(-errC)) - } +// TODO: implement `bpf_map__set_inner_map_fd` wrapper +// func (m *BPFMap) SetInnerMapFD(innerMapFD int) error { +// } - return nil -} +// +// BPFMap Operations +// -// DeleteKeyBatch allows for batch deletion of multiple elements in the map. +// GetValue retrieves the value associated with a given key in the BPFMap. +// +// This function accepts an unsafe.Pointer to the key value to be searched +// in the map, and it returns the corresponding value as a slice of bytes. +// All basic types, and structs are supported as keys. +// +// NOTE: Slices and arrays are supported, but references should point to the first +// element in the slice or array, instead of the slice or array itself. This is +// crucial to prevent undefined behavior. +// +// For example: // -// `count` number of keys will be deleted from the map. Passing an argument that greater than the number of keys -// in the map will cause the function to delete fewer keys than requested. See the inline comment in -// `GetValueAndDeleteBatch` for more context. 
-func (b *BPFMap) DeleteKeyBatch(keys unsafe.Pointer, count uint32) error { - countC := C.uint(count) +// key := []byte{'a', 'b', 'c'} +// keyPtr := unsafe.Pointer(&key[0]) +// bpfmap.GetValue(keyPtr) +func (m *BPFMap) GetValue(key unsafe.Pointer) ([]byte, error) { + return m.GetValueFlags(key, MapFlagUpdateAny) +} - opts := &BPFMapBatchOpts{ - Sz: uint64(unsafe.Sizeof(BPFMapBatchOpts{})), - ElemFlags: C.BPF_ANY, - Flags: C.BPF_ANY, +func (m *BPFMap) GetValueFlags(key unsafe.Pointer, flags MapFlag) ([]byte, error) { + valueSize, err := calcMapValueSize(m.ValueSize(), m.Type()) + if err != nil { + return nil, fmt.Errorf("map %s %w", m.Name(), err) } - ret, errC := C.bpf_map_delete_batch(b.fd, keys, &countC, bpfMapBatchOptsToC(opts)) - - if ret != 0 && errC != syscall.ENOENT { - return fmt.Errorf("failed to batch delete keys %v in map %s: ret %d (err: %s)", keys, b.name, ret, errC) + value := make([]byte, valueSize) + retC := C.bpf_map__lookup_elem( + m.bpfMap, + key, + C.ulong(m.KeySize()), + unsafe.Pointer(&value[0]), + C.ulong(valueSize), + C.ulonglong(flags), + ) + if retC < 0 { + return nil, fmt.Errorf("failed to lookup value %v in map %s: %w", key, m.Name(), syscall.Errno(-retC)) } - // ret = -1 && errno == syscall.ENOENT indicates a partial deletion. - return nil + return value, nil } -// DeleteKey takes a pointer to the key which is stored in the map. -// It removes the key and associated value from the BPFMap. -// All basic types, and structs are supported as keys. -// -// NOTE: Slices and arrays are also supported but special care -// should be taken as to take a reference to the first element -// in the slice or array instead of the slice/array itself, as to -// avoid undefined behavior. -func (b *BPFMap) DeleteKey(key unsafe.Pointer) error { - ret, errC := C.bpf_map_delete_elem(b.fd, key) - if ret != 0 { - return fmt.Errorf("failed to get lookup key %d from map %s: %w", key, b.name, errC) +// TODO: implement `bpf_map__lookup_and_delete_elem` wrapper +// func (m *BPFMap) GetValueAndDeleteKey(key unsafe.Pointer) ([]byte, error) { +// } + +// Deprecated: use BPFMap.GetValue() or BPFMap.GetValueFlags() instead, since +// they already calculate the value size for per-cpu maps. +func (m *BPFMap) GetValueReadInto(key unsafe.Pointer, value *[]byte) error { + valuePtr := unsafe.Pointer(&(*value)[0]) + retC := C.bpf_map__lookup_elem(m.bpfMap, key, C.ulong(m.KeySize()), valuePtr, C.ulong(len(*value)), 0) + if retC < 0 { + return fmt.Errorf("failed to lookup value %v in map %s: %w", key, m.Name(), syscall.Errno(-retC)) } + return nil } -// Update takes a pointer to a key and a value to associate it with in -// the BPFMap. The unsafe.Pointer should be taken on a reference to the -// underlying datatype. All basic types, and structs are supported +// Update inserts or updates value in BPFMap that corresponds to a given key. +// +// This function accepts unsafe.Pointer references to both the key and value. +// All basic types, and structs are supported. // -// NOTE: Slices and arrays are supported but references should be passed -// to the first element in the slice or array. +// NOTE: Slices and arrays are supported, but references should point to the first +// element in the slice or array, instead of the slice or array itself. This is +// crucial to prevent undefined behavior. 
// // For example: // @@ -556,69 +340,140 @@ func (b *BPFMap) DeleteKey(key unsafe.Pointer) error { // keyPtr := unsafe.Pointer(&key) // valuePtr := unsafe.Pointer(&value[0]) // bpfmap.Update(keyPtr, valuePtr) -func (b *BPFMap) Update(key, value unsafe.Pointer) error { - return b.UpdateValueFlags(key, value, MapFlagUpdateAny) +func (m *BPFMap) Update(key, value unsafe.Pointer) error { + return m.UpdateValueFlags(key, value, MapFlagUpdateAny) } -func (b *BPFMap) UpdateValueFlags(key, value unsafe.Pointer, flags MapFlag) error { - errC := C.bpf_map_update_elem(b.fd, key, value, C.ulonglong(flags)) - if errC != 0 { - return fmt.Errorf("failed to update map %s: %w", b.name, syscall.Errno(-errC)) +func (m *BPFMap) UpdateValueFlags(key, value unsafe.Pointer, flags MapFlag) error { + valueSize, err := calcMapValueSize(m.ValueSize(), m.Type()) + if err != nil { + return fmt.Errorf("map %s %w", m.Name(), err) } - return nil -} -// BPFMapIterator iterates over keys in a BPF map -type BPFMapIterator struct { - b *BPFMap - err error - prev []byte - next []byte -} - -func (b *BPFMap) Iterator() *BPFMapIterator { - return &BPFMapIterator{ - b: b, - prev: nil, - next: nil, + retC := C.bpf_map__update_elem( + m.bpfMap, + key, + C.ulong(m.KeySize()), + value, + C.ulong(valueSize), + C.ulonglong(flags), + ) + if retC < 0 { + return fmt.Errorf("failed to update map %s: %w", m.Name(), syscall.Errno(-retC)) } + + return nil } -func (it *BPFMapIterator) Next() bool { - if it.err != nil { - return false +// DeleteKey removes a specified key and its associated value from the BPFMap. +// +// This function accepts an unsafe.Pointer that references the key to be +// removed from the map. +// All basic types, and structs are supported as keys. +// +// NOTE: Slices and arrays are supported, but references should point to the first +// element in the slice or array, instead of the slice or array itself. This is +// crucial to prevent undefined behavior. +func (m *BPFMap) DeleteKey(key unsafe.Pointer) error { + retC := C.bpf_map__delete_elem(m.bpfMap, key, C.ulong(m.KeySize()), 0) + if retC < 0 { + return fmt.Errorf("failed to delete key %d in map %s: %w", key, m.Name(), syscall.Errno(-retC)) } - prevPtr := unsafe.Pointer(nil) - if it.next != nil { - prevPtr = unsafe.Pointer(&it.next[0]) - } + return nil +} - next := make([]byte, it.b.KeySize()) - nextPtr := unsafe.Pointer(&next[0]) +// TODO: implement `bpf_map__get_next_key` wrapper +// func (m *BPFMap) GetNextKey(key unsafe.Pointer) (unsafe.Pointer, error) { +// } - errC, err := C.bpf_map_get_next_key(it.b.fd, prevPtr, nextPtr) - if errno, ok := err.(syscall.Errno); errC == -2 && ok && errno == C.ENOENT { - return false - } - if err != nil { - it.err = err - return false - } +// +// BPFMap Batch Operations (low-level API) +// - it.prev = it.next - it.next = next +// GetValueBatch allows for batch lookups of multiple keys from the map. +// +// The first argument, keys, is a pointer to an array or slice of keys which will +// be populated with the keys returned from this operation. +// It returns the associated values as a slice of slices of bytes. +// +// This API allows for batch lookups of multiple keys, potentially in steps over +// multiple iterations. For example, you provide the last key seen (or nil) for +// the startKey, and the first key to start the next iteration with in nextKey. +// Once the first iteration is complete you can provide the last key seen in the +// previous iteration as the startKey for the next iteration and repeat until +// nextKey is nil. 
+// +// The last argument, count, is the number of keys to lookup. The kernel will +// update it with the count of the elements that were retrieved. +// +// The API can return partial results even though an -1 is returned. In this case, +// errno will be set to `ENOENT` and the values slice and count will be filled in +// with the elements that were read. See the comment in `BPFMapLow.GetValueBatch` +// for more context. +func (m *BPFMap) GetValueBatch(keys unsafe.Pointer, startKey, nextKey unsafe.Pointer, count uint32) ([][]byte, error) { + return m.bpfMapLow.GetValueBatch(keys, startKey, nextKey, count) +} - return true +// GetValueAndDeleteBatch allows for batch lookup and deletion of elements where +// each element is deleted after being retrieved from the map. +// +// The first argument, keys, is a pointer to an array or slice of keys which will +// be populated with the keys returned from this operation. +// It returns the associated values as a slice of slices of bytes. +// +// This API allows for batch lookups and deletion of multiple keys, potentially +// in steps over multiple iterations. For example, you provide the last key seen +// (or nil) for the startKey, and the first key to start the next iteration +// with in nextKey. +// Once the first iteration is complete you can provide the last key seen in the +// previous iteration as the startKey for the next iteration and repeat until +// nextKey is nil. +// +// The last argument, count, is the number of keys to lookup and delete. The kernel +// will update it with the count of the elements that were retrieved and deleted. +// +// The API can return partial results even though an -1 is returned. In this case, +// errno will be set to `ENOENT` and the values slice and count will be filled in +// with the elements that were read. See the comment in `BPFMapLow.GetValueBatch` +// for more context. +func (m *BPFMap) GetValueAndDeleteBatch(keys, startKey, nextKey unsafe.Pointer, count uint32) ([][]byte, error) { + return m.bpfMapLow.GetValueAndDeleteBatch(keys, startKey, nextKey, count) +} + +// UpdateBatch updates multiple elements in the map by specified keys and their +// corresponding values. +// +// The first argument, keys, is a pointer to an array or slice of keys which will +// be updated using the second argument, values. +// It returns the associated error if any occurred. +// +// The last argument, count, is the number of keys to update. Passing an argument +// that greater than the number of keys in the map will cause the function to +// return a syscall.EPERM as an error. +func (m *BPFMap) UpdateBatch(keys, values unsafe.Pointer, count uint32) error { + return m.bpfMapLow.UpdateBatch(keys, values, count) } -// Key returns the current key value of the iterator, if the most recent call to Next returned true. -// The slice is valid only until the next call to Next. -func (it *BPFMapIterator) Key() []byte { - return it.next +// DeleteKeyBatch allows for batch deletion of multiple elements in the map. +// +// `count` number of keys will be deleted from the map. Passing an argument that +// greater than the number of keys in the map will cause the function to delete +// fewer keys than requested. See the comment in `BPFMapLow.GetValueBatch` +// for more context. 
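+//
+// For example (assuming a map with 4-byte keys; the key values are illustrative):
+//
+// keys := []uint32{1, 2, 3}
+// err := bpfmap.DeleteKeyBatch(unsafe.Pointer(&keys[0]), uint32(len(keys)))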
+func (m *BPFMap) DeleteKeyBatch(keys unsafe.Pointer, count uint32) error { + return m.bpfMapLow.DeleteKeyBatch(keys, count) } -// Err returns the last error that ocurred while table.Iter or iter.Next -func (it *BPFMapIterator) Err() error { - return it.err +// +// BPFMap Iterator (low-level API) +// + +func (m *BPFMap) Iterator() *BPFMapIterator { + return &BPFMapIterator{ + mapFD: m.FileDescriptor(), + keySize: m.KeySize(), + prev: nil, + next: nil, + } } diff --git a/misc.go b/misc.go new file mode 100644 index 00000000..66fd0d7b --- /dev/null +++ b/misc.go @@ -0,0 +1,16 @@ +package libbpfgo + +/* +#cgo LDFLAGS: -lelf -lz +#include "libbpfgo.h" +*/ +import "C" + +// +// Misc generic helpers +// + +// roundUp rounds x up to the nearest multiple of y. +func roundUp(x, y uint64) uint64 { + return ((x + (y - 1)) / y) * y +} diff --git a/selftest/create-map/main.go b/selftest/create-map/main.go index 8062bc33..e0c15ed2 100644 --- a/selftest/create-map/main.go +++ b/selftest/create-map/main.go @@ -37,8 +37,6 @@ func main() { defer bpfModule.Close() bpfModule.BPFLoadObject() - opts := bpf.BPFMapCreateOpts{} - opts.Size = uint64(unsafe.Sizeof(opts)) m, err := bpf.CreateMap(bpf.MapTypeHash, "foobar", 4, 4, 420, nil) if err != nil {
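
The sketches below are illustrative usage of the APIs introduced by this patch; they are not taken from the diff, and the map names, key/value sizes, and key values are assumptions. The package is assumed to be imported as bpf, as in the selftests.

Inspecting a map through the new GetMapInfoByFD helper, given any valid map file descriptor such as the one returned by FileDescriptor():

    info, err := bpf.GetMapInfoByFD(m.FileDescriptor())
    if err != nil {
        // handle the error
    }
    // Print a few of the kernel-reported fields mirrored by BPFMapInfo.
    fmt.Printf("%s: type=%s key=%d value=%d max_entries=%d\n",
        info.Name, info.Type, info.KeySize, info.ValueSize, info.MaxEntries)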
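BPFMapCreateOpts no longer carries a Size field; the C shim sets sz itself. One case where options are needed is a hash-of-maps, where the outer map's value size is the 4-byte inner map fd and InnerMapFD supplies a template map:

    inner, err := bpf.CreateMap(bpf.MapTypeHash, "inner", 4, 4, 32, nil)
    if err != nil {
        // handle the error
    }

    opts := &bpf.BPFMapCreateOpts{InnerMapFD: uint32(inner.FileDescriptor())}
    outer, err := bpf.CreateMap(bpf.MapTypeHashOfMaps, "outer", 4, 4, 8, opts)
    if err != nil {
        // handle the error
    }
    _ = outer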
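The batch calls follow the protocol described in the GetValueBatch comments: a nil startKey begins a scan, nextKey receives the cursor for a subsequent iteration, and count is updated by the kernel. A minimal round-trip on a hash map with 4-byte keys and values (m is a *bpf.BPFMap or *bpf.BPFMapLow):

    keys := []uint32{1, 2, 3, 4}
    values := []uint32{10, 20, 30, 40}
    count := uint32(len(keys))

    // Insert all elements in a single batch.
    if err := m.UpdateBatch(unsafe.Pointer(&keys[0]), unsafe.Pointer(&values[0]), count); err != nil {
        // handle the error
    }

    // Read them back; readKeys is filled with the keys that were found and
    // vals holds one []byte slice per value.
    var nextKey uint32
    readKeys := make([]uint32, count)
    vals, err := m.GetValueBatch(unsafe.Pointer(&readKeys[0]), nil, unsafe.Pointer(&nextKey), count)
    if err != nil {
        // handle the error
    }
    _ = vals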
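The new BPFMapIterator walks keys only; values are looked up separately:

    it := m.Iterator()
    for it.Next() {
        key := it.Key() // valid only until the next call to Next
        value, err := m.GetValue(unsafe.Pointer(&key[0]))
        if err != nil {
            // handle the error
        }
        _ = value
    }
    if err := it.Err(); err != nil {
        // handle the iteration error
    }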
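For the per-CPU map types, GetValue and GetValueFlags now size the returned buffer as calcMapValueSize does: the 8-byte-rounded element size times the number of possible CPUs. Splitting that buffer back into per-CPU values, where percpuMap stands for a map of a per-CPU type keyed by a uint32:

    numCPU, err := bpf.NumPossibleCPUs()
    if err != nil {
        // handle the error
    }

    key := uint32(0)
    data, err := percpuMap.GetValue(unsafe.Pointer(&key))
    if err != nil {
        // handle the error
    }

    elemSize := len(data) / numCPU // rounded-up size of a single per-CPU element
    for cpu := 0; cpu < numCPU; cpu++ {
        cpuValue := data[cpu*elemSize : (cpu+1)*elemSize]
        _ = cpuValue // raw bytes of the value observed on this CPU
    }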