Skip to content

Commit

Permalink
aya+ebpf: Implement read+write methods for PerfEventArray
Browse files Browse the repository at this point in the history
This allows reading _and_ writing a PerfEventArray, from userspace _and_ kernel.
  • Loading branch information
TheElectronWill committed Dec 11, 2023
1 parent 41351ec commit e714ca8
Show file tree
Hide file tree
Showing 13 changed files with 515 additions and 60 deletions.
12 changes: 12 additions & 0 deletions aya/src/maps/perf/async_perf_event_array.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::{
borrow::{Borrow, BorrowMut},
path::Path,
os::fd::AsFd
};

// See https://doc.rust-lang.org/cargo/reference/features.html#mutually-exclusive-features.
Expand Down Expand Up @@ -116,6 +117,17 @@ impl<T: BorrowMut<MapData>> AsyncPerfEventArray<T> {
pub fn pin<P: AsRef<Path>>(&self, path: P) -> Result<(), PinError> {
self.perf_map.pin(path)
}

/// Inserts a perf_event file descriptor at the given index.
///
/// ## Errors
///
/// Returns [`MapError::OutOfBounds`] if `index` is out of bounds, [`MapError::SyscallError`]
/// if `bpf_map_update_elem` fails.
pub fn set<FD: AsFd>(&mut self, index: u32, value: &FD) -> Result<(), MapError> {
let Self { perf_map } = self;
perf_map.set(index, value)
}
}

impl<T: Borrow<MapData>> AsyncPerfEventArray<T> {
Expand Down
30 changes: 29 additions & 1 deletion aya/src/maps/perf/perf_event_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@ use std::{
sync::Arc,
};

use aya_obj::generated::BPF_ANY;
use bytes::BytesMut;

use crate::{
maps::{
check_bounds,
perf::{Events, PerfBuffer, PerfBufferError},
MapData, MapError, PinError,
},
sys::bpf_map_update_elem,
sys::{bpf_map_update_elem, SyscallError},
util::page_size,
};

Expand Down Expand Up @@ -188,6 +190,9 @@ impl<T: Borrow<MapData>> PerfEventArray<T> {
impl<T: BorrowMut<MapData>> PerfEventArray<T> {
/// Opens the perf buffer at the given index.
///
/// A ring-buffer of `1 + page_count` pages is created with `mmap`, where `page_count`
/// must be a power of two.
///
/// The returned buffer will receive all the events eBPF programs send at the given index.
pub fn open(
&mut self,
Expand All @@ -207,4 +212,27 @@ impl<T: BorrowMut<MapData>> PerfEventArray<T> {
_map: self.map.clone(),
})
}

/// Stores a perf_event file descriptor in the slot at `index`.
///
/// The index is bounds-checked against the map first; the raw descriptor number of
/// `value` is then written as the map value with `bpf_map_update_elem`.
///
/// ## Errors
///
/// Returns [`MapError::OutOfBounds`] if `index` is out of bounds, [`MapError::SyscallError`]
/// if `bpf_map_update_elem` fails.
pub fn set<FD: AsFd>(&mut self, index: u32, value: &FD) -> Result<(), MapError> {
    let map_data: &MapData = self.map.deref().borrow();
    check_bounds(map_data, index)?;

    // The value written into the map is the integer descriptor of `value`.
    let event_fd = value.as_fd().as_raw_fd();
    // only BPF_ANY or BPF_EXIST are allowed, and for arrays they do the same thing
    // (the elements always exist)
    let update_flags = BPF_ANY as u64;

    match bpf_map_update_elem(map_data.fd().as_fd(), Some(&index), &event_fd, update_flags) {
        Ok(_) => Ok(()),
        Err((_, io_error)) => Err(MapError::SyscallError(SyscallError {
            call: "bpf_map_update_elem",
            io_error,
        })),
    }
}
}
126 changes: 90 additions & 36 deletions aya/src/programs/perf_event.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Perf event programs.
use std::os::fd::AsFd as _;
use std::os::fd::{AsFd as _, OwnedFd};

pub use crate::generated::{
perf_hw_cache_id, perf_hw_cache_op_id, perf_hw_cache_op_result_id, perf_hw_id, perf_sw_ids,
Expand All @@ -20,10 +20,10 @@ use crate::{
perf_attach::{PerfLinkIdInner, PerfLinkInner},
FdLink, LinkError, ProgramData, ProgramError,
},
sys::{bpf_link_get_info_by_fd, perf_event_open, SyscallError},
sys::{self, bpf_link_get_info_by_fd, SyscallError},
};

/// The type of perf event
/// The type of perf event.
#[repr(u32)]
#[derive(Debug, Clone)]
pub enum PerfTypeId {
Expand All @@ -41,7 +41,7 @@ pub enum PerfTypeId {
Breakpoint = PERF_TYPE_BREAKPOINT as u32,
}

/// Sample Policy
/// Sample Policy.
#[derive(Debug, Clone)]
pub enum SamplePolicy {
/// Period
Expand All @@ -50,30 +50,44 @@ pub enum SamplePolicy {
Frequency(u64),
}

/// The scope of a PerfEvent
/// A flag whose bits indicate the fields to include in the event samples.
///
/// The wrapped value is copied as-is into the `sample_type` field of the
/// `perf_event_attr` when sampling is enabled in `perf_event_open`
/// (for example `SampleType(PERF_SAMPLE_RAW as u64)`).
///
/// The inner field is public so that code outside this module can build a value
/// to pass to the public `perf_event_open` function.
#[derive(Debug, Clone)]
pub struct SampleType(pub u64);

/// "Wake up" overflow notification policy.
///
/// Overflows are generated only by sampling events.
///
/// When a policy is passed to `perf_event_open`, the variant selects which wake-up
/// field of the `perf_event_attr` gets filled in.
#[derive(Debug, Clone)]
pub enum WakeupPolicy {
/// Wake up after n events (n is stored in `wakeup_events`).
Events(u32),
/// Wake up after n bytes (the `watermark` bit is set and n is stored in
/// `wakeup_watermark`).
Watermark(u32),
}

/// The scope of a PerfEvent.
#[derive(Debug, Clone)]
#[allow(clippy::enum_variant_names)]
pub enum PerfEventScope {
/// Calling process, any cpu
/// Calling process, any cpu.
CallingProcessAnyCpu,
/// calling process, one cpu
/// Calling process, one cpu.
CallingProcessOneCpu {
/// cpu id
cpu: u32,
},
/// one process, any cpu
/// One process, any cpu.
OneProcessAnyCpu {
/// process id
pid: u32,
},
/// one process, one cpu
/// One process, one cpu.
OneProcessOneCpu {
/// cpu id
cpu: u32,
/// process id
pid: u32,
},
/// all processes, one cpu
/// All processes, one cpu.
AllProcessesOneCpu {
/// cpu id
cpu: u32,
Expand Down Expand Up @@ -147,33 +161,11 @@ impl PerfEvent {
) -> Result<PerfEventLinkId, ProgramError> {
let prog_fd = self.fd()?;
let prog_fd = prog_fd.as_fd();
let (sample_period, sample_frequency) = match sample_policy {
SamplePolicy::Period(period) => (period, None),
SamplePolicy::Frequency(frequency) => (0, Some(frequency)),
};
let (pid, cpu) = match scope {
PerfEventScope::CallingProcessAnyCpu => (0, -1),
PerfEventScope::CallingProcessOneCpu { cpu } => (0, cpu as i32),
PerfEventScope::OneProcessAnyCpu { pid } => (pid as i32, -1),
PerfEventScope::OneProcessOneCpu { cpu, pid } => (pid as i32, cpu as i32),
PerfEventScope::AllProcessesOneCpu { cpu } => (-1, cpu as i32),
};
let fd = perf_event_open(
perf_type as u32,
config,
pid,
cpu,
sample_period,
sample_frequency,
false,
0,
)
.map_err(|(_code, io_error)| SyscallError {
call: "perf_event_open",
io_error,
})?;

let link = perf_attach(prog_fd, fd)?;
let sampling = Some((sample_policy, SampleType(PERF_TYPE_RAW as u64)));
let event_fd = perf_event_open(perf_type as u32, config, scope, sampling, None, 0)?;

let link = perf_attach(prog_fd, event_fd)?;
self.data.links.insert(PerfEventLink::new(link))
}

Expand Down Expand Up @@ -225,3 +217,65 @@ define_link_wrapper!(
PerfLinkInner,
PerfLinkIdInner
);

/// Opens a perf event with the `perf_event_open` syscall and returns the event's file
/// descriptor.
///
/// # Arguments
///
/// * `perf_type` - the type of event, see [`crate::generated::perf_type_id`] for a list of
///   types. Note that this list is non-exhaustive, because PMUs (Performance Monitoring
///   Units) can be added to the system. Their ids can be read from the sysfs (see the
///   kernel documentation on perf_event_open).
/// * `config` - the event that we want to open
/// * `scope` - which process and cpu to monitor (logical cpu, not physical socket)
/// * `sampling` - if not None, enables the sampling mode with the given parameters
/// * `wakeup` - if not None, sets up the wake-up for the overflow notifications
/// * `flags` - various flags combined with a binary OR (for ex. `FLAG_A | FLAG_B`), zero
///   means no flag
///
/// # Errors
///
/// Returns [`ProgramError::SyscallError`] if the `perf_event_open` syscall fails.
pub fn perf_event_open(
    perf_type: u32,
    config: u64,
    scope: PerfEventScope,
    sampling: Option<(SamplePolicy, SampleType)>,
    wakeup: Option<WakeupPolicy>,
    flags: u32,
) -> Result<OwnedFd, ProgramError> {
    // Translate the scope into the (pid, cpu) pair handed to the syscall: pid 0 is the
    // calling process, pid -1 is all processes, and cpu -1 is any cpu.
    let (target_pid, target_cpu) = match scope {
        PerfEventScope::CallingProcessAnyCpu => (0, -1),
        PerfEventScope::CallingProcessOneCpu { cpu } => (0, cpu as i32),
        PerfEventScope::OneProcessAnyCpu { pid } => (pid as i32, -1),
        PerfEventScope::OneProcessOneCpu { cpu, pid } => (pid as i32, cpu as i32),
        PerfEventScope::AllProcessesOneCpu { cpu } => (-1, cpu as i32),
    };

    let mut event_attr = sys::init_perf_event_attr();
    event_attr.type_ = perf_type;
    event_attr.config = config;

    // Sampling mode: the sample type applies to both policies; only the frequency
    // policy additionally raises the `freq` bit.
    if let Some((policy, SampleType(sample_bits))) = sampling {
        event_attr.sample_type = sample_bits;
        match policy {
            SamplePolicy::Frequency(frequency) => {
                event_attr.set_freq(1);
                event_attr.__bindgen_anon_1.sample_freq = frequency;
            }
            SamplePolicy::Period(period) => {
                event_attr.__bindgen_anon_1.sample_period = period;
            }
        }
    }

    // Overflow notifications: count-based by default, byte-based when the
    // `watermark` bit is set.
    if let Some(policy) = wakeup {
        match policy {
            WakeupPolicy::Events(count) => {
                event_attr.__bindgen_anon_2.wakeup_events = count;
            }
            WakeupPolicy::Watermark(bytes) => {
                event_attr.set_watermark(1);
                event_attr.__bindgen_anon_2.wakeup_watermark = bytes;
            }
        }
    }

    match sys::perf_event_sys(event_attr, target_pid, target_cpu, flags) {
        Ok(event_fd) => Ok(event_fd),
        Err((_, io_error)) => Err(ProgramError::SyscallError(SyscallError {
            call: "perf_event_open",
            io_error,
        })),
    }
}
23 changes: 12 additions & 11 deletions aya/src/sys/perf_event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,14 @@ use crate::generated::{
PERF_FLAG_FD_CLOEXEC,
};

/// Builds a `perf_event_attr` with every byte zeroed except `size`, which is set to the
/// size of the struct itself.
pub(crate) fn init_perf_event_attr() -> perf_event_attr {
    let attr_size = mem::size_of::<perf_event_attr>() as u32;
    // SAFETY (assumed, confirm against the generated bindings): `perf_event_attr` is a
    // C struct for which the all-zero bit pattern is a valid value.
    let mut zeroed_attr: perf_event_attr = unsafe { mem::zeroed() };
    zeroed_attr.size = attr_size;
    zeroed_attr
}

#[allow(clippy::too_many_arguments)]
pub(crate) fn perf_event_open(
pub(crate) fn perf_event_open_sampled(
perf_type: u32,
config: u64,
pid: pid_t,
Expand All @@ -26,10 +32,8 @@ pub(crate) fn perf_event_open(
wakeup: bool,
flags: u32,
) -> SysResult<OwnedFd> {
let mut attr = unsafe { mem::zeroed::<perf_event_attr>() };

let mut attr = init_perf_event_attr();
attr.config = config;
attr.size = mem::size_of::<perf_event_attr>() as u32;
attr.type_ = perf_type;
attr.sample_type = PERF_SAMPLE_RAW as u64;
// attr.inherits = if pid > 0 { 1 } else { 0 };
Expand All @@ -46,7 +50,7 @@ pub(crate) fn perf_event_open(
}

pub(crate) fn perf_event_open_bpf(cpu: c_int) -> SysResult<OwnedFd> {
perf_event_open(
perf_event_open_sampled(
PERF_TYPE_SOFTWARE as u32,
PERF_COUNT_SW_BPF_OUTPUT as u64,
-1,
Expand All @@ -67,15 +71,14 @@ pub(crate) fn perf_event_open_probe(
) -> SysResult<OwnedFd> {
use std::os::unix::ffi::OsStrExt as _;

let mut attr = unsafe { mem::zeroed::<perf_event_attr>() };
let mut attr = init_perf_event_attr();

if let Some(ret_bit) = ret_bit {
attr.config = 1 << ret_bit;
}

let c_name = CString::new(name.as_bytes()).unwrap();

attr.size = mem::size_of::<perf_event_attr>() as u32;
attr.type_ = ty;
attr.__bindgen_anon_3.config1 = c_name.as_ptr() as u64;
attr.__bindgen_anon_4.config2 = offset;
Expand All @@ -87,9 +90,7 @@ pub(crate) fn perf_event_open_probe(
}

pub(crate) fn perf_event_open_trace_point(id: u32, pid: Option<pid_t>) -> SysResult<OwnedFd> {
let mut attr = unsafe { mem::zeroed::<perf_event_attr>() };

attr.size = mem::size_of::<perf_event_attr>() as u32;
let mut attr = init_perf_event_attr();
attr.type_ = PERF_TYPE_TRACEPOINT as u32;
attr.config = id as u64;

Expand All @@ -112,7 +113,7 @@ pub(crate) fn perf_event_ioctl(
return crate::sys::TEST_SYSCALL.with(|test_impl| unsafe { test_impl.borrow()(call) });
}

fn perf_event_sys(attr: perf_event_attr, pid: pid_t, cpu: i32, flags: u32) -> SysResult<OwnedFd> {
pub(crate) fn perf_event_sys(attr: perf_event_attr, pid: pid_t, cpu: i32, flags: u32) -> SysResult<OwnedFd> {
let fd = syscall(Syscall::PerfEventOpen {
attr,
pid,
Expand Down
Loading

0 comments on commit e714ca8

Please sign in to comment.