Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(metrics): sync libs metrics collector refactor #3129

Merged
merged 4 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmake/modules/driver.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ else()
# In case you want to test against another driver version (or branch, or commit), just pass the variable,
# e.g., `cmake -DDRIVER_VERSION=dev ..`
if(NOT DRIVER_VERSION)
set(DRIVER_VERSION "5dc692e3045f5102e1f7752d40ec7e4f81cbc29e")
set(DRIVER_CHECKSUM "SHA256=bf5c8fc89da46d1d3ed828e4be715e459c7d923ffee86952c35c30a009f945b6")
set(DRIVER_VERSION "0.15.0-rc1")
set(DRIVER_CHECKSUM "SHA256=c7bc705fc2b0f8dff8b39a6c865044040e10be41567e2c951908b5051786a06b")
endif()

# cd /path/to/build && cmake /path/to/source
Expand Down
4 changes: 2 additions & 2 deletions cmake/modules/falcosecurity-libs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ else()
# In case you want to test against another falcosecurity/libs version (or branch, or commit), just pass the variable,
# e.g., `cmake -DFALCOSECURITY_LIBS_VERSION=dev ..`
if(NOT FALCOSECURITY_LIBS_VERSION)
set(FALCOSECURITY_LIBS_VERSION "5dc692e3045f5102e1f7752d40ec7e4f81cbc29e")
set(FALCOSECURITY_LIBS_CHECKSUM "SHA256=bf5c8fc89da46d1d3ed828e4be715e459c7d923ffee86952c35c30a009f945b6")
set(FALCOSECURITY_LIBS_VERSION "0.15.0-rc1")
set(FALCOSECURITY_LIBS_CHECKSUM "SHA256=c7bc705fc2b0f8dff8b39a6c865044040e10be41567e2c951908b5051786a06b")
endif()

# cd /path/to/build && cmake /path/to/source
Expand Down
2 changes: 1 addition & 1 deletion userspace/falco/app/actions/helpers_inspector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ falco::app::run_result falco::app::actions::open_live_inspector(
{
try
{
if((s.config->m_metrics_flags & PPM_SCAP_STATS_STATE_COUNTERS))
if((s.config->m_metrics_flags & METRICS_V2_STATE_COUNTERS))
{
inspector->set_sinsp_stats_v2_enabled();
}
Expand Down
11 changes: 5 additions & 6 deletions userspace/falco/configuration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ falco_configuration::falco_configuration():
m_metrics_interval(5000),
m_metrics_stats_rule_enabled(false),
m_metrics_output_file(""),
m_metrics_flags((PPM_SCAP_STATS_KERNEL_COUNTERS | PPM_SCAP_STATS_LIBBPF_STATS | PPM_SCAP_STATS_RESOURCE_UTILIZATION | PPM_SCAP_STATS_STATE_COUNTERS)),
m_metrics_flags((METRICS_V2_KERNEL_COUNTERS | METRICS_V2_LIBBPF_STATS | METRICS_V2_RESOURCE_UTILIZATION | METRICS_V2_STATE_COUNTERS)),
m_metrics_convert_memory_to_mb(true),
m_metrics_include_empty_values(false)
{
Expand Down Expand Up @@ -459,23 +459,22 @@ void falco_configuration::load_yaml(const std::string& config_name, const yaml_h
m_metrics_flags = 0;
if (config.get_scalar<bool>("metrics.resource_utilization_enabled", true))
{
m_metrics_flags |= PPM_SCAP_STATS_RESOURCE_UTILIZATION;
m_metrics_flags |= METRICS_V2_RESOURCE_UTILIZATION;

}
if (config.get_scalar<bool>("metrics.state_counters_enabled", true))
{
m_metrics_flags |= PPM_SCAP_STATS_STATE_COUNTERS;
m_metrics_flags |= METRICS_V2_STATE_COUNTERS;

}
if (config.get_scalar<bool>("metrics.kernel_event_counters_enabled", true))
{
m_metrics_flags |= PPM_SCAP_STATS_KERNEL_COUNTERS;
m_metrics_flags |= METRICS_V2_KERNEL_COUNTERS;

}
if (config.get_scalar<bool>("metrics.libbpf_stats_enabled", true))
{
m_metrics_flags |= PPM_SCAP_STATS_LIBBPF_STATS;

m_metrics_flags |= METRICS_V2_LIBBPF_STATS;
}

m_metrics_convert_memory_to_mb = config.get_scalar<bool>("metrics.convert_memory_to_mb", true);
Expand Down
202 changes: 73 additions & 129 deletions userspace/falco/stats_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,9 @@ stats_writer::collector::collector(const std::shared_ptr<stats_writer>& writer)

void stats_writer::collector::get_metrics_output_fields_wrapper(
nlohmann::json& output_fields,
const std::shared_ptr<sinsp>& inspector, uint64_t now,
const std::string& src, uint64_t num_evts, double stats_snapshot_time_delta_sec)
const std::shared_ptr<sinsp>& inspector,
const std::string& src, uint64_t num_evts,
uint64_t now, double stats_snapshot_time_delta_sec)
{
static const char* all_driver_engines[] = {
BPF_ENGINE, KMOD_ENGINE, MODERN_BPF_ENGINE,
Expand All @@ -322,8 +323,8 @@ void stats_writer::collector::get_metrics_output_fields_wrapper(
output_fields["falco.start_ts"] = agent_info->start_ts_epoch;
output_fields["falco.duration_sec"] = (uint64_t)((now - agent_info->start_ts_epoch) / ONE_SECOND_IN_NS);
output_fields["falco.kernel_release"] = agent_info->uname_r;
output_fields["evt.hostname"] = machine_info->hostname; /* Explicitly add hostname to log msg in case hostname rule output field is disabled. */
output_fields["falco.host_boot_ts"] = machine_info->boot_ts_epoch;
output_fields["falco.hostname"] = machine_info->hostname; /* Explicitly add hostname to log msg in case hostname rule output field is disabled. */
output_fields["falco.host_num_cpus"] = machine_info->num_cpus;
output_fields["falco.outputs_queue_num_drops"] = m_writer->m_outputs->get_outputs_queue_num_drops();

Expand All @@ -350,144 +351,57 @@ void stats_writer::collector::get_metrics_output_fields_wrapper(

void stats_writer::collector::get_metrics_output_fields_additional(
nlohmann::json& output_fields,
const std::shared_ptr<sinsp>& inspector,
double stats_snapshot_time_delta_sec, const std::string& src)
double stats_snapshot_time_delta_sec)
{
const scap_agent_info* agent_info = inspector->get_agent_info();

#if !defined(MINIMAL_BUILD) and !defined(__EMSCRIPTEN__)
uint32_t nstats = 0;
int32_t rc = 0;
uint32_t flags = m_writer->m_config->m_metrics_flags;

auto buffer = inspector->get_sinsp_stats_v2_buffer();
auto sinsp_stats_v2 = inspector->get_sinsp_stats_v2();
sinsp_thread_manager* thread_manager = inspector->m_thread_manager.get();
const scap_stats_v2* sinsp_stats_v2_snapshot = libsinsp::stats::get_sinsp_stats_v2(flags, agent_info, thread_manager, sinsp_stats_v2, buffer, &nstats, &rc);

uint32_t base_stat = 0;
// todo @incertum this needs to become better with the next proper stats refactor in libs 0.15.0
if ((flags & PPM_SCAP_STATS_STATE_COUNTERS) && !(flags & PPM_SCAP_STATS_RESOURCE_UTILIZATION))
#if defined(__linux__) and !defined(MINIMAL_BUILD) and !defined(__EMSCRIPTEN__)
if (m_writer->m_libs_metrics_collector && m_writer->m_output_rule_metrics_converter)
{
base_stat = SINSP_STATS_V2_N_THREADS;
}
// Refresh / New snapshot
m_writer->m_libs_metrics_collector->snapshot();
auto metrics_snapshot = m_writer->m_libs_metrics_collector->get_metrics();
// Cache n_evts and n_drops to derive n_drops_perc.
uint64_t n_evts = 0;
uint64_t n_drops = 0;
uint64_t n_evts_delta = 0;
uint64_t n_drops_delta = 0;

if (sinsp_stats_v2_snapshot && rc == 0 && nstats > 0)
{
for(uint32_t stat = base_stat; stat < nstats; stat++)
// Note: Because of possible metric unit conversions, get a non-const ref to the metric.
for (auto& metric: metrics_snapshot)
{
if (sinsp_stats_v2_snapshot[stat].name[0] == '\0')
if (metric.name[0] == '\0')
{
break;
}
char metric_name[STATS_NAME_MAX] = "falco.";
strlcat(metric_name, sinsp_stats_v2_snapshot[stat].name, sizeof(metric_name));
// todo @incertum temporary fix for n_fds and n_threads, type assignment was missed in libs, will be fixed in libs 0.15.0
if (strncmp(sinsp_stats_v2_snapshot[stat].name, "n_fds", 6) == 0 || strncmp(sinsp_stats_v2_snapshot[stat].name, "n_threads", 10) == 0)
if (m_writer->m_config->m_metrics_convert_memory_to_mb)
{
output_fields[metric_name] = sinsp_stats_v2_snapshot[stat].value.u64;
m_writer->m_output_rule_metrics_converter->convert_metric_to_unit_convention(metric);
}

switch(sinsp_stats_v2_snapshot[stat].type)
char metric_name[METRIC_NAME_MAX] = "falco.";
if((metric.flags & METRICS_V2_LIBBPF_STATS) || (metric.flags & METRICS_V2_KERNEL_COUNTERS) )
{
case STATS_VALUE_TYPE_U64:
if (sinsp_stats_v2_snapshot[stat].value.u64 == 0 && !m_writer->m_config->m_metrics_include_empty_values)
{
break;
}
if (m_writer->m_config->m_metrics_convert_memory_to_mb)
{
if (strncmp(sinsp_stats_v2_snapshot[stat].name, "container_memory_used", 22) == 0) // exact str match
{
output_fields[metric_name] = (uint64_t)(sinsp_stats_v2_snapshot[stat].value.u64 / (double)1024 / (double)1024);
strlcpy(metric_name, "scap.", sizeof(metric_name));
}
strlcat(metric_name, metric.name, sizeof(metric_name));

} else if (strncmp(sinsp_stats_v2_snapshot[stat].name, "memory_", 7) == 0) // prefix match
{
output_fields[metric_name] = (uint64_t)(sinsp_stats_v2_snapshot[stat].value.u64 / (double)1024);
} else
{
output_fields[metric_name] = sinsp_stats_v2_snapshot[stat].value.u64;
}
}
else
{
output_fields[metric_name] = sinsp_stats_v2_snapshot[stat].value.u64;
}
break;
case STATS_VALUE_TYPE_U32:
if (sinsp_stats_v2_snapshot[stat].value.u32 == 0 && !m_writer->m_config->m_metrics_include_empty_values)
switch (metric.type)
{
case METRIC_VALUE_TYPE_U32:
if (metric.value.u32 == 0 && !m_writer->m_config->m_metrics_include_empty_values)
{
break;
}
if (m_writer->m_config->m_metrics_convert_memory_to_mb && strncmp(sinsp_stats_v2_snapshot[stat].name, "memory_", 7) == 0) // prefix match
{
output_fields[metric_name] = (uint32_t)(sinsp_stats_v2_snapshot[stat].value.u32 / (double)1024);
}
else
{
output_fields[metric_name] = sinsp_stats_v2_snapshot[stat].value.u32;
}
output_fields[metric_name] = metric.value.u32;
break;
case STATS_VALUE_TYPE_D:
if (sinsp_stats_v2_snapshot[stat].value.d == 0 && !m_writer->m_config->m_metrics_include_empty_values)
case METRIC_VALUE_TYPE_U64:
if (strncmp(metric.name, "n_evts", 7) == 0)
{
break;
}
output_fields[metric_name] = sinsp_stats_v2_snapshot[stat].value.d;
break;
default:
break;
}
}
}

if (src != falco_common::syscall_source)
{
return;
}

/* Kernel side stats counters and libbpf stats if applicable. */
nstats = 0;
rc = 0;
if (!(inspector->check_current_engine(BPF_ENGINE) || inspector->check_current_engine(MODERN_BPF_ENGINE)))
{
flags &= ~PPM_SCAP_STATS_LIBBPF_STATS;
}

// Note: ENGINE_FLAG_BPF_STATS_ENABLED check has been moved to libs, that is, when libbpf stats is not enabled
// in the kernel settings we won't collect them even if the end user enabled the libbpf stats option

const scap_stats_v2* scap_stats_v2_snapshot = inspector->get_capture_stats_v2(flags, &nstats, &rc);
if (scap_stats_v2_snapshot && nstats > 0 && rc == 0)
{
/* Cache n_evts and n_drops to derive n_drops_perc. */
uint64_t n_evts = 0;
uint64_t n_drops = 0;
uint64_t n_evts_delta = 0;
uint64_t n_drops_delta = 0;
for(uint32_t stat = 0; stat < nstats; stat++)
{
if (scap_stats_v2_snapshot[stat].name[0] == '\0')
{
break;
}
// todo: as we expand scap_stats_v2 prefix may be pushed to scap or we may need to expand
// functionality here for example if we add userspace syscall counters that should be prefixed w/ `falco.`
char metric_name[STATS_NAME_MAX] = "scap.";
strlcat(metric_name, scap_stats_v2_snapshot[stat].name, sizeof(metric_name));
switch(scap_stats_v2_snapshot[stat].type)
{
case STATS_VALUE_TYPE_U64:
/* Always send high level n_evts related fields, even if zero. */
if (strncmp(scap_stats_v2_snapshot[stat].name, "n_evts", 7) == 0) // exact not prefix match here
{
n_evts = scap_stats_v2_snapshot[stat].value.u64;
n_evts = metric.value.u64;
// Always send high level n_evts related fields, even if zero and configs are set to exclude empty values.
output_fields[metric_name] = n_evts;
output_fields["scap.n_evts_prev"] = m_last_n_evts;
n_evts_delta = n_evts - m_last_n_evts;
if (n_evts_delta != 0 && stats_snapshot_time_delta_sec > 0)
{
/* n_evts is total number of kernel side events. */
output_fields["scap.evts_rate_sec"] = std::round((double)(n_evts_delta / stats_snapshot_time_delta_sec) * 10.0) / 10.0; // round to 1 decimal
}
else
Expand All @@ -496,16 +410,15 @@ void stats_writer::collector::get_metrics_output_fields_additional(
}
m_last_n_evts = n_evts;
}
/* Always send high level n_drops related fields, even if zero. */
else if (strncmp(scap_stats_v2_snapshot[stat].name, "n_drops", 8) == 0) // exact not prefix match here
else if (strncmp(metric.name, "n_drops", 8) == 0)
{
n_drops = scap_stats_v2_snapshot[stat].value.u64;
n_drops = metric.value.u64;
// Always send high level n_drops related fields, even if zero and configs are set to exclude empty values.
output_fields[metric_name] = n_drops;
output_fields["scap.n_drops_prev"] = m_last_n_drops;
n_drops_delta = n_drops - m_last_n_drops;
if (n_drops_delta != 0 && stats_snapshot_time_delta_sec > 0)
{
/* n_drops is total number of kernel side event drops. */
output_fields["scap.evts_drop_rate_sec"] = std::round((double)(n_drops_delta / stats_snapshot_time_delta_sec) * 10.0) / 10.0; // round to 1 decimal
}
else
Expand All @@ -514,18 +427,25 @@ void stats_writer::collector::get_metrics_output_fields_additional(
}
m_last_n_drops = n_drops;
}
if (scap_stats_v2_snapshot[stat].value.u64 == 0 && !m_writer->m_config->m_metrics_include_empty_values)
if (metric.value.u64 == 0 && !m_writer->m_config->m_metrics_include_empty_values)
{
break;
}
output_fields[metric_name] = scap_stats_v2_snapshot[stat].value.u64;
output_fields[metric_name] = metric.value.u64;
break;
case METRIC_VALUE_TYPE_D:
if (metric.value.d == 0 && !m_writer->m_config->m_metrics_include_empty_values)
{
break;
}
output_fields[metric_name] = metric.value.d;
break;
default:
break;
}
}
/* n_drops_perc needs to be calculated outside the loop given no field ordering guarantees.
* Always send n_drops_perc, even if zero. */
* Always send n_drops_perc, even if zero and configs are set to exclude empty values. */
if(n_evts_delta > 0)
{
output_fields["scap.n_drops_perc"] = (double)((100.0 * n_drops_delta) / n_evts_delta);
Expand All @@ -542,6 +462,30 @@ void stats_writer::collector::collect(const std::shared_ptr<sinsp>& inspector, c
{
if (m_writer->has_output())
{
#if defined(__linux__) and !defined(MINIMAL_BUILD) and !defined(__EMSCRIPTEN__)
if(!m_writer->m_libs_metrics_collector)
{
uint32_t flags = m_writer->m_config->m_metrics_flags;
// Note: the ENGINE_FLAG_BPF_STATS_ENABLED check has been moved to libs; that is, when libbpf stats are not enabled
// in the kernel settings, we won't collect them even if the end user enabled the libbpf stats option.
if (!(inspector->check_current_engine(BPF_ENGINE) || inspector->check_current_engine(MODERN_BPF_ENGINE)))
{
flags &= ~METRICS_V2_LIBBPF_STATS;
}
// Note: src is static for live captures
if (src != falco_common::syscall_source)
{
flags &= ~(METRICS_V2_KERNEL_COUNTERS | METRICS_V2_STATE_COUNTERS | METRICS_V2_LIBBPF_STATS);

}
m_writer->m_libs_metrics_collector = std::make_unique<libs::metrics::libs_metrics_collector>(inspector.get(), flags);
}

if(!m_writer->m_output_rule_metrics_converter)
{
m_writer->m_output_rule_metrics_converter = std::make_unique<libs::metrics::output_rule_metrics_converter>();
}
#endif
/* Collect stats / metrics once per ticker period. */
auto tick = stats_writer::get_ticker();
if (tick != m_last_tick)
Expand All @@ -559,8 +503,8 @@ void stats_writer::collector::collect(const std::shared_ptr<sinsp>& inspector, c

/* Get respective metrics output_fields. */
nlohmann::json output_fields;
get_metrics_output_fields_wrapper(output_fields, inspector, now, src, num_evts, stats_snapshot_time_delta_sec);
get_metrics_output_fields_additional(output_fields, inspector, stats_snapshot_time_delta_sec, src);
get_metrics_output_fields_wrapper(output_fields, inspector, src, num_evts, now, stats_snapshot_time_delta_sec);
get_metrics_output_fields_additional(output_fields, stats_snapshot_time_delta_sec);

/* Send message in the queue */
stats_writer::msg msg;
Expand Down
11 changes: 7 additions & 4 deletions userspace/falco/stats_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@ class stats_writer
/*!
\brief Collect snapshot metrics wrapper fields as internal rule formatted output fields.
*/
void get_metrics_output_fields_wrapper(nlohmann::json& output_fields, const std::shared_ptr<sinsp>& inspector, uint64_t now, const std::string& src, uint64_t num_evts, double stats_snapshot_time_delta_sec);
void get_metrics_output_fields_wrapper(nlohmann::json& output_fields, const std::shared_ptr<sinsp>& inspector, const std::string& src, uint64_t num_evts, uint64_t now, double stats_snapshot_time_delta_sec);

/*!
\brief Collect snapshot metrics syscalls related metrics as internal rule formatted output fields.
\brief Collect the configurable snapshot metrics as internal rule formatted output fields.
*/
void get_metrics_output_fields_additional(nlohmann::json& output_fields, const std::shared_ptr<sinsp>& inspector, double stats_snapshot_time_delta_sec, const std::string& src);
void get_metrics_output_fields_additional(nlohmann::json& output_fields, double stats_snapshot_time_delta_sec);

std::shared_ptr<stats_writer> m_writer;
stats_writer::ticker_t m_last_tick = 0;
Expand Down Expand Up @@ -145,10 +145,13 @@ class stats_writer
std::ofstream m_file_output;
#ifndef __EMSCRIPTEN__
tbb::concurrent_bounded_queue<stats_writer::msg> m_queue;
#endif
#if defined(__linux__) and !defined(MINIMAL_BUILD) and !defined(__EMSCRIPTEN__)
std::unique_ptr<libs::metrics::libs_metrics_collector> m_libs_metrics_collector;
std::unique_ptr<libs::metrics::output_rule_metrics_converter> m_output_rule_metrics_converter;
#endif
std::shared_ptr<falco_outputs> m_outputs;
std::shared_ptr<const falco_configuration> m_config;

// note: in this way, only collectors can push into the queue
friend class stats_writer::collector;
};
Loading