Skip to content

Commit

Permalink
Added cgid capture to stirling with testing (#1681)
Browse files Browse the repository at this point in the history
Summary: Added Capturing of Cgroup ids to the ebpf probes in the socket
tracer. This includes modifications to data structures for communicating
that information to userspace and other data tables. It also includes
some minimal testing for ensuring that cgroup ids are captured
correctly. The reason for this change is given in the design document
for the solution to this
[issue](#1638): in short it is
the first step in using cgids rather than pids to associate a particular
piece of communication data with the correct pod.

Test Plan: Added Cgroup id capture checks to existing `socket_tracer`
tests.

Type of change: /kind Feature expansion

Signed-off-by: AdityaAtulTewari <[email protected]>
  • Loading branch information
AdityaAtulTewari authored Oct 3, 2023
1 parent 76a3ad5 commit 2a2349f
Show file tree
Hide file tree
Showing 27 changed files with 137 additions and 26 deletions.
32 changes: 32 additions & 0 deletions src/shared/metadata/cgroup_path_resolver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@

#include <absl/strings/str_replace.h>

#include <fstream>
#include "src/common/fs/fs_wrapper.h"
#include "src/common/system/config.h"
#include "src/common/system/proc_pid_path.h"
#include "src/shared/metadata/cgroup_path_resolver.h"

DEFINE_bool(force_cgroup2_mode, true, "Flag to force assume cgroup2 fs for testing purposes");
Expand Down Expand Up @@ -76,6 +79,35 @@ StatusOr<std::string> FindSelfCGroupProcs(std::string_view base_path) {
return error::NotFound("Could not find self as a template.");
}

StatusOr<uint64_t> FindCgroupIDFromPID(pid_t pid) {
auto cgroup_name = px::system::ProcPidPath(pid, "cgroup");
std::ifstream proc_cgroup(cgroup_name);

// get rid of the id at the front
uint64_t blank;
proc_cgroup >> blank;
std::string line;
std::getline(proc_cgroup, line);

if (proc_cgroup.fail() || proc_cgroup.bad()) {
std::string failure_string =
absl::StrCat("procfs cgroup file getline failed with", "fail: ", proc_cgroup.fail(),
"bad : ", proc_cgroup.bad(), "eof: ", proc_cgroup.eof());
return error::NotFound(failure_string);
}

std::string cgroup_path_name =
absl::StrCat(px::system::Config::GetInstance().sysfs_path().string(), "/cgroup/",
line.substr(2, line.size()));

// lstat what we are pointing to
struct stat stat_buf;
if (stat(cgroup_path_name.c_str(), &stat_buf)) {
return error::NotFound(absl::StrCat(cgroup_path_name, " sysfs cgroup stat failed."));
}
return stat_buf.st_ino;
}

StatusOr<CGroupTemplateSpec> CreateCGroupTemplateSpecFromPath(std::string_view path) {
// Pattern match for a pod ID.
// Examples:
Expand Down
5 changes: 5 additions & 0 deletions src/shared/metadata/cgroup_path_resolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ StatusOr<std::string> CGroupBasePath(std::string_view sysfs_path);
*/
StatusOr<std::string> FindSelfCGroupProcs(std::string_view base_path);

/**
* Finds the cgroup id corresponding to the pid
**/
StatusOr<uint64_t> FindCgroupIDFromPID(pid_t pid);

/**
* Given a path to a sample cgroup.procs file for a K8s pod,
* this function produces a templated spec from which paths for other pods can be generated.
Expand Down
10 changes: 9 additions & 1 deletion src/stirling/core/canonical_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,19 @@ constexpr DataElement kTime = {
constexpr std::string_view kUPIDColName = "upid";
constexpr DataElement kUPID = {
kUPIDColName,
"An opaque numeric ID that globally identify a running process inside the cluster.",
"An opaque numeric ID that globally identifies a running process inside the cluster.",
types::DataType::UINT128,
types::SemanticType::ST_UPID,
types::PatternType::GENERAL};

constexpr std::string_view kCGIDColName = "cgid";
constexpr DataElement kCGID = {
kCGIDColName,
"The cgroup id that the process was in when this probe was triggered.",
types::DataType::INT64,
types::SemanticType::ST_NONE,
types::PatternType::GENERAL};

// clang-format on

} // namespace canonical_data_elements
Expand Down
2 changes: 2 additions & 0 deletions src/stirling/source_connectors/socket_tracer/amqp_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ namespace stirling {
static constexpr DataElement kAMQPElements[] = {
canonical_data_elements::kTime,
canonical_data_elements::kUPID,
canonical_data_elements::kCGID,
canonical_data_elements::kRemoteAddr,
canonical_data_elements::kRemotePort,
canonical_data_elements::kTraceRole,
Expand Down Expand Up @@ -84,6 +85,7 @@ DEFINE_PRINT_TABLE(AMQP)

constexpr int kAMQPTimeIdx = kAMQPTable.ColIndex("time_");
constexpr int kAMQPUUIDIdx = kAMQPTable.ColIndex("upid");
constexpr int kAMQPCGIDIdx = kAMQPTable.ColIndex("cgid");
constexpr int kAMQPFrameTypeIdx = kAMQPTable.ColIndex("frame_type");
constexpr int kAMQPChannelIdx = kAMQPTable.ColIndex("channel");
constexpr int kAMQPLatencyIdx = kAMQPTable.ColIndex("latency");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ static __inline void init_conn_id(uint32_t tgid, int32_t fd, struct conn_id_t* c
conn_id->upid.start_time_ticks = get_tgid_start_time();
conn_id->fd = fd;
conn_id->tsid = bpf_ktime_get_ns();
// Until we have proper cgids tracing
conn_id->cgid = bpf_get_current_cgroup_id();
}

static __inline void init_conn_info(uint32_t tgid, int32_t fd, struct conn_info_t* conn_info) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ struct conn_id_t {
int32_t fd;
// Unique id of the conn_id (timestamp).
uint64_t tsid;
// cgid of the connection
uint64_t cgid;
};

// Specifies the corresponding indexes of the entries of a per-cpu array.
Expand Down
2 changes: 2 additions & 0 deletions src/stirling/source_connectors/socket_tracer/cass_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ static const std::map<int64_t, std::string_view> kCQLRespOpDecoder =
static constexpr DataElement kCQLElements[] = {
canonical_data_elements::kTime,
canonical_data_elements::kUPID,
canonical_data_elements::kCGID,
canonical_data_elements::kRemoteAddr,
canonical_data_elements::kRemotePort,
canonical_data_elements::kTraceRole,
Expand Down Expand Up @@ -71,6 +72,7 @@ DEFINE_PRINT_TABLE(CQL)

static constexpr int kCQLTraceRoleIdx = kCQLTable.ColIndex("trace_role");
static constexpr int kCQLUPIDIdx = kCQLTable.ColIndex("upid");
static constexpr int kCQLCGIDIdx = kCQLTable.ColIndex("cgid");
static constexpr int kCQLReqOp = kCQLTable.ColIndex("req_op");
static constexpr int kCQLReqBody = kCQLTable.ColIndex("req_body");
static constexpr int kCQLRespOp = kCQLTable.ColIndex("resp_op");
Expand Down
1 change: 1 addition & 0 deletions src/stirling/source_connectors/socket_tracer/conn_stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ absl::flat_hash_map<ConnStats::AggKey, ConnStats::Stats>& ConnStats::UpdateStats
stats.conn_close += conn_close;
stats.bytes_recv += bytes_recv;
stats.bytes_sent += bytes_sent;
stats.cgid_curr = tracker->conn_id().cgid;

stats.last_update = update_counter_;
}
Expand Down
1 change: 1 addition & 0 deletions src/stirling/source_connectors/socket_tracer/conn_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class ConnStats {
uint64_t conn_close = 0;
uint64_t bytes_sent = 0;
uint64_t bytes_recv = 0;
uint64_t cgid_curr = 0;

// Keep track of whether this stats object has ever been previously reported.
// Used to determine whether it should be reported in the future, after the connection
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ namespace stirling {
constexpr DataElement kConnStatsElements[] = {
canonical_data_elements::kTime,
canonical_data_elements::kUPID,
canonical_data_elements::kCGID,
canonical_data_elements::kRemoteAddr,
canonical_data_elements::kRemotePort,
canonical_data_elements::kTraceRole,
Expand Down Expand Up @@ -68,6 +69,7 @@ namespace conn_stats_idx {

constexpr int kTime = kConnStatsTable.ColIndex("time_");
constexpr int kUPID = kConnStatsTable.ColIndex("upid");
constexpr int kCGID = kConnStatsTable.ColIndex("cgid");
constexpr int kRemoteAddr = kConnStatsTable.ColIndex("remote_addr");
constexpr int kRemotePort = kConnStatsTable.ColIndex("remote_port");
constexpr int kAddrFamily = kConnStatsTable.ColIndex("addr_family");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ TEST_F(ConnStatsTest, Basic) {
.upid = {.pid = 12345, .start_time_ticks = 1000},
.fd = 3,
.tsid = 111110,
.cgid = UINT64_MAX,
};

// The basic conn_stats_event template.
Expand Down Expand Up @@ -165,6 +166,7 @@ TEST_F(ConnStatsTest, ServerSide) {
.upid = {.pid = 12345, .start_time_ticks = 1000},
.fd = 3,
.tsid = 10000,
.cgid = UINT64_MAX,
};

struct conn_stats_event_t conn0_stats_event;
Expand Down Expand Up @@ -193,6 +195,7 @@ TEST_F(ConnStatsTest, ServerSide) {
.upid = {.pid = 12345, .start_time_ticks = 1000},
.fd = 4,
.tsid = 20000,
.cgid = UINT64_MAX,
};

struct conn_stats_event_t conn1_stats_event;
Expand Down Expand Up @@ -221,6 +224,7 @@ TEST_F(ConnStatsTest, ServerSide) {
.upid = {.pid = 12345, .start_time_ticks = 1000},
.fd = 5,
.tsid = 30000,
.cgid = UINT64_MAX,
};

struct conn_stats_event_t conn2_stats_event;
Expand Down Expand Up @@ -257,6 +261,7 @@ TEST_F(ConnStatsTest, ClientSide) {
.upid = {.pid = 11111, .start_time_ticks = 1000},
.fd = 3,
.tsid = 10000,
.cgid = UINT64_MAX,
};

struct conn_stats_event_t conn0_stats_event;
Expand Down Expand Up @@ -285,6 +290,7 @@ TEST_F(ConnStatsTest, ClientSide) {
.upid = {.pid = 11111, .start_time_ticks = 1000},
.fd = 4,
.tsid = 20000,
.cgid = UINT64_MAX,
};

struct conn_stats_event_t conn1_stats_event;
Expand Down Expand Up @@ -313,6 +319,7 @@ TEST_F(ConnStatsTest, ClientSide) {
.upid = {.pid = 11111, .start_time_ticks = 1000},
.fd = 5,
.tsid = 30000,
.cgid = UINT64_MAX,
};

struct conn_stats_event_t conn2_stats_event;
Expand Down Expand Up @@ -344,6 +351,7 @@ TEST_F(ConnStatsTest, NoEventsIfNoRemoteAddr) {
.upid = {.pid = 11111, .start_time_ticks = 1000},
.fd = 3,
.tsid = 10000,
.cgid = UINT64_MAX,
};

struct conn_stats_event_t conn_stats_event;
Expand Down Expand Up @@ -371,6 +379,7 @@ TEST_F(ConnStatsTest, DisabledConnTracker) {
.upid = {.pid = 11111, .start_time_ticks = 1000},
.fd = 3,
.tsid = 10000,
.cgid = UINT64_MAX,
};

struct conn_stats_event_t conn_stats_event;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class ConnTrackerHTTP2Test : public ::testing::Test {
}

const conn_id_t kConnID = {
.upid = {{.pid = 123}, .start_time_ticks = 11000000}, .fd = 3, .tsid = 21};
.upid = {{.pid = 123}, .start_time_ticks = 11000000}, .fd = 3, .tsid = 21, .cgid = 33};

ConnTracker tracker_;
testing::MockClock mock_clock_;
Expand Down Expand Up @@ -193,7 +193,7 @@ TEST_F(ConnTrackerHTTP2Test, MixedHeadersAndData) {
TEST_F(ConnTrackerHTTP2Test, ZeroFD) {
ConnTracker tracker;
const conn_id_t kConnID = {
.upid = {{.pid = 123}, .start_time_ticks = 11000000}, .fd = 0, .tsid = 21};
.upid = {{.pid = 123}, .start_time_ticks = 11000000}, .fd = 0, .tsid = 21, .cgid = 33};
tracker.SetConnID(kConnID);

const int kStreamID = 7;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,7 @@ TEST_F(ConnTrackerTest, ConnStats) {
.upid = {.pid = 12345, .start_time_ticks = 1000},
.fd = 3,
.tsid = 111110,
.cgid = UINT64_MAX,
};

struct conn_stats_event_t conn_stats_event;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ TEST_F(ConnTrackersManagerTest, Fuzz) {
int32_t fd = 1;
uint64_t tsid = tsid_dist(rng_);

struct conn_id_t conn_id = {{{pid}, 0}, fd, tsid};
struct conn_id_t conn_id = {{{pid}, 0}, fd, tsid, UINT64_MAX};
TrackerEvent(conn_id, protocol.value());
} else if (x < 0.95) {
int death_countdown = death_countdown_dist(rng_);
Expand Down
2 changes: 2 additions & 0 deletions src/stirling/source_connectors/socket_tracer/dns_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ namespace stirling {
static constexpr DataElement kDNSElements[] = {
canonical_data_elements::kTime,
canonical_data_elements::kUPID,
canonical_data_elements::kCGID,
canonical_data_elements::kRemoteAddr,
canonical_data_elements::kRemotePort,
canonical_data_elements::kTraceRole,
Expand Down Expand Up @@ -63,6 +64,7 @@ static constexpr auto kDNSTable =
DEFINE_PRINT_TABLE(DNS)

static constexpr int kDNSUPIDIdx = kDNSTable.ColIndex("upid");
static constexpr int kDNSCGIDIdx = kDNSTable.ColIndex("cgid");
static constexpr int kDNSReqHdrIdx = kDNSTable.ColIndex("req_header");
static constexpr int kDNSReqBodyIdx = kDNSTable.ColIndex("req_body");
static constexpr int kDNSRespHdrIdx = kDNSTable.ColIndex("resp_header");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ namespace stirling {
constexpr DataElement kHTTP2MessagesElements[] = {
canonical_data_elements::kTime,
canonical_data_elements::kUPID,
canonical_data_elements::kCGID,
canonical_data_elements::kRemoteAddr,
canonical_data_elements::kRemotePort,
canonical_data_elements::kTraceRole,
Expand Down Expand Up @@ -58,6 +59,7 @@ DEFINE_PRINT_TABLE(HTTP2Messages)

constexpr int kHTTP2MessagesTimeIdx = kHTTP2MessagesTable.ColIndex("time_");
constexpr int kHTTP2MessagesUPIDIdx = kHTTP2MessagesTable.ColIndex("upid");
constexpr int kHTTP2MessagesCGIDIdx = kHTTP2MessagesTable.ColIndex("cgid");
constexpr int kHTTP2MessagesRemoteAddrIdx = kHTTP2MessagesTable.ColIndex("remote_addr");
constexpr int kHTTP2MessagesRemotePortIdx = kHTTP2MessagesTable.ColIndex("remote_port");
constexpr int kHTTP2MessagesTraceRoleIdx = kHTTP2MessagesTable.ColIndex("trace_role");
Expand Down
2 changes: 2 additions & 0 deletions src/stirling/source_connectors/socket_tracer/http_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ static const std::map<int64_t, std::string_view> kHTTPContentTypeDecoder =
constexpr DataElement kHTTPElements[] = {
canonical_data_elements::kTime,
canonical_data_elements::kUPID,
canonical_data_elements::kCGID,
canonical_data_elements::kRemoteAddr,
canonical_data_elements::kRemotePort,
canonical_data_elements::kTraceRole,
Expand Down Expand Up @@ -110,6 +111,7 @@ DEFINE_PRINT_TABLE(HTTP)

constexpr int kHTTPTimeIdx = kHTTPTable.ColIndex("time_");
constexpr int kHTTPUPIDIdx = kHTTPTable.ColIndex("upid");
constexpr int kHTTPCGIDIdx = kHTTPTable.ColIndex("cgid");
constexpr int kHTTPRemoteAddrIdx = kHTTPTable.ColIndex("remote_addr");
constexpr int kHTTPRemotePortIdx = kHTTPTable.ColIndex("remote_port");
constexpr int kHTTPTraceRoleIdx = kHTTPTable.ColIndex("trace_role");
Expand Down
2 changes: 2 additions & 0 deletions src/stirling/source_connectors/socket_tracer/kafka_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ static const std::map<int64_t, std::string_view> kKafkaAPIKeyDecoder =
static constexpr DataElement kKafkaElements[] = {
canonical_data_elements::kTime,
canonical_data_elements::kUPID,
canonical_data_elements::kCGID,
canonical_data_elements::kRemoteAddr,
canonical_data_elements::kRemotePort,
canonical_data_elements::kTraceRole,
Expand Down Expand Up @@ -68,6 +69,7 @@ DEFINE_PRINT_TABLE(Kafka)

constexpr int kKafkaTimeIdx = kKafkaTable.ColIndex("time_");
constexpr int kKafkaUPIDIdx = kKafkaTable.ColIndex("upid");
constexpr int kKafkaCGIDIdx = kKafkaTable.ColIndex("cgid");
constexpr int kKafkaReqCmdIdx = kKafkaTable.ColIndex("req_cmd");
constexpr int kKafkaClientIDIdx = kKafkaTable.ColIndex("client_id");
constexpr int kKafkaReqBodyIdx = kKafkaTable.ColIndex("req_body");
Expand Down
2 changes: 2 additions & 0 deletions src/stirling/source_connectors/socket_tracer/mux_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ namespace stirling {
static constexpr DataElement kMuxElements[] = {
canonical_data_elements::kTime,
canonical_data_elements::kUPID,
canonical_data_elements::kCGID,
canonical_data_elements::kRemoteAddr,
canonical_data_elements::kRemotePort,
canonical_data_elements::kTraceRole,
Expand All @@ -56,6 +57,7 @@ static constexpr auto kMuxTable =
DEFINE_PRINT_TABLE(Mux)

static constexpr int kMuxUPIDIdx = kMuxTable.ColIndex("upid");
static constexpr int kMuxCGIDIdx = kMuxTable.ColIndex("cgid");
static constexpr int kMuxReqTypeIdx = kMuxTable.ColIndex("req_type");

} // namespace stirling
Expand Down
2 changes: 2 additions & 0 deletions src/stirling/source_connectors/socket_tracer/mysql_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ static const std::map<int64_t, std::string_view> kMySQLRespStatusDecoder =
static constexpr DataElement kMySQLElements[] = {
canonical_data_elements::kTime,
canonical_data_elements::kUPID,
canonical_data_elements::kCGID,
canonical_data_elements::kRemoteAddr,
canonical_data_elements::kRemotePort,
canonical_data_elements::kTraceRole,
Expand Down Expand Up @@ -71,6 +72,7 @@ DEFINE_PRINT_TABLE(MySQL)

constexpr int kMySQLTimeIdx = kMySQLTable.ColIndex("time_");
constexpr int kMySQLUPIDIdx = kMySQLTable.ColIndex("upid");
constexpr int kMySQLCGIDIdx = kMySQLTable.ColIndex("cgid");
constexpr int kMySQLReqCmdIdx = kMySQLTable.ColIndex("req_cmd");
constexpr int kMySQLReqBodyIdx = kMySQLTable.ColIndex("req_body");
constexpr int kMySQLRespStatusIdx = kMySQLTable.ColIndex("resp_status");
Expand Down
2 changes: 2 additions & 0 deletions src/stirling/source_connectors/socket_tracer/nats_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ namespace stirling {
constexpr DataElement kNATSElements[] = {
canonical_data_elements::kTime,
canonical_data_elements::kUPID,
canonical_data_elements::kCGID,
canonical_data_elements::kRemoteAddr,
canonical_data_elements::kRemotePort,
canonical_data_elements::kTraceRole,
Expand All @@ -53,6 +54,7 @@ namespace nats_idx {

constexpr int kTime = kNATSTable.ColIndex("time_");
constexpr int kUPID = kNATSTable.ColIndex("upid");
constexpr int kCGID = kNATSTable.ColIndex("cgid");
constexpr int kRemoteAddr = kNATSTable.ColIndex("remote_addr");
constexpr int kRemotePort = kNATSTable.ColIndex("remote_port");
constexpr int kRemoteRole = kNATSTable.ColIndex("trace_role");
Expand Down
2 changes: 2 additions & 0 deletions src/stirling/source_connectors/socket_tracer/pgsql_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ namespace stirling {
static constexpr DataElement kPGSQLElements[] = {
canonical_data_elements::kTime,
canonical_data_elements::kUPID,
canonical_data_elements::kCGID,
canonical_data_elements::kRemoteAddr,
canonical_data_elements::kRemotePort,
canonical_data_elements::kTraceRole,
Expand Down Expand Up @@ -56,6 +57,7 @@ static constexpr auto kPGSQLTable = DataTableSchema(
DEFINE_PRINT_TABLE(PGSQL)

constexpr int kPGSQLUPIDIdx = kPGSQLTable.ColIndex("upid");
constexpr int kPGSQLCGIDIdx = kPGSQLTable.ColIndex("cgid");
constexpr int kPGSQLReqIdx = kPGSQLTable.ColIndex("req");
constexpr int kPGSQLRespIdx = kPGSQLTable.ColIndex("resp");
constexpr int kPGSQLReqCmdIdx = kPGSQLTable.ColIndex("req_cmd");
Expand Down
Loading

0 comments on commit 2a2349f

Please sign in to comment.