Skip to content

Commit 97f35d7

Browse files
committed
ROX-30714: track cgroup ID to container ID mapping
This change greatly reduces the amount of data sent in the ringbuffer and the effort the BPF hooks need to retrieve the cgroup of the current process. In exchange for these benefits, we now need to look up and keep track of the cgroups and container IDs that exist on the system ourselves by iterating over the cgroupfs. TODO: Add integration tests with containers.
1 parent ab49c9a commit 97f35d7

8 files changed

Lines changed: 303 additions & 208 deletions

File tree

fact-ebpf/src/bpf/maps.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
struct helper_t {
1111
char buf[PATH_MAX * 2];
12-
const unsigned char* array[16];
1312
};
1413

1514
struct {

fact-ebpf/src/bpf/process.h

Lines changed: 1 addition & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -11,71 +11,6 @@
1111
#include <bpf/bpf_core_read.h>
1212
// clang-format on
1313

14-
__always_inline static const char* get_memory_cgroup(struct helper_t* helper) {
15-
if (!bpf_core_enum_value_exists(enum cgroup_subsys_id, memory_cgrp_id)) {
16-
return NULL;
17-
}
18-
19-
struct task_struct* task = (struct task_struct*)bpf_get_current_task();
20-
21-
// We're guessing which cgroup controllers are enabled for this task. The
22-
// assumption is that memory controller is present more often than
23-
// cpu & cpuacct.
24-
struct kernfs_node* kn = BPF_CORE_READ(task, cgroups, subsys[memory_cgrp_id], cgroup, kn);
25-
if (kn == NULL) {
26-
return NULL;
27-
}
28-
29-
int i = 0;
30-
for (; i < 16; i++) {
31-
helper->array[i] = (const unsigned char*)BPF_CORE_READ(kn, name);
32-
if (bpf_core_field_exists(kn->__parent)) {
33-
kn = BPF_CORE_READ(kn, __parent);
34-
} else {
35-
struct {
36-
struct kernfs_node* parent;
37-
}* kn_old = (void*)kn;
38-
kn = BPF_CORE_READ(kn_old, parent);
39-
}
40-
if (kn == NULL) {
41-
break;
42-
}
43-
}
44-
45-
if (i == 16) {
46-
i--;
47-
}
48-
49-
int offset = 0;
50-
for (; i >= 0 && offset < PATH_MAX; i--) {
51-
// Skip empty directories
52-
if (helper->array[i] == NULL) {
53-
continue;
54-
}
55-
56-
helper->buf[offset & (PATH_MAX - 1)] = '/';
57-
if (++offset >= PATH_MAX) {
58-
return NULL;
59-
}
60-
61-
int len = bpf_probe_read_kernel_str(&helper->buf[offset & (PATH_MAX - 1)], PATH_MAX, helper->array[i]);
62-
if (len < 0) {
63-
// We should have skipped all empty entries, any other error is a genuine
64-
// problem, stop processing.
65-
return NULL;
66-
}
67-
68-
if (len == 1) {
69-
offset--;
70-
continue;
71-
}
72-
73-
offset += len - 1;
74-
}
75-
76-
return helper->buf;
77-
}
78-
7914
__always_inline static void process_fill_lineage(process_t* p, struct helper_t* helper) {
8015
struct task_struct* task = (struct task_struct*)bpf_get_current_task();
8116
struct path path;
@@ -112,6 +47,7 @@ __always_inline static int64_t process_fill(process_t* p) {
11247
p->gid = (uid_gid >> 32) & 0xFFFFFFFF;
11348
p->login_uid = BPF_CORE_READ(task, loginuid.val);
11449
p->pid = (bpf_get_current_pid_tgid() >> 32) & 0xFFFFFFFF;
50+
p->cgroup_id = bpf_get_current_cgroup_id();
11551
u_int64_t err = bpf_get_current_comm(p->comm, TASK_COMM_LEN);
11652
if (err != 0) {
11753
bpf_printk("Failed to fill task comm");
@@ -144,11 +80,6 @@ __always_inline static int64_t process_fill(process_t* p) {
14480
}
14581
bpf_probe_read_str(p->exe_path, PATH_MAX, exe_path);
14682

147-
const char* cg = get_memory_cgroup(helper);
148-
if (cg != NULL) {
149-
bpf_probe_read_str(p->memory_cgroup, PATH_MAX, cg);
150-
}
151-
15283
p->in_root_mount_ns = get_mount_ns() == host_mount_ns;
15384

15485
process_fill_lineage(p, helper);

fact-ebpf/src/bpf/types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ typedef struct process_t {
2020
char args[4096];
2121
unsigned int args_len;
2222
char exe_path[PATH_MAX];
23-
char memory_cgroup[PATH_MAX];
23+
unsigned long long cgroup_id;
2424
unsigned int uid;
2525
unsigned int gid;
2626
unsigned int login_uid;

fact/src/bpf.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ use tokio::{
1313
task::JoinHandle,
1414
};
1515

16-
use crate::{config::FactConfig, event::Event, host_info, metrics::EventCounter};
16+
use crate::{
17+
cgroup::ContainerIdCache, config::FactConfig, event::Event, host_info, metrics::EventCounter,
18+
};
1719

1820
use fact_ebpf::{event_t, metrics_t};
1921

@@ -97,6 +99,7 @@ impl Bpf {
9799
paths: Vec<PathBuf>,
98100
mut running: Receiver<bool>,
99101
event_counter: EventCounter,
102+
cid_cache: ContainerIdCache,
100103
) -> JoinHandle<()> {
101104
info!("Starting BPF worker...");
102105
tokio::spawn(async move {
@@ -107,7 +110,7 @@ impl Bpf {
107110
let ringbuf = guard.get_inner_mut();
108111
while let Some(event) = ringbuf.next() {
109112
let event: &event_t = unsafe { &*(event.as_ptr() as *const _) };
110-
let event = match Event::try_from(event) {
113+
let event = match Event::new(event, &cid_cache).await {
111114
Ok(event) => Arc::new(event),
112115
Err(e) => {
113116
error!("Failed to parse event: '{e}'");
@@ -175,21 +178,23 @@ mod bpf_tests {
175178
let (run_tx, run_rx) = watch::channel(true);
176179
// Create a metrics exporter, but don't start it
177180
let exporter = Exporter::new(bpf.get_metrics().unwrap());
181+
let cid_cache = ContainerIdCache::new();
178182

179183
Bpf::start_worker(
180184
tx,
181185
bpf.fd,
182186
paths,
183187
run_rx,
184188
exporter.metrics.bpf_worker.clone(),
189+
cid_cache,
185190
);
186191

187192
// Create a file
188193
let file =
189194
NamedTempFile::new_in(monitored_path).expect("Failed to create temporary file");
190195
println!("Created {file:?}");
191196

192-
let expected = Event::new(
197+
let expected = Event::from_raw_parts(
193198
file_activity_type_t_FILE_ACTIVITY_CREATION,
194199
host_info::get_hostname(),
195200
file.path().to_path_buf(),

fact/src/cgroup.rs

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
use std::{
2+
collections::HashMap,
3+
os::unix::fs::DirEntryExt,
4+
path::PathBuf,
5+
sync::Arc,
6+
time::{Duration, SystemTime},
7+
};
8+
9+
use tokio::{
10+
sync::{watch::Receiver, Mutex},
11+
task::JoinHandle,
12+
time,
13+
};
14+
15+
use crate::host_info::get_cgroup_paths;
16+
17+
/// A single cached cgroup -> container-ID association.
#[derive(Debug)]
struct ContainerIdEntry {
    // Container ID owning this cgroup, or None for cgroups that do not
    // belong to any container (e.g. system slices).
    container_id: Option<String>,
    // Last time a cgroupfs scan saw this cgroup; used by prune() to
    // evict entries for cgroups that have disappeared.
    // (Was `pub` while the sibling field was private — the struct is
    // module-private, so the modifier was inconsistent and meaningless.)
    last_seen: SystemTime,
}
22+
23+
// Maps a cgroup ID to its cached container-ID entry. Keys are the inode
// numbers of cgroupfs directories (the walk keys entries by
// `DirEntry::ino`); presumably these match the IDs produced by
// `bpf_get_current_cgroup_id()` on the BPF side — TODO confirm.
type ContainerIdMap = HashMap<u64, ContainerIdEntry>;

// Shared, async-locked cache of cgroup-ID -> container-ID mappings.
// Cloning is cheap: clones share the same underlying map via `Arc`.
// NOTE(review): `Default` yields an *empty* cache, whereas `new()`
// pre-populates it by scanning cgroupfs — confirm that difference is
// intentional.
#[derive(Debug, Clone, Default)]
pub struct ContainerIdCache(Arc<Mutex<ContainerIdMap>>);
27+
28+
impl ContainerIdCache {
29+
pub fn new() -> Self {
30+
let mut map = HashMap::new();
31+
ContainerIdCache::update_unlocked(&mut map);
32+
ContainerIdCache(Arc::new(Mutex::new(map)))
33+
}
34+
35+
fn update_unlocked(map: &mut ContainerIdMap) {
36+
for root in get_cgroup_paths() {
37+
ContainerIdCache::walk_proc_cgroups_inner(&root, map, None);
38+
}
39+
}
40+
41+
/// Take the lock and rescan cgroupfs into the shared map.
async fn update(&mut self) {
    let mut guard = self.0.lock().await;
    ContainerIdCache::update_unlocked(&mut guard);
}
45+
46+
async fn prune(&mut self) {
47+
let now = SystemTime::now();
48+
self.0.lock().await.retain(|_, value| {
49+
now.duration_since(value.last_seen).unwrap() < Duration::from_secs(30)
50+
})
51+
}
52+
53+
pub async fn get_container_id(&self, cgroup_id: u64) -> Option<String> {
54+
let mut map = self.0.lock().await;
55+
match map.get(&cgroup_id) {
56+
Some(entry) => entry.container_id.clone(),
57+
None => {
58+
// Update the container ID cache and try again
59+
ContainerIdCache::update_unlocked(&mut map);
60+
map.get(&cgroup_id).map(|s| s.container_id.clone())?
61+
}
62+
}
63+
}
64+
65+
pub fn start_worker(mut self, mut running: Receiver<bool>) -> JoinHandle<()> {
66+
let mut update_interval = time::interval(time::Duration::from_secs(30));
67+
tokio::spawn(async move {
68+
loop {
69+
tokio::select! {
70+
_ = update_interval.tick() => {
71+
self.update().await;
72+
self.prune().await;
73+
},
74+
_ = running.changed() => {
75+
if !*running.borrow() {
76+
return;
77+
}
78+
}
79+
}
80+
}
81+
})
82+
}
83+
84+
fn walk_proc_cgroups_inner(path: &PathBuf, map: &mut ContainerIdMap, parent_id: Option<&str>) {
85+
for entry in std::fs::read_dir(path).unwrap() {
86+
let entry = entry.unwrap();
87+
let p = entry.path();
88+
if !p.is_dir() {
89+
continue;
90+
}
91+
92+
let container_id = match map.get_mut(&entry.ino()) {
93+
Some(e) => {
94+
e.last_seen = SystemTime::now();
95+
e.container_id.clone()
96+
}
97+
None => {
98+
let last_component = p
99+
.file_name()
100+
.map(|f| f.to_str().unwrap_or(""))
101+
.unwrap_or("");
102+
let container_id = match ContainerIdCache::extract_container_id(last_component)
103+
{
104+
Some(cid) => Some(cid),
105+
None => parent_id.map(|f| f.to_owned()),
106+
};
107+
let last_seen = SystemTime::now();
108+
map.insert(
109+
entry.ino(),
110+
ContainerIdEntry {
111+
container_id: container_id.clone(),
112+
last_seen,
113+
},
114+
);
115+
container_id
116+
}
117+
};
118+
ContainerIdCache::walk_proc_cgroups_inner(&p, map, container_id.as_deref());
119+
}
120+
}
121+
122+
/// Extract the short (12-character) container ID from a cgroup
/// directory name, if the name contains one.
///
/// Recognized names end in a 64-character hex container ID, optionally
/// preceded by a runtime prefix terminated by '-' (e.g.
/// "cri-containerd-<id>", "libpod-<id>") and optionally carrying a
/// systemd ".scope" suffix. Returns `None` for anything else.
pub fn extract_container_id(cgroup: &str) -> Option<String> {
    if cgroup.is_empty() {
        return None;
    }

    let cgroup = cgroup.strip_suffix(".scope").unwrap_or(cgroup);
    if cgroup.len() < 64 {
        return None;
    }

    // `split_at` panics if the byte offset lands inside a multi-byte
    // UTF-8 character. Such a name cannot end in 64 ASCII hex digits
    // anyway, so reject it instead of panicking.
    let split_point = cgroup.len() - 64;
    if !cgroup.is_char_boundary(split_point) {
        return None;
    }
    let (prefix, id) = cgroup.split_at(split_point);

    // The ID must be the entire name or follow a '-'-terminated runtime
    // prefix; this filters out names that merely happen to be long.
    if !prefix.is_empty() && !prefix.ends_with('-') {
        return None;
    }

    if id.chars().all(|c| c.is_ascii_hexdigit()) {
        // Keep only the short form, as displayed by container runtimes.
        Some(id.split_at(12).0.to_owned())
    } else {
        None
    }
}
144+
}
145+
146+
#[cfg(test)]
mod tests {
    use super::*;

    // Table-driven check of container-ID extraction across the cgroup
    // naming schemes of several runtimes: bare containerd/docker hex
    // IDs, cri-containerd systemd scopes, libpod/conmon scopes — plus
    // names that must NOT be mistaken for container IDs (kubelet pod
    // slices, init.scope, flatpak app scopes).
    #[test]
    fn extract_container_id() {
        let tests = [
            ("e73c55f3e7f5b6a9cfc32a89bf13e44d348bcc4fa7b079f804d61fb1532ddbe5", Some("e73c55f3e7f5")),
            ("cri-containerd-219d7afb8e7450929eaeb06f2d27cbf7183bfa5b55b7275696f3df4154a979af.scope", Some("219d7afb8e74")),
            ("kubelet-kubepods-burstable-pod469726a5_079d_4d15_a259_1f654b534b44.slice", None),
            ("libpod-conmon-a2d2a36121868d946af912b931fc5f6b42bf84c700cef67784422b1e2c8585ee.scope", Some("a2d2a3612186")),
            ("init.scope", None),
            ("app-flatpak-com.github.IsmaelMartinez.teams_for_linux-384393947.scope", None),
        ];

        for (cgroup, expected) in tests {
            let cid = ContainerIdCache::extract_container_id(cgroup);
            assert_eq!(cid.as_deref(), expected);
        }
    }
}

0 commit comments

Comments
 (0)