Skip to content

Commit 9080517

Browse files
namhyung authored and gregkh committed
bpf: Adjust BPF stack helper functions to accommodate skip > 0
commit ee2a098 upstream.

Let's say that the caller has storage for num_elem stack frames. Then, the BPF stack helper functions walk the stack for only num_elem frames. This means that if skip > 0, one keeps only 'num_elem - skip' frames.

This is because it sets init_nr in the perf_callchain_entry to the end of the buffer to save num_elem entries only. I believe it was because the perf callchain code unwound the stack frames until it reached the global max size (sysctl_perf_event_max_stack).

However, it now has perf_callchain_entry_ctx.max_stack to limit the iteration locally. This simplifies the code to handle init_nr in the BPF callstack entries and removes the confusion with the perf_event's __PERF_SAMPLE_CALLCHAIN_EARLY which sets init_nr to 0.

Also change the comment on bpf_get_stack() in the header file to be more explicit about what the return value means.

Fixes: c195651 ("bpf: add bpf_get_stack helper")
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/30a7b5d5-6726-1cc2-eaee-8da2828a9a9c@oracle.com
Link: https://lore.kernel.org/bpf/20220314182042.71025-1-namhyung@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Based-on-patch-by: Eugene Loh <eugene.loh@oracle.com>
1 parent 8648949 commit 9080517

2 files changed

Lines changed: 28 additions & 36 deletions

File tree

include/uapi/linux/bpf.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2163,8 +2163,8 @@ union bpf_attr {
21632163
*
21642164
* # sysctl kernel.perf_event_max_stack=<new value>
21652165
* Return
2166-
* A non-negative value equal to or less than *size* on success,
2167-
* or a negative error in case of failure.
2166+
* The non-negative copied *buf* length equal to or less than
2167+
* *size* on success, or a negative error in case of failure.
21682168
*
21692169
* long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
21702170
* Description
@@ -3448,8 +3448,8 @@ union bpf_attr {
34483448
*
34493449
* # sysctl kernel.perf_event_max_stack=<new value>
34503450
* Return
3451-
* A non-negative value equal to or less than *size* on success,
3452-
* or a negative error in case of failure.
3451+
* The non-negative copied *buf* length equal to or less than
3452+
* *size* on success, or a negative error in case of failure.
34533453
*
34543454
* long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags)
34553455
* Description

kernel/bpf/stackmap.c

Lines changed: 24 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -358,7 +358,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
358358
}
359359

360360
static struct perf_callchain_entry *
361-
get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
361+
get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
362362
{
363363
#ifdef CONFIG_STACKTRACE
364364
struct perf_callchain_entry *entry;
@@ -369,9 +369,8 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
369369
if (!entry)
370370
return NULL;
371371

372-
entry->nr = init_nr +
373-
stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr),
374-
sysctl_perf_event_max_stack - init_nr, 0);
372+
entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip,
373+
max_depth, 0);
375374

376375
/* stack_trace_save_tsk() works on unsigned long array, while
377376
* perf_callchain_entry uses u64 array. For 32-bit systems, it is
@@ -383,7 +382,7 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
383382
int i;
384383

385384
/* copy data from the end to avoid using extra buffer */
386-
for (i = entry->nr - 1; i >= (int)init_nr; i--)
385+
for (i = entry->nr - 1; i >= 0; i--)
387386
to[i] = (u64)(from[i]);
388387
}
389388

@@ -400,27 +399,19 @@ static long __bpf_get_stackid(struct bpf_map *map,
400399
{
401400
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
402401
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
403-
u32 max_depth = map->value_size / stack_map_data_size(map);
404-
/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
405-
u32 init_nr = sysctl_perf_event_max_stack - max_depth;
406402
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
407403
u32 hash, id, trace_nr, trace_len;
408404
bool user = flags & BPF_F_USER_STACK;
409405
u64 *ips;
410406
bool hash_matches;
411407

412-
/* get_perf_callchain() guarantees that trace->nr >= init_nr
413-
* and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
414-
*/
415-
trace_nr = trace->nr - init_nr;
416-
417-
if (trace_nr <= skip)
408+
if (trace->nr <= skip)
418409
/* skipping more than usable stack trace */
419410
return -EFAULT;
420411

421-
trace_nr -= skip;
412+
trace_nr = trace->nr - skip;
422413
trace_len = trace_nr * sizeof(u64);
423-
ips = trace->ip + skip + init_nr;
414+
ips = trace->ip + skip;
424415
hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0);
425416
id = hash & (smap->n_buckets - 1);
426417
bucket = READ_ONCE(smap->buckets[id]);
@@ -477,8 +468,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
477468
u64, flags)
478469
{
479470
u32 max_depth = map->value_size / stack_map_data_size(map);
480-
/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
481-
u32 init_nr = sysctl_perf_event_max_stack - max_depth;
471+
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
482472
bool user = flags & BPF_F_USER_STACK;
483473
struct perf_callchain_entry *trace;
484474
bool kernel = !user;
@@ -487,8 +477,12 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
487477
BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
488478
return -EINVAL;
489479

490-
trace = get_perf_callchain(regs, init_nr, kernel, user,
491-
sysctl_perf_event_max_stack, false, false);
480+
max_depth += skip;
481+
if (max_depth > sysctl_perf_event_max_stack)
482+
max_depth = sysctl_perf_event_max_stack;
483+
484+
trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
485+
false, false);
492486

493487
if (unlikely(!trace))
494488
/* couldn't fetch the stack trace */
@@ -579,7 +573,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
579573
struct perf_callchain_entry *trace_in,
580574
void *buf, u32 size, u64 flags)
581575
{
582-
u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
576+
u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
583577
bool user_build_id = flags & BPF_F_USER_BUILD_ID;
584578
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
585579
bool user = flags & BPF_F_USER_STACK;
@@ -604,30 +598,28 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
604598
goto err_fault;
605599

606600
num_elem = size / elem_size;
607-
if (sysctl_perf_event_max_stack < num_elem)
608-
init_nr = 0;
609-
else
610-
init_nr = sysctl_perf_event_max_stack - num_elem;
601+
max_depth = num_elem + skip;
602+
if (sysctl_perf_event_max_stack < max_depth)
603+
max_depth = sysctl_perf_event_max_stack;
611604

612605
if (trace_in)
613606
trace = trace_in;
614607
else if (kernel && task)
615-
trace = get_callchain_entry_for_task(task, init_nr);
608+
trace = get_callchain_entry_for_task(task, max_depth);
616609
else
617-
trace = get_perf_callchain(regs, init_nr, kernel, user,
618-
sysctl_perf_event_max_stack,
610+
trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
619611
false, false);
620612
if (unlikely(!trace))
621613
goto err_fault;
622614

623-
trace_nr = trace->nr - init_nr;
624-
if (trace_nr < skip)
615+
if (trace->nr < skip)
625616
goto err_fault;
626617

627-
trace_nr -= skip;
618+
trace_nr = trace->nr - skip;
628619
trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
629620
copy_len = trace_nr * elem_size;
630-
ips = trace->ip + skip + init_nr;
621+
622+
ips = trace->ip + skip;
631623
if (user && user_build_id)
632624
stack_map_get_build_id_offset(buf, ips, trace_nr, user);
633625
else

0 commit comments

Comments
 (0)