Skip to content

Commit d6f5841

Browse files
author
Alexei Starovoitov
committed
Merge branch 'bpf-allow-utf-8-literals-in-bpf_bprintf_prepare'
Yihan Ding says:

====================
bpf: allow UTF-8 literals in bpf_bprintf_prepare()

bpf_bprintf_prepare() currently rejects any non-ASCII byte in format strings, so helpers such as bpf_trace_printk() fail to emit UTF-8 literal text even when those bytes are not part of a format specifier.

Keep plain text permissive while continuing to parse '%' sequences as ASCII-only.

Patch 1 updates snprintf_negative() at the same time so the selftests stay consistent during bisection. Patch 2 then extends trace_printk coverage for both the valid UTF-8 literal case and the invalid non-ASCII-after-'%' case.

Changes in v3:
- drop Suggested-by trailers and move review credit into this changelog
- update test_snprintf_negative() in patch 1/2 so plain non-ASCII text is accepted while non-ASCII after '%' is still rejected, keeping ./test_progs -t snprintf aligned with the new behavior.
- clarify the trace_printk negative case with an explicit invalid format string and comment
- address Paul Chaignon's review feedback and keep the negative coverage requested earlier by Alan Maguire

Changes in v2:
- split the core change and selftest updates into two patches
- drop unnecessary isspace()/ispunct() casts
- add comments to clarify plain-text vs format-specifier handling
- add a negative selftest for non-ASCII bytes inside '%' sequences

Testing:
- Reproduced on x86_64 without the core fix: ASCII trace output works, while UTF-8 literal text in bpf_trace_printk() is rejected and produces no trace output
- Verified with tools/testing/selftests/bpf: ./test_progs -t trace_printk
- Verified with tools/testing/selftests/bpf: ./test_progs -t snprintf
====================

Link: https://patch.msgid.link/20260416120142.1420646-1-dingyihan@uniontech.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2 parents 766bf02 + 4198ff3 commit d6f5841

4 files changed

Lines changed: 50 additions & 8 deletions

File tree

kernel/bpf/helpers.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -845,7 +845,13 @@ int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args,
845845
data->buf = buffers->buf;
846846

847847
for (i = 0; i < fmt_size; i++) {
848-
if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
848+
unsigned char c = fmt[i];
849+
850+
/*
851+
* Permit bytes >= 0x80 in plain text so UTF-8 literals can pass
852+
* through unchanged, while still rejecting ASCII control bytes.
853+
*/
854+
if (isascii(c) && !isprint(c) && !isspace(c)) {
849855
err = -EINVAL;
850856
goto out;
851857
}
@@ -867,6 +873,15 @@ int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args,
867873
* always access fmt[i + 1], in the worst case it will be a 0
868874
*/
869875
i++;
876+
c = fmt[i];
877+
/*
878+
* The format parser below only understands ASCII conversion
879+
* specifiers and modifiers, so reject non-ASCII after '%'.
880+
*/
881+
if (!isascii(c)) {
882+
err = -EINVAL;
883+
goto out;
884+
}
870885

871886
/* skip optional "[0 +-][num]" width formatting field */
872887
while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||

tools/testing/selftests/bpf/prog_tests/snprintf.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,8 @@ static void test_snprintf_negative(void)
114114
ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5");
115115
ASSERT_ERR(load_single_snprintf("%lc"), "invalid specifier 6");
116116
ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7");
117-
ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
117+
ASSERT_OK(load_single_snprintf("\x80"), "non ascii plain text");
118+
ASSERT_ERR(load_single_snprintf("%\x80"), "non ascii in specifier");
118119
ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
119120
ASSERT_ERR(load_single_snprintf("%p%"), "invalid specifier 8");
120121
ASSERT_ERR(load_single_snprintf("%s%"), "invalid specifier 9");

tools/testing/selftests/bpf/prog_tests/trace_printk.c

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,21 @@
66
#include "trace_printk.lskel.h"
77

88
#define SEARCHMSG "testing,testing"
9+
#define SEARCHMSG_UTF8 "中文,测试"
910

1011
static void trace_pipe_cb(const char *str, void *data)
1112
{
1213
if (strstr(str, SEARCHMSG) != NULL)
13-
(*(int *)data)++;
14+
((int *)data)[0]++;
15+
if (strstr(str, SEARCHMSG_UTF8))
16+
((int *)data)[1]++;
1417
}
1518

1619
void serial_test_trace_printk(void)
1720
{
1821
struct trace_printk_lskel__bss *bss;
1922
struct trace_printk_lskel *skel;
20-
int err = 0, found = 0;
23+
int err = 0, found[2] = {};
2124

2225
skel = trace_printk_lskel__open();
2326
if (!ASSERT_OK_PTR(skel, "trace_printk__open"))
@@ -46,11 +49,24 @@ void serial_test_trace_printk(void)
4649
if (!ASSERT_GT(bss->trace_printk_ret, 0, "bss->trace_printk_ret"))
4750
goto cleanup;
4851

49-
/* verify our search string is in the trace buffer */
50-
ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, &found, 1000),
51-
"read_trace_pipe_iter");
52+
if (!ASSERT_GT(bss->trace_printk_utf8_ran, 0, "bss->trace_printk_utf8_ran"))
53+
goto cleanup;
54+
55+
if (!ASSERT_GT(bss->trace_printk_utf8_ret, 0, "bss->trace_printk_utf8_ret"))
56+
goto cleanup;
57+
58+
if (!ASSERT_LT(bss->trace_printk_invalid_spec_ret, 0,
59+
"bss->trace_printk_invalid_spec_ret"))
60+
goto cleanup;
61+
62+
/* verify our search strings are in the trace buffer */
63+
ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, found, 1000),
64+
"read_trace_pipe_iter");
65+
66+
if (!ASSERT_EQ(found[0], bss->trace_printk_ran, "found"))
67+
goto cleanup;
5268

53-
if (!ASSERT_EQ(found, bss->trace_printk_ran, "found"))
69+
if (!ASSERT_EQ(found[1], bss->trace_printk_utf8_ran, "found_utf8"))
5470
goto cleanup;
5571

5672
cleanup:

tools/testing/selftests/bpf/progs/trace_printk.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,23 @@ char _license[] SEC("license") = "GPL";
1010

1111
int trace_printk_ret = 0;
1212
int trace_printk_ran = 0;
13+
int trace_printk_invalid_spec_ret = 0;
14+
int trace_printk_utf8_ret = 0;
15+
int trace_printk_utf8_ran = 0;
1316

1417
const char fmt[] = "Testing,testing %d\n";
18+
static const char utf8_fmt[] = "中文,测试 %d\n";
19+
/* Non-ASCII bytes after '%' must still be rejected. */
20+
static const char invalid_spec_fmt[] = "%\x80\n";
1521

1622
SEC("fentry/" SYS_PREFIX "sys_nanosleep")
1723
int sys_enter(void *ctx)
1824
{
1925
trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt),
2026
++trace_printk_ran);
27+
trace_printk_utf8_ret = bpf_trace_printk(utf8_fmt, sizeof(utf8_fmt),
28+
++trace_printk_utf8_ran);
29+
trace_printk_invalid_spec_ret = bpf_trace_printk(invalid_spec_fmt,
30+
sizeof(invalid_spec_fmt));
2131
return 0;
2232
}

0 commit comments

Comments
 (0)