Skip to content

Commit e67bf35

Browse files
committed
Merge tag 'io_uring-7.0-20260312' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring fixes from Jens Axboe: - Fix an inverted true/false comment on task_no_new_privs, from the BPF filtering changes merged in this release - Use the migration disabling way of running the BPF filters, as the io_uring side doesn't do that already - Fix an issue with ->rings stability under resize, both for local task_work additions and for eventfd signaling - Fix an issue with SQE mixed mode, where a bounds check wasn't correct for having a 128b SQE - Fix an issue where a legacy provided buffer group is changed to a ring mapped one while legacy buffers from that group are in flight * tag 'io_uring-7.0-20260312' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: io_uring/kbuf: check if target buffer list is still legacy on recycle io_uring: fix physical SQE bounds check for SQE_MIXED 128-byte ops io_uring/eventfd: use ctx->rings_rcu for flags checking io_uring: ensure ctx->rings is stable for task work flags manipulation io_uring/bpf_filter: use bpf_prog_run_pin_on_cpu() to prevent migration io_uring/register: fix comment about task_no_new_privs
2 parents 8174daf + c2c185b commit e67bf35

7 files changed

Lines changed: 56 additions & 11 deletions

File tree

include/linux/io_uring_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,7 @@ struct io_ring_ctx {
388388
* regularly bounce b/w CPUs.
389389
*/
390390
struct {
391+
struct io_rings __rcu *rings_rcu;
391392
struct llist_head work_llist;
392393
struct llist_head retry_llist;
393394
unsigned long check_cq;

io_uring/bpf_filter.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ int __io_uring_run_bpf_filters(struct io_bpf_filter __rcu **filters,
8585
do {
8686
if (filter == &dummy_filter)
8787
return -EACCES;
88-
ret = bpf_prog_run(filter->prog, &bpf_ctx);
88+
ret = bpf_prog_run_pin_on_cpu(filter->prog, &bpf_ctx);
8989
if (!ret)
9090
return -EACCES;
9191
filter = filter->next;

io_uring/eventfd.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,15 @@ void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event)
7676
{
7777
bool skip = false;
7878
struct io_ev_fd *ev_fd;
79-
80-
if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
81-
return;
79+
struct io_rings *rings;
8280

8381
guard(rcu)();
82+
83+
rings = rcu_dereference(ctx->rings_rcu);
84+
if (!rings)
85+
return;
86+
if (READ_ONCE(rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
87+
return;
8488
ev_fd = rcu_dereference(ctx->io_ev_fd);
8589
/*
8690
* Check again if ev_fd exists in case an io_eventfd_unregister call

io_uring/io_uring.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1745,7 +1745,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
17451745
* well as 2 contiguous entries.
17461746
*/
17471747
if (!(ctx->flags & IORING_SETUP_SQE_MIXED) || *left < 2 ||
1748-
!(ctx->cached_sq_head & (ctx->sq_entries - 1)))
1748+
(unsigned)(sqe - ctx->sq_sqes) >= ctx->sq_entries - 1)
17491749
return io_init_fail_req(req, -EINVAL);
17501750
/*
17511751
* A 128b operation on a mixed SQ uses two entries, so we have
@@ -2066,6 +2066,7 @@ static void io_rings_free(struct io_ring_ctx *ctx)
20662066
io_free_region(ctx->user, &ctx->sq_region);
20672067
io_free_region(ctx->user, &ctx->ring_region);
20682068
ctx->rings = NULL;
2069+
RCU_INIT_POINTER(ctx->rings_rcu, NULL);
20692070
ctx->sq_sqes = NULL;
20702071
}
20712072

@@ -2703,6 +2704,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
27032704
if (ret)
27042705
return ret;
27052706
ctx->rings = rings = io_region_get_ptr(&ctx->ring_region);
2707+
rcu_assign_pointer(ctx->rings_rcu, rings);
27062708
if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
27072709
ctx->sq_array = (u32 *)((char *)rings + rl->sq_array_offset);
27082710

io_uring/kbuf.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,18 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
111111

112112
buf = req->kbuf;
113113
bl = io_buffer_get_list(ctx, buf->bgid);
114-
list_add(&buf->list, &bl->buf_list);
115-
bl->nbufs++;
114+
/*
115+
* If the buffer list was upgraded to a ring-based one, or removed,
116+
* while the request was in-flight in io-wq, drop it.
117+
*/
118+
if (bl && !(bl->flags & IOBL_BUF_RING)) {
119+
list_add(&buf->list, &bl->buf_list);
120+
bl->nbufs++;
121+
} else {
122+
kfree(buf);
123+
}
116124
req->flags &= ~REQ_F_BUFFER_SELECTED;
125+
req->kbuf = NULL;
117126

118127
io_ring_submit_unlock(ctx, issue_flags);
119128
return true;

io_uring/register.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ static int io_register_restrictions_task(void __user *arg, unsigned int nr_args)
202202
return -EPERM;
203203
/*
204204
* Similar to seccomp, disallow setting a filter if task_no_new_privs
205-
* is true and we're not CAP_SYS_ADMIN.
205+
* is false and we're not CAP_SYS_ADMIN.
206206
*/
207207
if (!task_no_new_privs(current) &&
208208
!ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
@@ -238,7 +238,7 @@ static int io_register_bpf_filter_task(void __user *arg, unsigned int nr_args)
238238

239239
/*
240240
* Similar to seccomp, disallow setting a filter if task_no_new_privs
241-
* is true and we're not CAP_SYS_ADMIN.
241+
* is false and we're not CAP_SYS_ADMIN.
242242
*/
243243
if (!task_no_new_privs(current) &&
244244
!ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
@@ -633,7 +633,15 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
633633
ctx->sq_entries = p->sq_entries;
634634
ctx->cq_entries = p->cq_entries;
635635

636+
/*
637+
* Just mark any flag we may have missed and that the application
638+
* should act on unconditionally. Worst case it'll be an extra
639+
* syscall.
640+
*/
641+
atomic_or(IORING_SQ_TASKRUN | IORING_SQ_NEED_WAKEUP, &n.rings->sq_flags);
636642
ctx->rings = n.rings;
643+
rcu_assign_pointer(ctx->rings_rcu, n.rings);
644+
637645
ctx->sq_sqes = n.sq_sqes;
638646
swap_old(ctx, o, n, ring_region);
639647
swap_old(ctx, o, n, sq_region);
@@ -642,6 +650,9 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
642650
out:
643651
spin_unlock(&ctx->completion_lock);
644652
mutex_unlock(&ctx->mmap_lock);
653+
/* Wait for concurrent io_ctx_mark_taskrun() */
654+
if (to_free == &o)
655+
synchronize_rcu_expedited();
645656
io_register_free_rings(ctx, to_free);
646657

647658
if (ctx->sq_data)

io_uring/tw.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,21 @@ void tctx_task_work(struct callback_head *cb)
152152
WARN_ON_ONCE(ret);
153153
}
154154

155+
/*
156+
* Sets IORING_SQ_TASKRUN in the sq_flags shared with userspace, using the
157+
* RCU protected rings pointer to be safe against concurrent ring resizing.
158+
*/
159+
static void io_ctx_mark_taskrun(struct io_ring_ctx *ctx)
160+
{
161+
lockdep_assert_in_rcu_read_lock();
162+
163+
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) {
164+
struct io_rings *rings = rcu_dereference(ctx->rings_rcu);
165+
166+
atomic_or(IORING_SQ_TASKRUN, &rings->sq_flags);
167+
}
168+
}
169+
155170
void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
156171
{
157172
struct io_ring_ctx *ctx = req->ctx;
@@ -206,8 +221,7 @@ void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
206221
*/
207222

208223
if (!head) {
209-
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
210-
atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
224+
io_ctx_mark_taskrun(ctx);
211225
if (ctx->has_evfd)
212226
io_eventfd_signal(ctx, false);
213227
}
@@ -231,6 +245,10 @@ void io_req_normal_work_add(struct io_kiocb *req)
231245
if (!llist_add(&req->io_task_work.node, &tctx->task_list))
232246
return;
233247

248+
/*
249+
* Doesn't need to use ->rings_rcu, as resizing isn't supported for
250+
* !DEFER_TASKRUN.
251+
*/
234252
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
235253
atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
236254

0 commit comments

Comments
 (0)