Skip to content

Commit 9618908

Browse files
committed
io_uring: ensure ctx->rings is stable for task work flags manipulation
If DEFER_TASKRUN | SETUP_TASKRUN is used and task work is added while the ring is being resized, it's possible for the OR'ing of IORING_SQ_TASKRUN to happen in the small window between swapping in the new rings and the old rings being freed. Prevent this by adding a 2nd ->rings pointer, ->rings_rcu, which is protected by RCU.

The task work flags manipulation is inside RCU already, and if the resize ring freeing is done post an RCU synchronize, then there's no need to add locking to the fast path of task work additions.

Note: this is only done for DEFER_TASKRUN, as that's the only setup mode that supports ring resizing. If this ever changes, then the other setup modes too need to use the io_ctx_mark_taskrun() helper.

Link: https://lore.kernel.org/io-uring/20260309062759.482210-1-naup96721@gmail.com/
Cc: stable@vger.kernel.org
Fixes: 79cfe9e ("io_uring/register: add IORING_REGISTER_RESIZE_RINGS")
Reported-by: Hao-Yu Yang <naup96721@gmail.com>
Suggested-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 785d462 commit 9618908

4 files changed

Lines changed: 34 additions & 2 deletions

File tree

include/linux/io_uring_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,7 @@ struct io_ring_ctx {
388388
* regularly bounce b/w CPUs.
389389
*/
390390
struct {
391+
struct io_rings __rcu *rings_rcu;
391392
struct llist_head work_llist;
392393
struct llist_head retry_llist;
393394
unsigned long check_cq;

io_uring/io_uring.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2066,6 +2066,7 @@ static void io_rings_free(struct io_ring_ctx *ctx)
20662066
io_free_region(ctx->user, &ctx->sq_region);
20672067
io_free_region(ctx->user, &ctx->ring_region);
20682068
ctx->rings = NULL;
2069+
RCU_INIT_POINTER(ctx->rings_rcu, NULL);
20692070
ctx->sq_sqes = NULL;
20702071
}
20712072

@@ -2703,6 +2704,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
27032704
if (ret)
27042705
return ret;
27052706
ctx->rings = rings = io_region_get_ptr(&ctx->ring_region);
2707+
rcu_assign_pointer(ctx->rings_rcu, rings);
27062708
if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
27072709
ctx->sq_array = (u32 *)((char *)rings + rl->sq_array_offset);
27082710

io_uring/register.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,7 +633,15 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
633633
ctx->sq_entries = p->sq_entries;
634634
ctx->cq_entries = p->cq_entries;
635635

636+
/*
637+
* Just mark any flag we may have missed and that the application
638+
* should act on unconditionally. Worst case it'll be an extra
639+
* syscall.
640+
*/
641+
atomic_or(IORING_SQ_TASKRUN | IORING_SQ_NEED_WAKEUP, &n.rings->sq_flags);
636642
ctx->rings = n.rings;
643+
rcu_assign_pointer(ctx->rings_rcu, n.rings);
644+
637645
ctx->sq_sqes = n.sq_sqes;
638646
swap_old(ctx, o, n, ring_region);
639647
swap_old(ctx, o, n, sq_region);
@@ -642,6 +650,9 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
642650
out:
643651
spin_unlock(&ctx->completion_lock);
644652
mutex_unlock(&ctx->mmap_lock);
653+
/* Wait for concurrent io_ctx_mark_taskrun() */
654+
if (to_free == &o)
655+
synchronize_rcu_expedited();
645656
io_register_free_rings(ctx, to_free);
646657

647658
if (ctx->sq_data)

io_uring/tw.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,21 @@ void tctx_task_work(struct callback_head *cb)
152152
WARN_ON_ONCE(ret);
153153
}
154154

155+
/*
156+
* Sets IORING_SQ_TASKRUN in the sq_flags shared with userspace, using the
157+
* RCU protected rings pointer to be safe against concurrent ring resizing.
158+
*/
159+
static void io_ctx_mark_taskrun(struct io_ring_ctx *ctx)
160+
{
161+
lockdep_assert_in_rcu_read_lock();
162+
163+
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) {
164+
struct io_rings *rings = rcu_dereference(ctx->rings_rcu);
165+
166+
atomic_or(IORING_SQ_TASKRUN, &rings->sq_flags);
167+
}
168+
}
169+
155170
void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
156171
{
157172
struct io_ring_ctx *ctx = req->ctx;
@@ -206,8 +221,7 @@ void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
206221
*/
207222

208223
if (!head) {
209-
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
210-
atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
224+
io_ctx_mark_taskrun(ctx);
211225
if (ctx->has_evfd)
212226
io_eventfd_signal(ctx, false);
213227
}
@@ -231,6 +245,10 @@ void io_req_normal_work_add(struct io_kiocb *req)
231245
if (!llist_add(&req->io_task_work.node, &tctx->task_list))
232246
return;
233247

248+
/*
249+
* Doesn't need to use ->rings_rcu, as resizing isn't supported for
250+
* !DEFER_TASKRUN.
251+
*/
234252
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
235253
atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
236254

0 commit comments

Comments
 (0)