
Commit 06a24ef

Merge branch 'io_uring-7.0' into for-next
* io_uring-7.0:
  io_uring/bpf_filters: retain COW'ed settings on parse failures
  io_uring: protect remaining lockless ctx->rings accesses with RCU
  io_uring/rsrc: reject zero-length fixed buffer import
  io_uring/net: fix slab-out-of-bounds read in io_bundle_nbufs()
2 parents ead8ccb + aa35dd6

7 files changed: 87 additions & 29 deletions

io_uring/io_uring.c

Lines changed: 5 additions & 2 deletions

@@ -2015,7 +2015,7 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
         if (ctx->flags & IORING_SETUP_SQ_REWIND)
                 entries = ctx->sq_entries;
         else
-                entries = io_sqring_entries(ctx);
+                entries = __io_sqring_entries(ctx);
 
         entries = min(nr, entries);
         if (unlikely(!entries))
@@ -2250,7 +2250,9 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
          */
         poll_wait(file, &ctx->poll_wq, wait);
 
-        if (!io_sqring_full(ctx))
+        rcu_read_lock();
+
+        if (!__io_sqring_full(ctx))
                 mask |= EPOLLOUT | EPOLLWRNORM;
 
         /*
@@ -2270,6 +2272,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
         if (__io_cqring_events_user(ctx) || io_has_work(ctx))
                 mask |= EPOLLIN | EPOLLRDNORM;
 
+        rcu_read_unlock();
         return mask;
 }

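Note on the poll-path change above: io_uring_poll() runs without ctx->uring_lock, so once the rings allocation can be replaced out from under it, a bare ctx->rings dereference becomes a potential use-after-free. The sketch below shows the general RCU publish/read pattern the series leans on; the updater function and its locking are illustrative assumptions, not the kernel's actual resize code:

    /* Reader: pin a snapshot of the rings pointer for the duration of use. */
    static bool example_sqring_full(struct io_ring_ctx *ctx)
    {
            struct io_rings *r;
            bool full;

            rcu_read_lock();
            r = rcu_dereference(ctx->rings_rcu);
            full = READ_ONCE(r->sq.tail) - READ_ONCE(r->sq.head) == ctx->sq_entries;
            rcu_read_unlock();
            return full;
    }

    /* Illustrative updater: publish the new rings, then free the old copy
     * only after every reader that might still hold it has finished. */
    static void example_replace_rings(struct io_ring_ctx *ctx,
                                      struct io_rings *new_rings)
    {
            struct io_rings *old = rcu_dereference_protected(ctx->rings_rcu,
                                            lockdep_is_held(&ctx->uring_lock));

            rcu_assign_pointer(ctx->rings_rcu, new_rings);
            synchronize_rcu();      /* wait out all rcu_read_lock() readers */
            kvfree(old);
    }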
io_uring/io_uring.h

Lines changed: 29 additions & 5 deletions

@@ -142,16 +142,28 @@ struct io_wait_queue {
 #endif
 };
 
+static inline struct io_rings *io_get_rings(struct io_ring_ctx *ctx)
+{
+        return rcu_dereference_check(ctx->rings_rcu,
+                                     lockdep_is_held(&ctx->uring_lock) ||
+                                     lockdep_is_held(&ctx->completion_lock));
+}
+
 static inline bool io_should_wake(struct io_wait_queue *iowq)
 {
         struct io_ring_ctx *ctx = iowq->ctx;
-        int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail;
+        struct io_rings *rings;
+        int dist;
+
+        guard(rcu)();
+        rings = io_get_rings(ctx);
 
         /*
          * Wake up if we have enough events, or if a timeout occurred since we
          * started waiting. For timeouts, we always want to return to userspace,
          * regardless of event count.
          */
+        dist = READ_ONCE(rings->cq.tail) - (int) iowq->cq_tail;
         return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
 }
 
@@ -431,9 +443,9 @@ static inline void io_cqring_wake(struct io_ring_ctx *ctx)
         __io_wq_wake(&ctx->cq_wait);
 }
 
-static inline bool io_sqring_full(struct io_ring_ctx *ctx)
+static inline bool __io_sqring_full(struct io_ring_ctx *ctx)
 {
-        struct io_rings *r = ctx->rings;
+        struct io_rings *r = io_get_rings(ctx);
 
         /*
          * SQPOLL must use the actual sqring head, as using the cached_sq_head
@@ -445,16 +457,28 @@ static inline bool io_sqring_full(struct io_ring_ctx *ctx)
         return READ_ONCE(r->sq.tail) - READ_ONCE(r->sq.head) == ctx->sq_entries;
 }
 
-static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
+static inline bool io_sqring_full(struct io_ring_ctx *ctx)
 {
-        struct io_rings *rings = ctx->rings;
+        guard(rcu)();
+        return __io_sqring_full(ctx);
+}
+
+static inline unsigned int __io_sqring_entries(struct io_ring_ctx *ctx)
+{
+        struct io_rings *rings = io_get_rings(ctx);
         unsigned int entries;
 
         /* make sure SQ entry isn't read before tail */
         entries = smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
         return min(entries, ctx->sq_entries);
 }
 
+static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
+{
+        guard(rcu)();
+        return __io_sqring_entries(ctx);
+}
+
 /*
  * Don't complete immediately but use deferred completion infrastructure.
  * Protected by ->uring_lock and can only be used either with

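The guard(rcu)() in these helpers is the scope-based cleanup primitive from <linux/cleanup.h>: it takes rcu_read_lock() where it is declared and drops the read lock automatically when the scope ends, including on early return. A rough equivalence, with illustrative helper names:

    /* With the guard ... */
    static inline unsigned int entries_guarded(struct io_ring_ctx *ctx)
    {
            guard(rcu)();                   /* rcu_read_lock() here */
            return __io_sqring_entries(ctx);
    }                                       /* rcu_read_unlock() on scope exit */

    /* ... is roughly the open-coded form: */
    static inline unsigned int entries_open_coded(struct io_ring_ctx *ctx)
    {
            unsigned int ret;

            rcu_read_lock();
            ret = __io_sqring_entries(ctx);
            rcu_read_unlock();
            return ret;
    }

The rcu_dereference_check() condition in io_get_rings() additionally tells lockdep that holding ctx->uring_lock or ctx->completion_lock is as good as an RCU read section, which presumably means the update side cannot swap the rings while either lock is held.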
io_uring/net.c

Lines changed: 4 additions & 0 deletions

@@ -421,6 +421,8 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
         sr->done_io = 0;
         sr->len = READ_ONCE(sqe->len);
+        if (unlikely(sr->len < 0))
+                return -EINVAL;
         sr->flags = READ_ONCE(sqe->ioprio);
         if (sr->flags & ~SENDMSG_FLAGS)
                 return -EINVAL;
@@ -791,6 +793,8 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
         sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
         sr->len = READ_ONCE(sqe->len);
+        if (unlikely(sr->len < 0))
+                return -EINVAL;
         sr->flags = READ_ONCE(sqe->ioprio);
         if (sr->flags & ~RECVMSG_FLAGS)
                 return -EINVAL;

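The two checks above rely on sr->len being a signed field in this tree: sqe->len is a u32, so a value with the top bit set becomes negative after assignment and is now rejected at prep time. Presumably (going by the commit title; the patch description is not shown here) such a length previously flowed into io_bundle_nbufs() and drove an out-of-bounds walk of the buffer array. A tiny userspace illustration of the conversion hazard:

    #include <stdio.h>

    int main(void)
    {
            unsigned int sqe_len = 0x80000004u;     /* attacker-controlled u32 */
            int len = (int)sqe_len;                 /* what a signed sr->len sees */

            /* converting the negative value back to an unsigned size makes it
             * enormous, which is how one bad length corrupts count math */
            printf("len = %d, as size: %zu\n", len, (size_t)len);
            return !(len < 0);                      /* the new prep-time check */
    }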
io_uring/register.c

Lines changed: 9 additions & 1 deletion

@@ -178,9 +178,17 @@ static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
                 return -EBUSY;
 
         ret = io_parse_restrictions(arg, nr_args, &ctx->restrictions);
-        /* Reset all restrictions if an error happened */
+        /*
+         * Reset all restrictions if an error happened, but retain any COW'ed
+         * settings.
+         */
         if (ret < 0) {
+                struct io_bpf_filters *bpf = ctx->restrictions.bpf_filters;
+                bool cowed = ctx->restrictions.bpf_filters_cow;
+
                 memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
+                ctx->restrictions.bpf_filters = bpf;
+                ctx->restrictions.bpf_filters_cow = cowed;
                 return ret;
         }
         if (ctx->restrictions.op_registered)

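For context, this error path services io_uring_register(2) with IORING_REGISTER_RESTRICTIONS; the io_bpf_filters fields it now preserves appear to come from the io_uring-7.0 BPF filter work, so those names are taken from the diff rather than mainline. A hedged liburing sketch of the call whose parse failure used to wipe that state:

    #include <liburing.h>
    #include <stdio.h>

    /* The ring must have been created with IORING_SETUP_R_DISABLED;
     * restrictions can only be registered before the ring is enabled. */
    static int restrict_to_read_write(struct io_uring *ring)
    {
            struct io_uring_restriction res[2] = {
                    { .opcode = IORING_RESTRICTION_SQE_OP, .sqe_op = IORING_OP_READ },
                    { .opcode = IORING_RESTRICTION_SQE_OP, .sqe_op = IORING_OP_WRITE },
            };
            int ret = io_uring_register_restrictions(ring, res, 2);

            /* with this fix, a failure here no longer drops BPF filter
             * state that was already COW'ed into ctx->restrictions */
            if (ret < 0)
                    fprintf(stderr, "register_restrictions: %d\n", ret);
            return ret;
    }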
io_uring/rsrc.c

Lines changed: 4 additions & 0 deletions

@@ -1061,6 +1061,10 @@ static int io_import_fixed(int ddir, struct iov_iter *iter,
                 return ret;
         if (!(imu->dir & (1 << ddir)))
                 return -EFAULT;
+        if (unlikely(!len)) {
+                iov_iter_bvec(iter, ddir, NULL, 0, 0);
+                return 0;
+        }
 
         offset = buf_addr - imu->ubuf;

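With this change a zero-length import succeeds with an empty bvec iterator instead of proceeding into the offset/segment math with len == 0. A hedged liburing sketch of a request exercising this path; the expectation is cqe->res == 0 rather than an error:

    #include <liburing.h>

    static int zero_len_fixed_read(struct io_uring *ring, int fd,
                                   void *buf, size_t buf_len)
    {
            struct iovec iov = { .iov_base = buf, .iov_len = buf_len };
            struct io_uring_sqe *sqe;
            struct io_uring_cqe *cqe;
            int ret;

            ret = io_uring_register_buffers(ring, &iov, 1);
            if (ret)
                    return ret;

            sqe = io_uring_get_sqe(ring);
            /* nbytes == 0 against registered buffer index 0 */
            io_uring_prep_read_fixed(sqe, fd, buf, 0, 0, 0);
            io_uring_submit(ring);

            ret = io_uring_wait_cqe(ring, &cqe);
            if (ret)
                    return ret;
            ret = cqe->res;                 /* expect 0 */
            io_uring_cqe_seen(ring, cqe);
            return ret;
    }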
io_uring/wait.c

Lines changed: 31 additions & 19 deletions

@@ -79,12 +79,15 @@ static enum hrtimer_restart io_cqring_min_timer_wakeup(struct hrtimer *timer)
         if (io_has_work(ctx))
                 goto out_wake;
         /* got events since we started waiting, min timeout is done */
-        if (iowq->cq_min_tail != READ_ONCE(ctx->rings->cq.tail))
-                goto out_wake;
-        /* if we have any events and min timeout expired, we're done */
-        if (io_cqring_events(ctx))
-                goto out_wake;
+        scoped_guard(rcu) {
+                struct io_rings *rings = io_get_rings(ctx);
 
+                if (iowq->cq_min_tail != READ_ONCE(rings->cq.tail))
+                        goto out_wake;
+                /* if we have any events and min timeout expired, we're done */
+                if (io_cqring_events(ctx))
+                        goto out_wake;
+        }
         /*
          * If using deferred task_work running and application is waiting on
          * more than one request, ensure we reset it now where we are switching
@@ -186,9 +189,9 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
                    struct ext_arg *ext_arg)
 {
         struct io_wait_queue iowq;
-        struct io_rings *rings = ctx->rings;
+        struct io_rings *rings;
         ktime_t start_time;
-        int ret;
+        int ret, nr_wait;
 
         min_events = min_t(int, min_events, ctx->cq_entries);
 
@@ -201,15 +204,23 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 
         if (unlikely(test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)))
                 io_cqring_do_overflow_flush(ctx);
-        if (__io_cqring_events_user(ctx) >= min_events)
+
+        rcu_read_lock();
+        rings = io_get_rings(ctx);
+        if (__io_cqring_events_user(ctx) >= min_events) {
+                rcu_read_unlock();
                 return 0;
+        }
 
         init_waitqueue_func_entry(&iowq.wq, io_wake_function);
         iowq.wq.private = current;
         INIT_LIST_HEAD(&iowq.wq.entry);
         iowq.ctx = ctx;
-        iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events;
-        iowq.cq_min_tail = READ_ONCE(ctx->rings->cq.tail);
+        iowq.cq_tail = READ_ONCE(rings->cq.head) + min_events;
+        iowq.cq_min_tail = READ_ONCE(rings->cq.tail);
+        nr_wait = (int) iowq.cq_tail - READ_ONCE(rings->cq.tail);
+        rcu_read_unlock();
+        rings = NULL;
         iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
         iowq.hit_timeout = 0;
         iowq.min_timeout = ext_arg->min_time;
@@ -240,14 +251,6 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
         trace_io_uring_cqring_wait(ctx, min_events);
         do {
                 unsigned long check_cq;
-                int nr_wait;
-
-                /* if min timeout has been hit, don't reset wait count */
-                if (!iowq.hit_timeout)
-                        nr_wait = (int) iowq.cq_tail -
-                                        READ_ONCE(ctx->rings->cq.tail);
-                else
-                        nr_wait = 1;
 
                 if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
                         atomic_set(&ctx->cq_wait_nr, nr_wait);
@@ -298,11 +301,20 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
                         break;
                 }
                 cond_resched();
+
+                /* if min timeout has been hit, don't reset wait count */
+                if (!iowq.hit_timeout)
+                        scoped_guard(rcu)
+                                nr_wait = (int) iowq.cq_tail -
+                                        READ_ONCE(io_get_rings(ctx)->cq.tail);
+                else
+                        nr_wait = 1;
         } while (1);
 
         if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
                 finish_wait(&ctx->cq_wait, &iowq.wq);
         restore_saved_sigmask_unless(ret == -EINTR);
 
-        return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
+        guard(rcu)();
+        return READ_ONCE(io_get_rings(ctx)->cq.head) == READ_ONCE(io_get_rings(ctx)->cq.tail) ? ret : 0;
 }

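Two details in the io_cqring_wait() rework are worth calling out: the rings local is explicitly set to NULL once the RCU read section ends, and every later tail read re-derives the pointer under a fresh guard. Caching the pointer across a sleep is exactly the bug class this series closes, sketched here (illustrative, not the removed code verbatim):

    /* BAD: snapshot taken under RCU, then used after the section ends;
     * a concurrent rings replacement may free it while we block. */
    static u32 peek_tail_buggy(struct io_ring_ctx *ctx)
    {
            struct io_rings *rings;

            rcu_read_lock();
            rings = io_get_rings(ctx);
            rcu_read_unlock();
            /* ... task may block here ... */
            return READ_ONCE(rings->cq.tail);       /* potential use-after-free */
    }

    /* GOOD: re-derive inside a live read-side critical section. */
    static u32 peek_tail(struct io_ring_ctx *ctx)
    {
            guard(rcu)();
            return READ_ONCE(io_get_rings(ctx)->cq.tail);
    }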
io_uring/wait.h

Lines changed: 5 additions & 2 deletions

@@ -28,12 +28,15 @@ void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx);
 
 static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
 {
-        return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
+        struct io_rings *rings = io_get_rings(ctx);
+        return ctx->cached_cq_tail - READ_ONCE(rings->cq.head);
 }
 
 static inline unsigned int __io_cqring_events_user(struct io_ring_ctx *ctx)
 {
-        return READ_ONCE(ctx->rings->cq.tail) - READ_ONCE(ctx->rings->cq.head);
+        struct io_rings *rings = io_get_rings(ctx);
+
+        return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head);
 }
 
 /*

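The tail - head subtraction in both helpers is unsigned on purpose: the CQ head and tail are free-running 32-bit counters, so modular arithmetic keeps the pending-event count correct even after either index wraps. A quick worked example:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t head = 0xfffffffeu;    /* consumer just before wrap */
            uint32_t tail = 0x00000003u;    /* producer already wrapped */

            /* 5 entries pending despite tail < head numerically */
            printf("pending = %u\n", tail - head);
            return 0;
    }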