Skip to content

Commit 033af2b

Browse files
isilence authored and axboe committed
io_uring: introduce callback driven main loop
The io_uring_enter() has a fixed order of execution: it submits requests, waits for completions, and returns to the user. Allow optionally replacing it with a custom loop driven by a callback called loop_step. The basic requirement for the callback is that it should be able to submit requests, wait for completions, parse them and repeat. Most of the communication, including parameter passing, can be implemented via shared memory. The callback should return IOU_LOOP_CONTINUE to continue execution or IOU_LOOP_STOP to return to user space. Note that the kernel may decide to prematurely terminate it as well, e.g. in case the process was signalled or killed. The hook takes a structure with parameters. It can be used to ask the kernel to wait for CQEs by setting cq_wait_idx to the CQE index it wants to wait for. Spurious wake-ups are possible and even likely; the callback is expected to handle them. There will be more parameters in the future, like a timeout. It can be used with kernel callbacks, for example, as a slow-path deprecation mechanism overwriting SQEs and emulating the wanted behaviour; however, it's more useful together with the BPF programs implemented in the following patches. Note that keeping it separate from the normal io_uring wait loop makes things much simpler and cleaner. It keeps the logic in one place instead of spreading a bunch of checks across different places, including disabling the submission path. It holds the lock by default, which is a better fit for BPF synchronisation and the loop execution model. It nicely avoids existing quirks like forced wake-ups on timeout request completion. And it should be easier to implement new features. Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Link: https://patch.msgid.link/a2d369aa1c9dd23ad7edac9220cffc563abcaed6.1772109579.git.asml.silence@gmail.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent f144dba commit 033af2b

6 files changed

Lines changed: 136 additions & 1 deletion

File tree

include/linux/io_uring_types.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ enum io_uring_cmd_flags {
4141
IO_URING_F_COMPAT = (1 << 12),
4242
};
4343

44+
struct iou_loop_params;
45+
4446
struct io_wq_work_node {
4547
struct io_wq_work_node *next;
4648
};
@@ -361,6 +363,9 @@ struct io_ring_ctx {
361363
struct io_alloc_cache rw_cache;
362364
struct io_alloc_cache cmd_cache;
363365

366+
int (*loop_step)(struct io_ring_ctx *ctx,
367+
struct iou_loop_params *);
368+
364369
/*
365370
* Any cancelable uring_cmd is added to this list in
366371
* ->uring_cmd() by io_uring_cmd_insert_cancelable()

io_uring/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
1414
advise.o openclose.o statx.o timeout.o \
1515
cancel.o waitid.o register.o \
1616
truncate.o memmap.o alloc_cache.o \
17-
query.o
17+
query.o loop.o
1818

1919
obj-$(CONFIG_IO_URING_ZCRX) += zcrx.o
2020
obj-$(CONFIG_IO_WQ) += io-wq.o

io_uring/io_uring.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
#include "eventfd.h"
9696
#include "wait.h"
9797
#include "bpf_filter.h"
98+
#include "loop.h"
9899

99100
#define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \
100101
IOSQE_IO_HARDLINK | IOSQE_ASYNC)
@@ -588,6 +589,11 @@ void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx)
588589
mutex_unlock(&ctx->uring_lock);
589590
}
590591

592+
/*
 * Flush overflowed CQEs back into the CQ ring. The _locked suffix implies
 * the caller already holds ctx->uring_lock (used by the callback loop,
 * which runs with the lock held by default). The second argument selects
 * the non-dying flush path — NOTE(review): confirm the bool's meaning
 * against __io_cqring_overflow_flush()'s definition.
 */
void io_cqring_overflow_flush_locked(struct io_ring_ctx *ctx)
{
	__io_cqring_overflow_flush(ctx, false);
}
596+
591597
/* must to be called somewhat shortly after putting a request */
592598
static inline void io_put_task(struct io_kiocb *req)
593599
{
@@ -2571,6 +2577,11 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
25712577
if (unlikely(smp_load_acquire(&ctx->flags) & IORING_SETUP_R_DISABLED))
25722578
goto out;
25732579

2580+
if (io_has_loop_ops(ctx)) {
2581+
ret = io_run_loop(ctx);
2582+
goto out;
2583+
}
2584+
25742585
/*
25752586
* For SQ polling, the thread will do all submissions and completions.
25762587
* Just return the requested submit count, and wake the thread if

io_uring/loop.c

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
#include "io_uring.h"
3+
#include "wait.h"
4+
#include "loop.h"
5+
6+
/*
 * Number of CQEs still outstanding before the index the callback asked to
 * wait for (lp->cq_wait_idx) is reached. The u32 subtraction wraps and the
 * result is truncated to int, so an index at or behind the current tail
 * yields <= 0 ("nothing to wait for"). NOTE(review): this presumably
 * assumes cq_wait_idx stays within s32 range of the tail — confirm.
 */
static inline int io_loop_nr_cqes(const struct io_ring_ctx *ctx,
				  const struct iou_loop_params *lp)
{
	return lp->cq_wait_idx - READ_ONCE(ctx->rings->cq.tail);
}
11+
12+
/*
 * Arm the wait: publish how many CQEs we are waiting for in cq_wait_nr,
 * then mark the task TASK_INTERRUPTIBLE so a subsequent schedule() sleeps
 * until a wake-up or signal. The store-then-set-state order mirrors the
 * standard prepare-to-wait pattern; io_loop_wait() re-checks its wake
 * conditions after this before actually sleeping.
 */
static inline void io_loop_wait_start(struct io_ring_ctx *ctx, unsigned nr_wait)
{
	atomic_set(&ctx->cq_wait_nr, nr_wait);
	set_current_state(TASK_INTERRUPTIBLE);
}
17+
18+
/*
 * Disarm the wait set up by io_loop_wait_start(): put the task back to
 * TASK_RUNNING and reset cq_wait_nr to its idle sentinel so completions
 * stop trying to wake us.
 */
static inline void io_loop_wait_finish(struct io_ring_ctx *ctx)
{
	__set_current_state(TASK_RUNNING);
	atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
}
23+
24+
/*
 * Sleep until roughly nr_wait more CQEs have been posted, or until any
 * wake-up/signal. Called with ctx->uring_lock held; drops it across
 * schedule() and re-acquires it before returning. Spurious returns are
 * fine — the caller's loop re-evaluates everything.
 */
static void io_loop_wait(struct io_ring_ctx *ctx, struct iou_loop_params *lp,
			 unsigned nr_wait)
{
	io_loop_wait_start(ctx, nr_wait);

	/*
	 * Re-check the wake conditions *after* setting TASK_INTERRUPTIBLE so
	 * a wake-up racing with this check cannot be lost: bail out if local
	 * task work is pending, the target CQ index has already been reached,
	 * or check_cq is set (e.g. CQ overflow — the caller flushes it).
	 * NOTE(review): the unlikely() covers only the first two conditions;
	 * confirm whether check_cq was meant to be inside it too.
	 */
	if (unlikely(io_local_work_pending(ctx) ||
		     io_loop_nr_cqes(ctx, lp) <= 0) ||
	    READ_ONCE(ctx->check_cq)) {
		io_loop_wait_finish(ctx);
		return;
	}

	mutex_unlock(&ctx->uring_lock);
	schedule();
	io_loop_wait_finish(ctx);
	mutex_lock(&ctx->uring_lock);
}
41+
42+
/*
 * Drive the callback-provided main loop. Called with ctx->uring_lock held;
 * the lock is temporarily dropped while waiting and while running task
 * work. Returns 0 when the callback asks to stop, -EFAULT if the loop_step
 * hook vanishes mid-loop, -EINVAL on an unrecognised callback return code,
 * or -EINTR if a signal is pending.
 */
static int __io_run_loop(struct io_ring_ctx *ctx)
{
	struct iou_loop_params lp = {};

	while (true) {
		int nr_wait, step_res;

		/* re-checked every iteration: the hook may be torn down */
		if (unlikely(!ctx->loop_step))
			return -EFAULT;

		step_res = ctx->loop_step(ctx, &lp);
		if (step_res == IOU_LOOP_STOP)
			break;
		if (step_res != IOU_LOOP_CONTINUE)
			return -EINVAL;

		/* > 0 iff the callback's cq_wait_idx is ahead of the CQ tail */
		nr_wait = io_loop_nr_cqes(ctx, &lp);
		if (nr_wait > 0)
			io_loop_wait(ctx, &lp, nr_wait);
		else
			nr_wait = 0;

		if (task_work_pending(current)) {
			/*
			 * Drop the ring lock around generic task work —
			 * presumably it may itself need uring_lock; confirm.
			 */
			mutex_unlock(&ctx->uring_lock);
			io_run_task_work();
			mutex_lock(&ctx->uring_lock);
		}
		if (unlikely(task_sigpending(current)))
			return -EINTR;
		io_run_local_work_locked(ctx, nr_wait);

		/* flush overflowed CQEs so the callback can see them */
		if (READ_ONCE(ctx->check_cq) & BIT(IO_CHECK_CQ_OVERFLOW_BIT))
			io_cqring_overflow_flush_locked(ctx);
	}

	return 0;
}
79+
80+
int io_run_loop(struct io_ring_ctx *ctx)
81+
{
82+
int ret;
83+
84+
if (!io_allowed_run_tw(ctx))
85+
return -EEXIST;
86+
87+
mutex_lock(&ctx->uring_lock);
88+
ret = __io_run_loop(ctx);
89+
mutex_unlock(&ctx->uring_lock);
90+
return ret;
91+
}

io_uring/loop.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
#ifndef IOU_LOOP_H
3+
#define IOU_LOOP_H
4+
5+
#include <linux/io_uring_types.h>
6+
7+
/*
 * Parameters passed to the ->loop_step callback each iteration. The commit
 * notes more fields (e.g. a timeout) may be added later.
 */
struct iou_loop_params {
	/*
	 * The CQE index to wait for. Only serves as a hint and can still be
	 * woken up earlier.
	 */
	__u32 cq_wait_idx;
};
14+
15+
/* Return codes for the ->loop_step callback. */
enum {
	/* keep iterating: submit/wait/parse again */
	IOU_LOOP_CONTINUE = 0,
	/* leave the loop and return to user space */
	IOU_LOOP_STOP,
};
19+
20+
static inline bool io_has_loop_ops(struct io_ring_ctx *ctx)
21+
{
22+
return data_race(ctx->loop_step);
23+
}
24+
25+
int io_run_loop(struct io_ring_ctx *ctx);
26+
27+
#endif

io_uring/wait.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
2525
struct ext_arg *ext_arg);
2626
int io_run_task_work_sig(struct io_ring_ctx *ctx);
2727
void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx);
28+
void io_cqring_overflow_flush_locked(struct io_ring_ctx *ctx);
2829

2930
static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
3031
{

0 commit comments

Comments
 (0)