
Commit f1a424e

io_uring: switch struct io_ring_ctx internal bitfields to flags

Bitfields cannot be set and checked atomically, and this makes it more
clear that these are indeed in shared storage and must be checked and
set in a sane fashion. This is in preparation for annotating a few of
the known racy, but harmless, flags checking.

No intended functional changes in this patch.

Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 0e46cb5 commit f1a424e
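
Why bitfields are a problem here: a C compiler implements a one-bit field store as a read-modify-write of the whole underlying word, so two CPUs setting two "independent" bitfields of the same struct concurrently can silently lose one of the updates, and a single bitfield cannot be wrapped in an annotation because its address cannot be taken. A minimal userspace sketch of the replacement pattern (the BIT() macro matches the kernel's; the demo_ctx and DEMO_F_* names are illustrative, not from the patch):

	#include <stdio.h>

	#define BIT(n)	(1U << (n))	/* as in the kernel's BIT() */

	enum {
		DEMO_F_DRAIN_ACTIVE	= BIT(0),
		DEMO_F_HAS_EVFD		= BIT(1),
	};

	struct demo_ctx {
		/*
		 * With "unsigned int drain_active : 1; unsigned int
		 * has_evfd : 1;" both fields share one word, and every
		 * store is load/merge/store of that word. A plain flags
		 * word makes the shared storage explicit:
		 */
		unsigned int int_flags;
	};

	int main(void)
	{
		struct demo_ctx ctx = { 0 };

		ctx.int_flags |= DEMO_F_HAS_EVFD;	/* set */
		if (ctx.int_flags & DEMO_F_HAS_EVFD)	/* test */
			printf("evfd flag set\n");
		ctx.int_flags &= ~DEMO_F_HAS_EVFD;	/* clear */

		/* several flags tested against one compile-time mask */
		if (!(ctx.int_flags & (DEMO_F_DRAIN_ACTIVE | DEMO_F_HAS_EVFD)))
			printf("fast path\n");
		return 0;
	}

Note that the |=, &= ~ and & idioms are still plain read-modify-write operations; the point of the conversion is not to make them atomic, but to make the shared word explicit so the racy-but-harmless accesses can later be annotated individually.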

10 files changed

Lines changed: 82 additions & 73 deletions


include/linux/io_uring_types.h

Lines changed: 20 additions & 14 deletions
@@ -268,24 +268,30 @@ struct io_alloc_cache {
 	unsigned int		init_clear;
 };
 
+enum {
+	IO_RING_F_DRAIN_NEXT		= BIT(0),
+	IO_RING_F_OP_RESTRICTED		= BIT(1),
+	IO_RING_F_REG_RESTRICTED	= BIT(2),
+	IO_RING_F_OFF_TIMEOUT_USED	= BIT(3),
+	IO_RING_F_DRAIN_ACTIVE		= BIT(4),
+	IO_RING_F_HAS_EVFD		= BIT(5),
+	/* all CQEs should be posted only by the submitter task */
+	IO_RING_F_TASK_COMPLETE		= BIT(6),
+	IO_RING_F_LOCKLESS_CQ		= BIT(7),
+	IO_RING_F_SYSCALL_IOPOLL	= BIT(8),
+	IO_RING_F_POLL_ACTIVATED	= BIT(9),
+	IO_RING_F_DRAIN_DISABLED	= BIT(10),
+	IO_RING_F_COMPAT		= BIT(11),
+	IO_RING_F_IOWQ_LIMITS_SET	= BIT(12),
+};
+
 struct io_ring_ctx {
 	/* const or read-mostly hot data */
 	struct {
+		/* ring setup flags */
 		unsigned int		flags;
-		unsigned int		drain_next: 1;
-		unsigned int		op_restricted: 1;
-		unsigned int		reg_restricted: 1;
-		unsigned int		off_timeout_used: 1;
-		unsigned int		drain_active: 1;
-		unsigned int		has_evfd: 1;
-		/* all CQEs should be posted only by the submitter task */
-		unsigned int		task_complete: 1;
-		unsigned int		lockless_cq: 1;
-		unsigned int		syscall_iopoll: 1;
-		unsigned int		poll_activated: 1;
-		unsigned int		drain_disabled: 1;
-		unsigned int		compat: 1;
-		unsigned int		iowq_limits_set : 1;
+		/* internal IO_RING_F_* state flags, mostly read-only */
+		unsigned int		int_flags;
 
 		struct task_struct	*submitter_task;
 		struct io_rings		*rings;
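
The "annotating" the commit message prepares for presumably means markers such as the kernel's READ_ONCE()/WRITE_ONCE() or data_race(), none of which can be applied to a bitfield. With a plain int_flags word, a hypothetical follow-up (not part of this commit) could look like:

	/* tolerate a racy read of a flag that is harmless to observe stale */
	if (data_race(ctx->int_flags) & IO_RING_F_POLL_ACTIVATED)
		io_poll_wq_wake(ctx);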

io_uring/eventfd.c

Lines changed: 2 additions & 2 deletions
@@ -148,7 +148,7 @@ int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
 	spin_unlock(&ctx->completion_lock);
 
 	ev_fd->eventfd_async = eventfd_async;
-	ctx->has_evfd = true;
+	ctx->int_flags |= IO_RING_F_HAS_EVFD;
 	refcount_set(&ev_fd->refs, 1);
 	atomic_set(&ev_fd->ops, 0);
 	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
@@ -162,7 +162,7 @@ int io_eventfd_unregister(struct io_ring_ctx *ctx)
 	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
 					lockdep_is_held(&ctx->uring_lock));
 	if (ev_fd) {
-		ctx->has_evfd = false;
+		ctx->int_flags &= ~IO_RING_F_HAS_EVFD;
 		rcu_assign_pointer(ctx->io_ev_fd, NULL);
 		io_eventfd_put(ev_fd);
 		return 0;

io_uring/io_uring.c

Lines changed: 42 additions & 40 deletions
@@ -477,17 +477,17 @@ static __cold noinline void io_queue_deferred(struct io_ring_ctx *ctx)
 
 void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
 {
-	if (ctx->poll_activated)
+	if (ctx->int_flags & IO_RING_F_POLL_ACTIVATED)
 		io_poll_wq_wake(ctx);
-	if (ctx->off_timeout_used)
+	if (ctx->int_flags & IO_RING_F_OFF_TIMEOUT_USED)
 		io_flush_timeouts(ctx);
-	if (ctx->has_evfd)
+	if (ctx->int_flags & IO_RING_F_HAS_EVFD)
 		io_eventfd_signal(ctx, true);
 }
 
 static inline void __io_cq_lock(struct io_ring_ctx *ctx)
 {
-	if (!ctx->lockless_cq)
+	if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ))
 		spin_lock(&ctx->completion_lock);
 }
 
@@ -500,11 +500,11 @@ static inline void io_cq_lock(struct io_ring_ctx *ctx)
 static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx)
 {
 	io_commit_cqring(ctx);
-	if (!ctx->task_complete) {
-		if (!ctx->lockless_cq)
+	if (!(ctx->int_flags & IO_RING_F_TASK_COMPLETE)) {
+		if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ))
 			spin_unlock(&ctx->completion_lock);
 		/* IOPOLL rings only need to wake up if it's also SQPOLL */
-		if (!ctx->syscall_iopoll)
+		if (!(ctx->int_flags & IO_RING_F_SYSCALL_IOPOLL))
 			io_cqring_wake(ctx);
 	}
 	io_commit_cqring_flush(ctx);
@@ -830,7 +830,7 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
 void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
 {
 	lockdep_assert_held(&ctx->uring_lock);
-	lockdep_assert(ctx->lockless_cq);
+	lockdep_assert(ctx->int_flags & IO_RING_F_LOCKLESS_CQ);
 
 	if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) {
 		struct io_cqe cqe = io_init_cqe(user_data, res, cflags);
@@ -860,7 +860,7 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags)
 	lockdep_assert(!io_wq_current_is_worker());
 	lockdep_assert_held(&ctx->uring_lock);
 
-	if (!ctx->lockless_cq) {
+	if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ)) {
 		spin_lock(&ctx->completion_lock);
 		posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags);
 		spin_unlock(&ctx->completion_lock);
@@ -885,7 +885,7 @@ bool io_req_post_cqe32(struct io_kiocb *req, struct io_uring_cqe cqe[2])
 	lockdep_assert_held(&ctx->uring_lock);
 
 	cqe[0].user_data = req->cqe.user_data;
-	if (!ctx->lockless_cq) {
+	if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ)) {
 		spin_lock(&ctx->completion_lock);
 		posted = io_fill_cqe_aux32(ctx, cqe);
 		spin_unlock(&ctx->completion_lock);
@@ -913,7 +913,7 @@ static void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
 	 * Handle special CQ sync cases via task_work. DEFER_TASKRUN requires
 	 * the submitter task context, IOPOLL protects with uring_lock.
 	 */
-	if (ctx->lockless_cq || (req->flags & REQ_F_REISSUE)) {
+	if ((ctx->int_flags & IO_RING_F_LOCKLESS_CQ) || (req->flags & REQ_F_REISSUE)) {
 defer_complete:
 		req->io_task_work.func = io_req_task_complete;
 		io_req_task_work_add(req);
@@ -1135,7 +1135,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 		 */
 		if (!(req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE)) &&
 		    unlikely(!io_fill_cqe_req(ctx, req))) {
-			if (ctx->lockless_cq)
+			if (ctx->int_flags & IO_RING_F_LOCKLESS_CQ)
 				io_cqe_overflow(ctx, &req->cqe, &req->big_cqe);
 			else
 				io_cqe_overflow_locked(ctx, &req->cqe, &req->big_cqe);
@@ -1148,7 +1148,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 		INIT_WQ_LIST(&state->compl_reqs);
 	}
 
-	if (unlikely(ctx->drain_active))
+	if (unlikely(ctx->int_flags & IO_RING_F_DRAIN_ACTIVE))
 		io_queue_deferred(ctx);
 
 	ctx->submit_state.cq_flush = false;
@@ -1344,7 +1344,7 @@ static __cold void io_drain_req(struct io_kiocb *req)
 	list_add_tail(&de->list, &ctx->defer_list);
 	io_queue_deferred(ctx);
 	if (!drain && list_empty(&ctx->defer_list))
-		ctx->drain_active = false;
+		ctx->int_flags &= ~IO_RING_F_DRAIN_ACTIVE;
 }
 
 static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
@@ -1655,7 +1655,7 @@ static void io_queue_sqe_fallback(struct io_kiocb *req)
 	} else {
 		/* can't fail with IO_URING_F_INLINE */
 		io_req_sqe_copy(req, IO_URING_F_INLINE);
-		if (unlikely(req->ctx->drain_active))
+		if (unlikely(req->ctx->int_flags & IO_RING_F_DRAIN_ACTIVE))
 			io_drain_req(req);
 		else
 			io_queue_iowq(req);
@@ -1671,7 +1671,7 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx,
 					struct io_kiocb *req,
 					unsigned int sqe_flags)
 {
-	if (!ctx->op_restricted)
+	if (!(ctx->int_flags & IO_RING_F_OP_RESTRICTED))
 		return true;
 	if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
 		return false;
@@ -1691,7 +1691,7 @@ static void io_init_drain(struct io_ring_ctx *ctx)
 {
 	struct io_kiocb *head = ctx->submit_state.link.head;
 
-	ctx->drain_active = true;
+	ctx->int_flags |= IO_RING_F_DRAIN_ACTIVE;
 	if (head) {
 		/*
 		 * If we need to drain a request in the middle of a link, drain
@@ -1701,7 +1701,7 @@ static void io_init_drain(struct io_ring_ctx *ctx)
 		 * link.
 		 */
 		head->flags |= REQ_F_IO_DRAIN | REQ_F_FORCE_ASYNC;
-		ctx->drain_next = true;
+		ctx->int_flags |= IO_RING_F_DRAIN_NEXT;
 	}
 }
 
@@ -1767,23 +1767,23 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			req->buf_index = READ_ONCE(sqe->buf_group);
 		}
 		if (sqe_flags & IOSQE_CQE_SKIP_SUCCESS)
-			ctx->drain_disabled = true;
+			ctx->int_flags |= IO_RING_F_DRAIN_DISABLED;
 		if (sqe_flags & IOSQE_IO_DRAIN) {
-			if (ctx->drain_disabled)
+			if (ctx->int_flags & IO_RING_F_DRAIN_DISABLED)
 				return io_init_fail_req(req, -EOPNOTSUPP);
 			io_init_drain(ctx);
 		}
 	}
-	if (unlikely(ctx->op_restricted || ctx->drain_active || ctx->drain_next)) {
+	if (unlikely(ctx->int_flags & (IO_RING_F_OP_RESTRICTED | IO_RING_F_DRAIN_ACTIVE | IO_RING_F_DRAIN_NEXT))) {
 		if (!io_check_restriction(ctx, req, sqe_flags))
 			return io_init_fail_req(req, -EACCES);
 		/* knock it to the slow queue path, will be drained there */
-		if (ctx->drain_active)
+		if (ctx->int_flags & IO_RING_F_DRAIN_ACTIVE)
 			req->flags |= REQ_F_FORCE_ASYNC;
 		/* if there is no link, we're at "next" request and need to drain */
-		if (unlikely(ctx->drain_next) && !ctx->submit_state.link.head) {
-			ctx->drain_next = false;
-			ctx->drain_active = true;
+		if (unlikely(ctx->int_flags & IO_RING_F_DRAIN_NEXT) && !ctx->submit_state.link.head) {
+			ctx->int_flags &= ~IO_RING_F_DRAIN_NEXT;
+			ctx->int_flags |= IO_RING_F_DRAIN_ACTIVE;
 			req->flags |= REQ_F_IO_DRAIN | REQ_F_FORCE_ASYNC;
 		}
 	}
@@ -2204,7 +2204,7 @@ static __cold void io_activate_pollwq_cb(struct callback_head *cb)
 					       poll_wq_task_work);
 
 	mutex_lock(&ctx->uring_lock);
-	ctx->poll_activated = true;
+	ctx->int_flags |= IO_RING_F_POLL_ACTIVATED;
 	mutex_unlock(&ctx->uring_lock);
 
 	/*
@@ -2219,9 +2219,9 @@ __cold void io_activate_pollwq(struct io_ring_ctx *ctx)
 {
 	spin_lock(&ctx->completion_lock);
 	/* already activated or in progress */
-	if (ctx->poll_activated || ctx->poll_wq_task_work.func)
+	if ((ctx->int_flags & IO_RING_F_POLL_ACTIVATED) || ctx->poll_wq_task_work.func)
 		goto out;
-	if (WARN_ON_ONCE(!ctx->task_complete))
+	if (WARN_ON_ONCE(!(ctx->int_flags & IO_RING_F_TASK_COMPLETE)))
 		goto out;
 	if (!ctx->submitter_task)
 		goto out;
@@ -2242,7 +2242,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 	struct io_ring_ctx *ctx = file->private_data;
 	__poll_t mask = 0;
 
-	if (unlikely(!ctx->poll_activated))
+	if (unlikely(!(ctx->int_flags & IO_RING_F_POLL_ACTIVATED)))
 		io_activate_pollwq(ctx);
 	/*
	 * provides mb() which pairs with barrier from wq_has_sleeper
@@ -2607,7 +2607,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 			goto out;
 		}
 		if (flags & IORING_ENTER_GETEVENTS) {
-			if (ctx->syscall_iopoll)
+			if (ctx->int_flags & IO_RING_F_SYSCALL_IOPOLL)
 				goto iopoll_locked;
 			/*
			 * Ignore errors, we'll soon call io_cqring_wait() and
@@ -2622,7 +2622,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 	if (flags & IORING_ENTER_GETEVENTS) {
 		int ret2;
 
-		if (ctx->syscall_iopoll) {
+		if (ctx->int_flags & IO_RING_F_SYSCALL_IOPOLL) {
 			/*
			 * We disallow the app entering submit/complete with
			 * polling, but we still need to lock the ring to
@@ -2923,9 +2923,9 @@ static void io_ctx_restriction_clone(struct io_ring_ctx *ctx,
 	if (dst->bpf_filters)
 		WRITE_ONCE(ctx->bpf_filters, dst->bpf_filters->filters);
 	if (dst->op_registered)
-		ctx->op_restricted = 1;
+		ctx->int_flags |= IO_RING_F_OP_RESTRICTED;
 	if (dst->reg_registered)
-		ctx->reg_restricted = 1;
+		ctx->int_flags |= IO_RING_F_REG_RESTRICTED;
 }
 
 static __cold int io_uring_create(struct io_ctx_config *config)
@@ -2952,17 +2952,18 @@ static __cold int io_uring_create(struct io_ctx_config *config)
 
 	if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
 	    !(ctx->flags & IORING_SETUP_IOPOLL))
-		ctx->task_complete = true;
+		ctx->int_flags |= IO_RING_F_TASK_COMPLETE;
 
-	if (ctx->task_complete || (ctx->flags & IORING_SETUP_IOPOLL))
-		ctx->lockless_cq = true;
+	if ((ctx->int_flags & IO_RING_F_TASK_COMPLETE) ||
+	    (ctx->flags & IORING_SETUP_IOPOLL))
+		ctx->int_flags |= IO_RING_F_LOCKLESS_CQ;
 
 	/*
	 * lazy poll_wq activation relies on ->task_complete for synchronisation
	 * purposes, see io_activate_pollwq()
	 */
-	if (!ctx->task_complete)
-		ctx->poll_activated = true;
+	if (!(ctx->int_flags & IO_RING_F_TASK_COMPLETE))
+		ctx->int_flags |= IO_RING_F_POLL_ACTIVATED;
 
 	/*
	 * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
@@ -2972,9 +2973,10 @@ static __cold int io_uring_create(struct io_ctx_config *config)
	 */
 	if (ctx->flags & IORING_SETUP_IOPOLL &&
 	    !(ctx->flags & IORING_SETUP_SQPOLL))
-		ctx->syscall_iopoll = 1;
+		ctx->int_flags |= IO_RING_F_SYSCALL_IOPOLL;
 
-	ctx->compat = in_compat_syscall();
+	if (in_compat_syscall())
+		ctx->int_flags |= IO_RING_F_COMPAT;
 	if (!ns_capable_noaudit(&init_user_ns, CAP_IPC_LOCK))
 		ctx->user = get_uid(current_user());
 
io_uring/io_uring.h

Lines changed: 5 additions & 4 deletions
@@ -211,7 +211,7 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
 
 	if (ctx->flags & IORING_SETUP_IOPOLL) {
 		lockdep_assert_held(&ctx->uring_lock);
-	} else if (!ctx->task_complete) {
+	} else if (!(ctx->int_flags & IO_RING_F_TASK_COMPLETE)) {
 		lockdep_assert_held(&ctx->completion_lock);
 	} else if (ctx->submitter_task) {
 		/*
@@ -228,7 +228,7 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
 
 static inline bool io_is_compat(struct io_ring_ctx *ctx)
 {
-	return IS_ENABLED(CONFIG_COMPAT) && unlikely(ctx->compat);
+	return IS_ENABLED(CONFIG_COMPAT) && unlikely(ctx->int_flags & IO_RING_F_COMPAT);
 }
 
 static inline void io_submit_flush_completions(struct io_ring_ctx *ctx)
@@ -472,8 +472,9 @@ static inline void io_req_complete_defer(struct io_kiocb *req)
 
 static inline void io_commit_cqring_flush(struct io_ring_ctx *ctx)
 {
-	if (unlikely(ctx->off_timeout_used ||
-		     ctx->has_evfd || ctx->poll_activated))
+	if (unlikely(ctx->int_flags & (IO_RING_F_OFF_TIMEOUT_USED |
+				       IO_RING_F_HAS_EVFD |
+				       IO_RING_F_POLL_ACTIVATED)))
 		__io_commit_cqring_flush(ctx);
 }
 
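A side benefit visible in the io_commit_cqring_flush() hunk just above: with all state in one word, three separate truth tests are written as a single AND against a compile-time mask, and any future annotation needs to wrap only one load. A self-contained sketch of the same idiom (the DEMO_F_* names are again illustrative, not from the patch):

	#include <stdio.h>

	#define BIT(n)			(1U << (n))
	#define DEMO_F_TIMEOUT		BIT(0)
	#define DEMO_F_EVFD		BIT(1)
	#define DEMO_F_POLL		BIT(2)
	/* one named compile-time mask instead of three separate checks */
	#define DEMO_F_NEED_FLUSH	(DEMO_F_TIMEOUT | DEMO_F_EVFD | DEMO_F_POLL)

	int main(void)
	{
		unsigned int int_flags = DEMO_F_EVFD;

		if (int_flags & DEMO_F_NEED_FLUSH)	/* one load, one test */
			printf("flush needed\n");
		return 0;
	}
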
io_uring/msg_ring.c

Lines changed: 1 addition & 1 deletion
@@ -67,7 +67,7 @@ void io_msg_ring_cleanup(struct io_kiocb *req)
 
 static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
 {
-	return target_ctx->task_complete;
+	return target_ctx->int_flags & IO_RING_F_TASK_COMPLETE;
 }
 
 static void io_msg_tw_complete(struct io_tw_req tw_req, io_tw_token_t tw)

io_uring/register.c

Lines changed: 4 additions & 4 deletions
@@ -184,9 +184,9 @@ static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
 		return ret;
 	}
 	if (ctx->restrictions.op_registered)
-		ctx->op_restricted = 1;
+		ctx->int_flags |= IO_RING_F_OP_RESTRICTED;
 	if (ctx->restrictions.reg_registered)
-		ctx->reg_restricted = 1;
+		ctx->int_flags |= IO_RING_F_REG_RESTRICTED;
 	return 0;
 }
 
@@ -384,7 +384,7 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
 	for (i = 0; i < ARRAY_SIZE(new_count); i++)
 		if (new_count[i])
 			ctx->iowq_limits[i] = new_count[i];
-	ctx->iowq_limits_set = true;
+	ctx->int_flags |= IO_RING_F_IOWQ_LIMITS_SET;
 
 	if (tctx && tctx->io_wq) {
 		ret = io_wq_max_workers(tctx->io_wq, new_count);
@@ -725,7 +725,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	if (ctx->submitter_task && ctx->submitter_task != current)
 		return -EEXIST;
 
-	if (ctx->reg_restricted && !(ctx->flags & IORING_SETUP_R_DISABLED)) {
+	if ((ctx->int_flags & IO_RING_F_REG_RESTRICTED) && !(ctx->flags & IORING_SETUP_R_DISABLED)) {
 		opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
 		if (!test_bit(opcode, ctx->restrictions.register_op))
 			return -EACCES;
