Skip to content

Commit 7745de3

Browse files
committed
Merge branch 'for-7.1/io_uring' into for-next
* for-7.1/io_uring: (23 commits)
  io_uring/bpf-ops: implement bpf ops registration
  io_uring/bpf-ops: add kfunc helpers
  io_uring/bpf-ops: implement loop_step with BPF struct_ops
  io_uring: introduce callback driven main loop
  nvme: remove nvme_dev_uring_cmd() IO_URING_F_IOPOLL check
  io_uring/uring_cmd: allow non-iopoll cmds with IORING_SETUP_IOPOLL
  io_uring: count CQEs in io_iopoll_check()
  io_uring: remove iopoll_queue from struct io_issue_def
  io_uring: add REQ_F_IOPOLL
  io_uring: mark known and harmless racy ctx->int_flags uses
  io_uring: switch struct io_ring_ctx internal bitfields to flags
  io_uring/zctx: separate notification user_data
  io_uring/net: allow vectorised regbuf send zc
  io_uring/timeout: immediate timeout arg
  io_uring/timeout: migrate reqs from ts64 to ktime
  io_uring/timeout: add helper for parsing user time
  io_uring/timeout: check unused sqe fields
  io_uring/zcrx: move zcrx uapi into separate header
  io_uring/zcrx: declare some constants for query
  io_uring/zctx: unify zerocopy issue variants
  ...
2 parents 479f382 + 98f3763 commit 7745de3

31 files changed

Lines changed: 790 additions & 347 deletions

drivers/nvme/host/ioctl.c

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -786,10 +786,6 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
786786
struct nvme_ctrl *ctrl = ioucmd->file->private_data;
787787
int ret;
788788

789-
/* IOPOLL not supported yet */
790-
if (issue_flags & IO_URING_F_IOPOLL)
791-
return -EOPNOTSUPP;
792-
793789
ret = nvme_uring_cmd_checks(issue_flags);
794790
if (ret)
795791
return ret;

include/linux/io_uring_types.h

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
#include <linux/llist.h>
99
#include <uapi/linux/io_uring.h>
1010

11+
struct iou_loop_params;
12+
struct io_uring_bpf_ops;
13+
1114
enum {
1215
/*
1316
* A hint to not wake right away but delay until there are enough of
@@ -41,6 +44,8 @@ enum io_uring_cmd_flags {
4144
IO_URING_F_COMPAT = (1 << 12),
4245
};
4346

47+
struct iou_loop_params;
48+
4449
struct io_wq_work_node {
4550
struct io_wq_work_node *next;
4651
};
@@ -268,24 +273,30 @@ struct io_alloc_cache {
268273
unsigned int init_clear;
269274
};
270275

276+
enum {
277+
IO_RING_F_DRAIN_NEXT = BIT(0),
278+
IO_RING_F_OP_RESTRICTED = BIT(1),
279+
IO_RING_F_REG_RESTRICTED = BIT(2),
280+
IO_RING_F_OFF_TIMEOUT_USED = BIT(3),
281+
IO_RING_F_DRAIN_ACTIVE = BIT(4),
282+
IO_RING_F_HAS_EVFD = BIT(5),
283+
/* all CQEs should be posted only by the submitter task */
284+
IO_RING_F_TASK_COMPLETE = BIT(6),
285+
IO_RING_F_LOCKLESS_CQ = BIT(7),
286+
IO_RING_F_SYSCALL_IOPOLL = BIT(8),
287+
IO_RING_F_POLL_ACTIVATED = BIT(9),
288+
IO_RING_F_DRAIN_DISABLED = BIT(10),
289+
IO_RING_F_COMPAT = BIT(11),
290+
IO_RING_F_IOWQ_LIMITS_SET = BIT(12),
291+
};
292+
271293
struct io_ring_ctx {
272294
/* const or read-mostly hot data */
273295
struct {
296+
/* ring setup flags */
274297
unsigned int flags;
275-
unsigned int drain_next: 1;
276-
unsigned int op_restricted: 1;
277-
unsigned int reg_restricted: 1;
278-
unsigned int off_timeout_used: 1;
279-
unsigned int drain_active: 1;
280-
unsigned int has_evfd: 1;
281-
/* all CQEs should be posted only by the submitter task */
282-
unsigned int task_complete: 1;
283-
unsigned int lockless_cq: 1;
284-
unsigned int syscall_iopoll: 1;
285-
unsigned int poll_activated: 1;
286-
unsigned int drain_disabled: 1;
287-
unsigned int compat: 1;
288-
unsigned int iowq_limits_set : 1;
298+
/* internal state flags (IO_RING_F_*), mostly read-only */
299+
unsigned int int_flags;
289300

290301
struct task_struct *submitter_task;
291302
struct io_rings *rings;
@@ -355,6 +366,9 @@ struct io_ring_ctx {
355366
struct io_alloc_cache rw_cache;
356367
struct io_alloc_cache cmd_cache;
357368

369+
int (*loop_step)(struct io_ring_ctx *ctx,
370+
struct iou_loop_params *);
371+
358372
/*
359373
* Any cancelable uring_cmd is added to this list in
360374
* ->uring_cmd() by io_uring_cmd_insert_cancelable()
@@ -477,6 +491,8 @@ struct io_ring_ctx {
477491
DECLARE_HASHTABLE(napi_ht, 4);
478492
#endif
479493

494+
struct io_uring_bpf_ops *bpf_ops;
495+
480496
/*
481497
* Protection for resize vs mmap races - both the mmap and resize
482498
* side will need to grab this lock, to prevent either side from
@@ -544,6 +560,7 @@ enum {
544560
REQ_F_HAS_METADATA_BIT,
545561
REQ_F_IMPORT_BUFFER_BIT,
546562
REQ_F_SQE_COPIED_BIT,
563+
REQ_F_IOPOLL_BIT,
547564

548565
/* not a real bit, just to check we're not overflowing the space */
549566
__REQ_F_LAST_BIT,
@@ -635,6 +652,8 @@ enum {
635652
REQ_F_IMPORT_BUFFER = IO_REQ_FLAG(REQ_F_IMPORT_BUFFER_BIT),
636653
/* ->sqe_copy() has been called, if necessary */
637654
REQ_F_SQE_COPIED = IO_REQ_FLAG(REQ_F_SQE_COPIED_BIT),
655+
/* request must be iopolled to completion (set in ->issue()) */
656+
REQ_F_IOPOLL = IO_REQ_FLAG(REQ_F_IOPOLL_BIT),
638657
};
639658

640659
struct io_tw_req {

include/uapi/linux/io_uring.h

Lines changed: 7 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010

1111
#include <linux/fs.h>
1212
#include <linux/types.h>
13+
#include <linux/io_uring/zcrx.h>
14+
1315
/*
1416
* this file is shared with liburing and that has to autodetect
1517
* if linux/time_types.h is available or not, it can
@@ -341,6 +343,10 @@ enum io_uring_op {
341343

342344
/*
343345
* sqe->timeout_flags
346+
*
347+
* IORING_TIMEOUT_IMMEDIATE_ARG: If set, sqe->addr stores the timeout
348+
* value in nanoseconds instead of
349+
* pointing to a timespec.
344350
*/
345351
#define IORING_TIMEOUT_ABS (1U << 0)
346352
#define IORING_TIMEOUT_UPDATE (1U << 1)
@@ -349,6 +355,7 @@ enum io_uring_op {
349355
#define IORING_LINK_TIMEOUT_UPDATE (1U << 4)
350356
#define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5)
351357
#define IORING_TIMEOUT_MULTISHOT (1U << 6)
358+
#define IORING_TIMEOUT_IMMEDIATE_ARG (1U << 7)
352359
#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
353360
#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
354361
/*
@@ -1050,100 +1057,6 @@ struct io_timespec {
10501057
__u64 tv_nsec;
10511058
};
10521059

1053-
/* Zero copy receive refill queue entry */
1054-
struct io_uring_zcrx_rqe {
1055-
__u64 off;
1056-
__u32 len;
1057-
__u32 __pad;
1058-
};
1059-
1060-
struct io_uring_zcrx_cqe {
1061-
__u64 off;
1062-
__u64 __pad;
1063-
};
1064-
1065-
/* The bit from which area id is encoded into offsets */
1066-
#define IORING_ZCRX_AREA_SHIFT 48
1067-
#define IORING_ZCRX_AREA_MASK (~(((__u64)1 << IORING_ZCRX_AREA_SHIFT) - 1))
1068-
1069-
struct io_uring_zcrx_offsets {
1070-
__u32 head;
1071-
__u32 tail;
1072-
__u32 rqes;
1073-
__u32 __resv2;
1074-
__u64 __resv[2];
1075-
};
1076-
1077-
enum io_uring_zcrx_area_flags {
1078-
IORING_ZCRX_AREA_DMABUF = 1,
1079-
};
1080-
1081-
struct io_uring_zcrx_area_reg {
1082-
__u64 addr;
1083-
__u64 len;
1084-
__u64 rq_area_token;
1085-
__u32 flags;
1086-
__u32 dmabuf_fd;
1087-
__u64 __resv2[2];
1088-
};
1089-
1090-
enum zcrx_reg_flags {
1091-
ZCRX_REG_IMPORT = 1,
1092-
};
1093-
1094-
enum zcrx_features {
1095-
/*
1096-
* The user can ask for the desired rx page size by passing the
1097-
* value in struct io_uring_zcrx_ifq_reg::rx_buf_len.
1098-
*/
1099-
ZCRX_FEATURE_RX_PAGE_SIZE = 1 << 0,
1100-
};
1101-
1102-
/*
1103-
* Argument for IORING_REGISTER_ZCRX_IFQ
1104-
*/
1105-
struct io_uring_zcrx_ifq_reg {
1106-
__u32 if_idx;
1107-
__u32 if_rxq;
1108-
__u32 rq_entries;
1109-
__u32 flags;
1110-
1111-
__u64 area_ptr; /* pointer to struct io_uring_zcrx_area_reg */
1112-
__u64 region_ptr; /* struct io_uring_region_desc * */
1113-
1114-
struct io_uring_zcrx_offsets offsets;
1115-
__u32 zcrx_id;
1116-
__u32 rx_buf_len;
1117-
__u64 __resv[3];
1118-
};
1119-
1120-
enum zcrx_ctrl_op {
1121-
ZCRX_CTRL_FLUSH_RQ,
1122-
ZCRX_CTRL_EXPORT,
1123-
1124-
__ZCRX_CTRL_LAST,
1125-
};
1126-
1127-
struct zcrx_ctrl_flush_rq {
1128-
__u64 __resv[6];
1129-
};
1130-
1131-
struct zcrx_ctrl_export {
1132-
__u32 zcrx_fd;
1133-
__u32 __resv1[11];
1134-
};
1135-
1136-
struct zcrx_ctrl {
1137-
__u32 zcrx_id;
1138-
__u32 op; /* see enum zcrx_ctrl_op */
1139-
__u64 __resv[2];
1140-
1141-
union {
1142-
struct zcrx_ctrl_export zc_export;
1143-
struct zcrx_ctrl_flush_rq zc_flush;
1144-
};
1145-
};
1146-
11471060
#ifdef __cplusplus
11481061
}
11491062
#endif

include/uapi/linux/io_uring/zcrx.h

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
2+
/*
3+
* Header file for the io_uring zerocopy receive (zcrx) interface.
4+
*
5+
* Copyright (C) 2026 Pavel Begunkov
6+
* Copyright (C) 2026 David Wei
7+
* Copyright (C) Meta Platforms, Inc.
8+
*/
9+
#ifndef LINUX_IO_ZCRX_H
10+
#define LINUX_IO_ZCRX_H
11+
12+
#include <linux/types.h>
13+
14+
/* Zero copy receive refill queue entry */
15+
struct io_uring_zcrx_rqe {
16+
__u64 off;
17+
__u32 len;
18+
__u32 __pad;
19+
};
20+
21+
struct io_uring_zcrx_cqe {
22+
__u64 off;
23+
__u64 __pad;
24+
};
25+
26+
/* The bit from which area id is encoded into offsets */
27+
#define IORING_ZCRX_AREA_SHIFT 48
28+
#define IORING_ZCRX_AREA_MASK (~(((__u64)1 << IORING_ZCRX_AREA_SHIFT) - 1))
29+
30+
struct io_uring_zcrx_offsets {
31+
__u32 head;
32+
__u32 tail;
33+
__u32 rqes;
34+
__u32 __resv2;
35+
__u64 __resv[2];
36+
};
37+
38+
enum io_uring_zcrx_area_flags {
39+
IORING_ZCRX_AREA_DMABUF = 1,
40+
};
41+
42+
struct io_uring_zcrx_area_reg {
43+
__u64 addr;
44+
__u64 len;
45+
__u64 rq_area_token;
46+
__u32 flags;
47+
__u32 dmabuf_fd;
48+
__u64 __resv2[2];
49+
};
50+
51+
enum zcrx_reg_flags {
52+
ZCRX_REG_IMPORT = 1,
53+
};
54+
55+
enum zcrx_features {
56+
/*
57+
* The user can ask for the desired rx page size by passing the
58+
* value in struct io_uring_zcrx_ifq_reg::rx_buf_len.
59+
*/
60+
ZCRX_FEATURE_RX_PAGE_SIZE = 1 << 0,
61+
};
62+
63+
/*
64+
* Argument for IORING_REGISTER_ZCRX_IFQ
65+
*/
66+
struct io_uring_zcrx_ifq_reg {
67+
__u32 if_idx;
68+
__u32 if_rxq;
69+
__u32 rq_entries;
70+
__u32 flags;
71+
72+
__u64 area_ptr; /* pointer to struct io_uring_zcrx_area_reg */
73+
__u64 region_ptr; /* struct io_uring_region_desc * */
74+
75+
struct io_uring_zcrx_offsets offsets;
76+
__u32 zcrx_id;
77+
__u32 rx_buf_len;
78+
__u64 __resv[3];
79+
};
80+
81+
enum zcrx_ctrl_op {
82+
ZCRX_CTRL_FLUSH_RQ,
83+
ZCRX_CTRL_EXPORT,
84+
85+
__ZCRX_CTRL_LAST,
86+
};
87+
88+
struct zcrx_ctrl_flush_rq {
89+
__u64 __resv[6];
90+
};
91+
92+
struct zcrx_ctrl_export {
93+
__u32 zcrx_fd;
94+
__u32 __resv1[11];
95+
};
96+
97+
struct zcrx_ctrl {
98+
__u32 zcrx_id;
99+
__u32 op; /* see enum zcrx_ctrl_op */
100+
__u64 __resv[2];
101+
102+
union {
103+
struct zcrx_ctrl_export zc_export;
104+
struct zcrx_ctrl_flush_rq zc_flush;
105+
};
106+
};
107+
108+
#endif /* LINUX_IO_ZCRX_H */

io_uring/Kconfig

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,8 @@ config IO_URING_BPF
1414
def_bool y
1515
depends on BPF
1616
depends on NET
17+
18+
config IO_URING_BPF_OPS
19+
def_bool y
20+
depends on IO_URING
21+
depends on BPF_SYSCALL && BPF_JIT && DEBUG_INFO_BTF

io_uring/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
1414
advise.o openclose.o statx.o timeout.o \
1515
cancel.o waitid.o register.o \
1616
truncate.o memmap.o alloc_cache.o \
17-
query.o
17+
query.o loop.o
1818

1919
obj-$(CONFIG_IO_URING_ZCRX) += zcrx.o
2020
obj-$(CONFIG_IO_WQ) += io-wq.o
@@ -25,3 +25,4 @@ obj-$(CONFIG_NET) += net.o cmd_net.o
2525
obj-$(CONFIG_PROC_FS) += fdinfo.o
2626
obj-$(CONFIG_IO_URING_MOCK_FILE) += mock_file.o
2727
obj-$(CONFIG_IO_URING_BPF) += bpf_filter.o
28+
obj-$(CONFIG_IO_URING_BPF_OPS) += bpf-ops.o

0 commit comments

Comments
 (0)