Skip to content

Commit 795663b

Browse files
isilence authored and
axboe committed
io_uring/zcrx: implement large rx buffer support
There are network cards that support receive buffers larger than 4K, which can be vastly beneficial for performance: benchmarks for this patch showed up to a 30% CPU utilization improvement for 32K vs 4K buffers. Allow zcrx users to specify the buffer size in struct io_uring_zcrx_ifq_reg::rx_buf_len. If it is set to zero, zcrx will use a default value. zcrx will check and fail if the memory backing the area can't be split into physically contiguous chunks of the required size. This check is more restrictive than strictly necessary, as only the DMA addresses need to be contiguous, but relaxing it is beyond the scope of this series. Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> [axboe: kill duplicate netdev_queues.h include] Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent d1de61d commit 795663b

2 files changed

Lines changed: 34 additions & 6 deletions

File tree

include/uapi/linux/io_uring.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1082,7 +1082,7 @@ struct io_uring_zcrx_ifq_reg {
10821082

10831083
struct io_uring_zcrx_offsets offsets;
10841084
__u32 zcrx_id;
1085-
__u32 __resv2;
1085+
__u32 rx_buf_len;
10861086
__u64 __resv[3];
10871087
};
10881088

io_uring/zcrx.c

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,18 @@ static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
5555
return area->mem.pages[net_iov_idx(niov) << niov_pages_shift];
5656
}
5757

58+
/*
 * Compute the largest buffer-size shift the area's memory can support.
 *
 * Walks every DMA segment of mem->sgt and keeps the minimum of
 * __ffs(sg->length) across them, starting from -1U. The result is the
 * biggest shift for which each segment length is a multiple of
 * (1 << shift). io_zcrx_create_area() compares the requested buffer shift
 * against this value and fails with -ERANGE when the request is larger.
 *
 * If the table yields no segments, the initial -1U is returned unchanged
 * (converted to the int return type).
 *
 * NOTE(review): __ffs() is presumably the kernel "index of lowest set bit"
 * helper, which is undefined for 0 -- this assumes no zero-length segments;
 * TODO confirm.
 * NOTE(review): the loop iterates DMA segments but reads sg->length rather
 * than sg_dma_len(sg) -- confirm this is the intended (stricter) check.
 */
static int io_area_max_shift(struct io_zcrx_mem *mem)
59+
{
60+
struct sg_table *sgt = mem->sgt;
61+
struct scatterlist *sg;
62+
unsigned shift = -1U;
63+
unsigned i;
64+
65+
for_each_sgtable_dma_sg(sgt, sg, i)
66+
shift = min(shift, __ffs(sg->length));
67+
return shift;
68+
}
69+
5870
static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
5971
struct io_zcrx_area *area)
6072
{
@@ -416,12 +428,21 @@ static int io_zcrx_append_area(struct io_zcrx_ifq *ifq,
416428
}
417429

418430
static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
419-
struct io_uring_zcrx_area_reg *area_reg)
431+
struct io_uring_zcrx_area_reg *area_reg,
432+
struct io_uring_zcrx_ifq_reg *reg)
420433
{
434+
int buf_size_shift = PAGE_SHIFT;
421435
struct io_zcrx_area *area;
422436
unsigned nr_iovs;
423437
int i, ret;
424438

439+
if (reg->rx_buf_len) {
440+
if (!is_power_of_2(reg->rx_buf_len) ||
441+
reg->rx_buf_len < PAGE_SIZE)
442+
return -EINVAL;
443+
buf_size_shift = ilog2(reg->rx_buf_len);
444+
}
445+
425446
ret = -ENOMEM;
426447
area = kzalloc(sizeof(*area), GFP_KERNEL);
427448
if (!area)
@@ -432,7 +453,12 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
432453
if (ret)
433454
goto err;
434455

435-
ifq->niov_shift = PAGE_SHIFT;
456+
if (buf_size_shift > io_area_max_shift(&area->mem)) {
457+
ret = -ERANGE;
458+
goto err;
459+
}
460+
461+
ifq->niov_shift = buf_size_shift;
436462
nr_iovs = area->mem.size >> ifq->niov_shift;
437463
area->nia.num_niovs = nr_iovs;
438464

@@ -742,8 +768,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
742768
return -EINVAL;
743769
if (copy_from_user(&reg, arg, sizeof(reg)))
744770
return -EFAULT;
745-
if (!mem_is_zero(&reg.__resv, sizeof(reg.__resv)) ||
746-
reg.__resv2 || reg.zcrx_id)
771+
if (!mem_is_zero(&reg.__resv, sizeof(reg.__resv)) || reg.zcrx_id)
747772
return -EINVAL;
748773
if (reg.flags & ZCRX_REG_IMPORT)
749774
return import_zcrx(ctx, arg, &reg);
@@ -800,10 +825,11 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
800825
}
801826
get_device(ifq->dev);
802827

803-
ret = io_zcrx_create_area(ifq, &area);
828+
ret = io_zcrx_create_area(ifq, &area, &reg);
804829
if (ret)
805830
goto netdev_put_unlock;
806831

832+
mp_param.rx_page_size = 1U << ifq->niov_shift;
807833
mp_param.mp_ops = &io_uring_pp_zc_ops;
808834
mp_param.mp_priv = ifq;
809835
ret = __net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param, NULL);
@@ -821,6 +847,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
821847
goto err;
822848
}
823849

850+
reg.rx_buf_len = 1U << ifq->niov_shift;
851+
824852
if (copy_to_user(arg, &reg, sizeof(reg)) ||
825853
copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd)) ||
826854
copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) {

0 commit comments

Comments
 (0)