Skip to content

Commit 8a34e88

Browse files
Ming Lei authored and Jens Axboe (axboe) committed
ublk: eliminate permanent pages[] array from struct ublk_buf
The pages[] array (kvmalloc'd, 8 bytes per page = 2MB for a 1GB buffer) was stored permanently in struct ublk_buf but only needed during pin_user_pages_fast() and maple tree construction. Since the maple tree already stores PFN ranges via ublk_buf_range, struct page pointers can be recovered via pfn_to_page() during unregistration. Make pages[] a temporary allocation in ublk_ctrl_reg_buf(), freed immediately after the maple tree is built. Rewrite __ublk_ctrl_unreg_buf() to iterate the maple tree for matching buf_index entries, recovering struct page pointers via pfn_to_page() and unpinning in batches of 32. Simplify ublk_buf_erase_ranges() to iterate the maple tree by buf_index instead of walking the now-removed pages[] array. Signed-off-by: Ming Lei <ming.lei@redhat.com> Link: https://patch.msgid.link/20260331153207.3635125-5-ming.lei@redhat.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 0867704 commit 8a34e88

1 file changed

Lines changed: 55 additions & 32 deletions

File tree

drivers/block/ublk_drv.c

Lines changed: 55 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,6 @@ struct ublk_queue {
296296

297297
/* Per-registered shared memory buffer */
298298
struct ublk_buf {
299-
struct page **pages;
300299
unsigned int nr_pages;
301300
};
302301

@@ -5261,41 +5260,39 @@ static void ublk_unquiesce_and_resume(struct gendisk *disk)
52615260
blk_mq_unquiesce_queue(disk->queue);
52625261
}
52635262

5264-
/* Erase coalesced PFN ranges from the maple tree for pages [0, nr_pages) */
5265-
static void ublk_buf_erase_ranges(struct ublk_device *ub,
5266-
struct ublk_buf *ubuf,
5267-
unsigned long nr_pages)
5263+
/* Erase coalesced PFN ranges from the maple tree matching buf_index */
5264+
static void ublk_buf_erase_ranges(struct ublk_device *ub, int buf_index)
52685265
{
5269-
unsigned long i;
5270-
5271-
for (i = 0; i < nr_pages; ) {
5272-
unsigned long pfn = page_to_pfn(ubuf->pages[i]);
5273-
unsigned long start = i;
5266+
MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX);
5267+
struct ublk_buf_range *range;
52745268

5275-
while (i + 1 < nr_pages &&
5276-
page_to_pfn(ubuf->pages[i + 1]) == pfn + (i - start) + 1)
5277-
i++;
5278-
i++;
5279-
kfree(mtree_erase(&ub->buf_tree, pfn));
5269+
mas_lock(&mas);
5270+
mas_for_each(&mas, range, ULONG_MAX) {
5271+
if (range->buf_index == buf_index) {
5272+
mas_erase(&mas);
5273+
kfree(range);
5274+
}
52805275
}
5276+
mas_unlock(&mas);
52815277
}
52825278

52835279
static int __ublk_ctrl_reg_buf(struct ublk_device *ub,
5284-
struct ublk_buf *ubuf, int index,
5280+
struct ublk_buf *ubuf,
5281+
struct page **pages, int index,
52855282
unsigned short flags)
52865283
{
52875284
unsigned long nr_pages = ubuf->nr_pages;
52885285
unsigned long i;
52895286
int ret;
52905287

52915288
for (i = 0; i < nr_pages; ) {
5292-
unsigned long pfn = page_to_pfn(ubuf->pages[i]);
5289+
unsigned long pfn = page_to_pfn(pages[i]);
52935290
unsigned long start = i;
52945291
struct ublk_buf_range *range;
52955292

52965293
/* Find run of consecutive PFNs */
52975294
while (i + 1 < nr_pages &&
5298-
page_to_pfn(ubuf->pages[i + 1]) == pfn + (i - start) + 1)
5295+
page_to_pfn(pages[i + 1]) == pfn + (i - start) + 1)
52995296
i++;
53005297
i++; /* past the last page in this run */
53015298

@@ -5320,7 +5317,7 @@ static int __ublk_ctrl_reg_buf(struct ublk_device *ub,
53205317
return 0;
53215318

53225319
unwind:
5323-
ublk_buf_erase_ranges(ub, ubuf, i);
5320+
ublk_buf_erase_ranges(ub, index);
53245321
return ret;
53255322
}
53265323

@@ -5335,6 +5332,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
53355332
void __user *argp = (void __user *)(unsigned long)header->addr;
53365333
struct ublk_shmem_buf_reg buf_reg;
53375334
unsigned long addr, size, nr_pages;
5335+
struct page **pages = NULL;
53385336
unsigned int gup_flags;
53395337
struct gendisk *disk;
53405338
struct ublk_buf *ubuf;
@@ -5371,9 +5369,8 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
53715369
goto put_disk;
53725370
}
53735371

5374-
ubuf->pages = kvmalloc_array(nr_pages, sizeof(*ubuf->pages),
5375-
GFP_KERNEL);
5376-
if (!ubuf->pages) {
5372+
pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
5373+
if (!pages) {
53775374
ret = -ENOMEM;
53785375
goto err_free;
53795376
}
@@ -5382,7 +5379,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
53825379
if (!(buf_reg.flags & UBLK_SHMEM_BUF_READ_ONLY))
53835380
gup_flags |= FOLL_WRITE;
53845381

5385-
pinned = pin_user_pages_fast(addr, nr_pages, gup_flags, ubuf->pages);
5382+
pinned = pin_user_pages_fast(addr, nr_pages, gup_flags, pages);
53865383
if (pinned < 0) {
53875384
ret = pinned;
53885385
goto err_free_pages;
@@ -5406,14 +5403,15 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
54065403
if (ret)
54075404
goto err_unlock;
54085405

5409-
ret = __ublk_ctrl_reg_buf(ub, ubuf, index, buf_reg.flags);
5406+
ret = __ublk_ctrl_reg_buf(ub, ubuf, pages, index, buf_reg.flags);
54105407
if (ret) {
54115408
xa_erase(&ub->bufs_xa, index);
54125409
goto err_unlock;
54135410
}
54145411

54155412
mutex_unlock(&ub->mutex);
54165413

5414+
kvfree(pages);
54175415
ublk_unquiesce_and_resume(disk);
54185416
ublk_put_disk(disk);
54195417
return index;
@@ -5422,9 +5420,9 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
54225420
mutex_unlock(&ub->mutex);
54235421
ublk_unquiesce_and_resume(disk);
54245422
err_unpin:
5425-
unpin_user_pages(ubuf->pages, pinned);
5423+
unpin_user_pages(pages, pinned);
54265424
err_free_pages:
5427-
kvfree(ubuf->pages);
5425+
kvfree(pages);
54285426
err_free:
54295427
kfree(ubuf);
54305428
put_disk:
@@ -5433,11 +5431,36 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
54335431
}
54345432

54355433
static void __ublk_ctrl_unreg_buf(struct ublk_device *ub,
5436-
struct ublk_buf *ubuf)
5434+
struct ublk_buf *ubuf, int buf_index)
54375435
{
5438-
ublk_buf_erase_ranges(ub, ubuf, ubuf->nr_pages);
5439-
unpin_user_pages(ubuf->pages, ubuf->nr_pages);
5440-
kvfree(ubuf->pages);
5436+
MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX);
5437+
struct ublk_buf_range *range;
5438+
struct page *pages[32];
5439+
5440+
mas_lock(&mas);
5441+
mas_for_each(&mas, range, ULONG_MAX) {
5442+
unsigned long base, nr, off;
5443+
5444+
if (range->buf_index != buf_index)
5445+
continue;
5446+
5447+
base = range->base_pfn;
5448+
nr = mas.last - mas.index + 1;
5449+
mas_erase(&mas);
5450+
5451+
for (off = 0; off < nr; ) {
5452+
unsigned int batch = min_t(unsigned long,
5453+
nr - off, 32);
5454+
unsigned int j;
5455+
5456+
for (j = 0; j < batch; j++)
5457+
pages[j] = pfn_to_page(base + off + j);
5458+
unpin_user_pages(pages, batch);
5459+
off += batch;
5460+
}
5461+
kfree(range);
5462+
}
5463+
mas_unlock(&mas);
54415464
kfree(ubuf);
54425465
}
54435466

@@ -5468,7 +5491,7 @@ static int ublk_ctrl_unreg_buf(struct ublk_device *ub,
54685491
return -ENOENT;
54695492
}
54705493

5471-
__ublk_ctrl_unreg_buf(ub, ubuf);
5494+
__ublk_ctrl_unreg_buf(ub, ubuf, index);
54725495

54735496
mutex_unlock(&ub->mutex);
54745497

@@ -5483,7 +5506,7 @@ static void ublk_buf_cleanup(struct ublk_device *ub)
54835506
unsigned long index;
54845507

54855508
xa_for_each(&ub->bufs_xa, index, ubuf)
5486-
__ublk_ctrl_unreg_buf(ub, ubuf);
5509+
__ublk_ctrl_unreg_buf(ub, ubuf, index);
54875510
xa_destroy(&ub->bufs_xa);
54885511
mtree_destroy(&ub->buf_tree);
54895512
}

0 commit comments

Comments (0)