Skip to content

Commit b520c4e

Browse files
Christoph Hellwig authored and axboe committed
block: split bio_alloc_bioset more clearly into a fast and slowpath
bio_alloc_bioset tries non-waiting slab allocations first for the bio and bvec array, but does so in a somewhat convoluted way. Restructure the function so that it first open codes these slab allocations, and then falls back to the mempools with the original gfp mask. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com> -ck Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Link: https://patch.msgid.link/20260316161144.1607877-3-hch@lst.de Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent fed406f commit b520c4e

2 files changed

Lines changed: 74 additions & 109 deletions

File tree

block/bio.c

Lines changed: 73 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -176,43 +176,12 @@ static void bvec_free(struct mempool *pool, struct bio_vec *bv,
176176
* Make the first allocation restricted and don't dump info on allocation
177177
* failures, since we'll fall back to the mempool in case of failure.
178178
*/
179-
static inline gfp_t bvec_alloc_gfp(gfp_t gfp)
179+
static inline gfp_t try_alloc_gfp(gfp_t gfp)
180180
{
181181
return (gfp & ~(__GFP_DIRECT_RECLAIM | __GFP_IO)) |
182182
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
183183
}
184184

185-
static struct bio_vec *bvec_alloc(struct mempool *pool, unsigned short *nr_vecs,
186-
gfp_t gfp_mask)
187-
{
188-
struct biovec_slab *bvs = biovec_slab(*nr_vecs);
189-
190-
if (WARN_ON_ONCE(!bvs))
191-
return NULL;
192-
193-
/*
194-
* Upgrade the nr_vecs request to take full advantage of the allocation.
195-
* We also rely on this in the bvec_free path.
196-
*/
197-
*nr_vecs = bvs->nr_vecs;
198-
199-
/*
200-
* Try a slab allocation first for all smaller allocations. If that
201-
* fails and __GFP_DIRECT_RECLAIM is set retry with the mempool.
202-
* The mempool is sized to handle up to BIO_MAX_VECS entries.
203-
*/
204-
if (*nr_vecs < BIO_MAX_VECS) {
205-
struct bio_vec *bvl;
206-
207-
bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask));
208-
if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM))
209-
return bvl;
210-
*nr_vecs = BIO_MAX_VECS;
211-
}
212-
213-
return mempool_alloc(pool, gfp_mask);
214-
}
215-
216185
void bio_uninit(struct bio *bio)
217186
{
218187
#ifdef CONFIG_BLK_CGROUP
@@ -433,13 +402,31 @@ static void bio_alloc_rescue(struct work_struct *work)
433402
}
434403
}
435404

405+
/*
406+
* submit_bio_noacct() converts recursion to iteration; this means if we're
407+
* running beneath it, any bios we allocate and submit will not be submitted
408+
* (and thus freed) until after we return.
409+
*
410+
* This exposes us to a potential deadlock if we allocate multiple bios from the
411+
* same bio_set while running underneath submit_bio_noacct(). If we were to
412+
* allocate multiple bios (say a stacking block driver that was splitting bios),
413+
* we would deadlock if we exhausted the mempool's reserve.
414+
*
415+
* We solve this, and guarantee forward progress by punting the bios on
416+
* current->bio_list to a per bio_set rescuer workqueue before blocking to wait
417+
* for elements being returned to the mempool.
418+
*/
436419
static void punt_bios_to_rescuer(struct bio_set *bs)
437420
{
438421
struct bio_list punt, nopunt;
439422
struct bio *bio;
440423

441-
if (WARN_ON_ONCE(!bs->rescue_workqueue))
424+
if (!current->bio_list || !bs->rescue_workqueue)
442425
return;
426+
if (bio_list_empty(&current->bio_list[0]) &&
427+
bio_list_empty(&current->bio_list[1]))
428+
return;
429+
443430
/*
444431
* In order to guarantee forward progress we must punt only bios that
445432
* were allocated from this bio_set; otherwise, if there was a bio on
@@ -486,9 +473,7 @@ static void bio_alloc_irq_cache_splice(struct bio_alloc_cache *cache)
486473
local_irq_restore(flags);
487474
}
488475

489-
static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
490-
unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp,
491-
struct bio_set *bs)
476+
static struct bio *bio_alloc_percpu_cache(struct bio_set *bs)
492477
{
493478
struct bio_alloc_cache *cache;
494479
struct bio *bio;
@@ -506,11 +491,6 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
506491
cache->free_list = bio->bi_next;
507492
cache->nr--;
508493
put_cpu();
509-
510-
if (nr_vecs)
511-
bio_init_inline(bio, bdev, nr_vecs, opf);
512-
else
513-
bio_init(bio, bdev, NULL, nr_vecs, opf);
514494
bio->bi_pool = bs;
515495
return bio;
516496
}
@@ -520,7 +500,7 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
520500
* @bdev: block device to allocate the bio for (can be %NULL)
521501
* @nr_vecs: number of bvecs to pre-allocate
522502
* @opf: operation and flags for bio
523-
* @gfp_mask: the GFP_* mask given to the slab allocator
503+
* @gfp: the GFP_* mask given to the slab allocator
524504
* @bs: the bio_set to allocate from.
525505
*
526506
* Allocate a bio from the mempools in @bs.
@@ -550,91 +530,77 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
550530
* Returns: Pointer to new bio on success, NULL on failure.
551531
*/
552532
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
553-
blk_opf_t opf, gfp_t gfp_mask,
554-
struct bio_set *bs)
533+
blk_opf_t opf, gfp_t gfp, struct bio_set *bs)
555534
{
556-
gfp_t saved_gfp = gfp_mask;
557-
struct bio *bio;
535+
struct bio_vec *bvecs = NULL;
536+
struct bio *bio = NULL;
537+
gfp_t saved_gfp = gfp;
558538
void *p;
559539

560540
/* should not use nobvec bioset for nr_vecs > 0 */
561541
if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
562542
return NULL;
563543

544+
gfp = try_alloc_gfp(gfp);
564545
if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
565-
opf |= REQ_ALLOC_CACHE;
566-
bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
567-
gfp_mask, bs);
568-
if (bio)
569-
return bio;
570546
/*
571-
* No cached bio available, bio returned below marked with
572-
* REQ_ALLOC_CACHE to participate in per-cpu alloc cache.
547+
* Set REQ_ALLOC_CACHE even if no cached bio is available to
548+
* return the allocated bio to the percpu cache when done.
573549
*/
574-
} else
550+
opf |= REQ_ALLOC_CACHE;
551+
bio = bio_alloc_percpu_cache(bs);
552+
} else {
575553
opf &= ~REQ_ALLOC_CACHE;
576-
577-
/*
578-
* submit_bio_noacct() converts recursion to iteration; this means if
579-
* we're running beneath it, any bios we allocate and submit will not be
580-
* submitted (and thus freed) until after we return.
581-
*
582-
* This exposes us to a potential deadlock if we allocate multiple bios
583-
* from the same bio_set() while running underneath submit_bio_noacct().
584-
* If we were to allocate multiple bios (say a stacking block driver
585-
* that was splitting bios), we would deadlock if we exhausted the
586-
* mempool's reserve.
587-
*
588-
* We solve this, and guarantee forward progress, with a rescuer
589-
* workqueue per bio_set. If we go to allocate and there are bios on
590-
* current->bio_list, we first try the allocation without
591-
* __GFP_DIRECT_RECLAIM; if that fails, we punt those bios we would be
592-
* blocking to the rescuer workqueue before we retry with the original
593-
* gfp_flags.
594-
*/
595-
if (current->bio_list &&
596-
(!bio_list_empty(&current->bio_list[0]) ||
597-
!bio_list_empty(&current->bio_list[1])) &&
598-
bs->rescue_workqueue)
599-
gfp_mask &= ~__GFP_DIRECT_RECLAIM;
600-
601-
p = mempool_alloc(&bs->bio_pool, gfp_mask);
602-
if (!p && gfp_mask != saved_gfp) {
603-
punt_bios_to_rescuer(bs);
604-
gfp_mask = saved_gfp;
605-
p = mempool_alloc(&bs->bio_pool, gfp_mask);
554+
p = kmem_cache_alloc(bs->bio_slab, gfp);
555+
if (p)
556+
bio = p + bs->front_pad;
606557
}
607-
if (unlikely(!p))
608-
return NULL;
609-
if (!mempool_is_saturated(&bs->bio_pool))
610-
opf &= ~REQ_ALLOC_CACHE;
611558

612-
bio = p + bs->front_pad;
613-
if (nr_vecs > BIO_INLINE_VECS) {
614-
struct bio_vec *bvl = NULL;
559+
if (bio && nr_vecs > BIO_INLINE_VECS) {
560+
struct biovec_slab *bvs = biovec_slab(nr_vecs);
615561

616-
bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
617-
if (!bvl && gfp_mask != saved_gfp) {
618-
punt_bios_to_rescuer(bs);
619-
gfp_mask = saved_gfp;
620-
bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
562+
/*
563+
* Upgrade nr_vecs to take full advantage of the allocation.
564+
* We also rely on this in bvec_free().
565+
*/
566+
nr_vecs = bvs->nr_vecs;
567+
bvecs = kmem_cache_alloc(bvs->slab, gfp);
568+
if (unlikely(!bvecs)) {
569+
kmem_cache_free(bs->bio_slab, p);
570+
bio = NULL;
621571
}
622-
if (unlikely(!bvl))
623-
goto err_free;
572+
}
624573

625-
bio_init(bio, bdev, bvl, nr_vecs, opf);
626-
} else if (nr_vecs) {
627-
bio_init_inline(bio, bdev, BIO_INLINE_VECS, opf);
628-
} else {
629-
bio_init(bio, bdev, NULL, 0, opf);
574+
if (unlikely(!bio)) {
575+
/*
576+
* Give up if we are not allowed to sleep as non-blocking mempool
577+
* allocations just go back to the slab allocation.
578+
*/
579+
if (!(saved_gfp & __GFP_DIRECT_RECLAIM))
580+
return NULL;
581+
582+
punt_bios_to_rescuer(bs);
583+
584+
/*
585+
* Don't rob the mempools by returning to the per-CPU cache if
586+
* we're tight on memory.
587+
*/
588+
opf &= ~REQ_ALLOC_CACHE;
589+
590+
p = mempool_alloc(&bs->bio_pool, gfp);
591+
bio = p + bs->front_pad;
592+
if (nr_vecs > BIO_INLINE_VECS) {
593+
nr_vecs = BIO_MAX_VECS;
594+
bvecs = mempool_alloc(&bs->bvec_pool, gfp);
595+
}
630596
}
631597

598+
if (nr_vecs && nr_vecs <= BIO_INLINE_VECS)
599+
bio_init_inline(bio, bdev, nr_vecs, opf);
600+
else
601+
bio_init(bio, bdev, bvecs, nr_vecs, opf);
632602
bio->bi_pool = bs;
633603
return bio;
634-
635-
err_free:
636-
mempool_free(p, &bs->bio_pool);
637-
return NULL;
638604
}
639605
EXPORT_SYMBOL(bio_alloc_bioset);
640606

include/linux/bio.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -350,8 +350,7 @@ extern void bioset_exit(struct bio_set *);
350350
extern int biovec_init_pool(mempool_t *pool, int pool_entries);
351351

352352
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
353-
blk_opf_t opf, gfp_t gfp_mask,
354-
struct bio_set *bs);
353+
blk_opf_t opf, gfp_t gfp, struct bio_set *bs);
355354
struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask);
356355
extern void bio_put(struct bio *);
357356

0 commit comments

Comments
 (0)