@@ -176,43 +176,12 @@ static void bvec_free(struct mempool *pool, struct bio_vec *bv,
  * Make the first allocation restricted and don't dump info on allocation
  * failures, since we'll fall back to the mempool in case of failure.
  */
-static inline gfp_t bvec_alloc_gfp(gfp_t gfp)
+static inline gfp_t try_alloc_gfp(gfp_t gfp)
 {
        return (gfp & ~(__GFP_DIRECT_RECLAIM | __GFP_IO)) |
                __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
 }
 
-static struct bio_vec *bvec_alloc(struct mempool *pool, unsigned short *nr_vecs,
-               gfp_t gfp_mask)
-{
-       struct biovec_slab *bvs = biovec_slab(*nr_vecs);
-
-       if (WARN_ON_ONCE(!bvs))
-               return NULL;
-
-       /*
-        * Upgrade the nr_vecs request to take full advantage of the allocation.
-        * We also rely on this in the bvec_free path.
-        */
-       *nr_vecs = bvs->nr_vecs;
-
-       /*
-        * Try a slab allocation first for all smaller allocations. If that
-        * fails and __GFP_DIRECT_RECLAIM is set retry with the mempool.
-        * The mempool is sized to handle up to BIO_MAX_VECS entries.
-        */
-       if (*nr_vecs < BIO_MAX_VECS) {
-               struct bio_vec *bvl;
-
-               bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask));
-               if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM))
-                       return bvl;
-               *nr_vecs = BIO_MAX_VECS;
-       }
-
-       return mempool_alloc(pool, gfp_mask);
-}
-
 void bio_uninit(struct bio *bio)
 {
 #ifdef CONFIG_BLK_CGROUP
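
For intuition, here is a hedged sketch (illustration only, not part of the commit) of the two-step pattern try_alloc_gfp() enables: an opportunistic slab attempt that fails fast, quietly, and without dipping into memory reserves, followed by a blocking fallback with the caller's original mask. The names example_slab and example_pool are hypothetical.

        /* Illustration only: example_slab/example_pool are hypothetical. */
        static void *alloc_fast_then_reserved(struct kmem_cache *example_slab,
                                              mempool_t *example_pool, gfp_t gfp)
        {
                /* downgraded attempt: no direct reclaim, no I/O, no warnings */
                void *p = kmem_cache_alloc(example_slab, try_alloc_gfp(gfp));

                if (p || !(gfp & __GFP_DIRECT_RECLAIM))
                        return p;       /* success, or a non-blocking caller */
                /* blocking fallback with the original mask: cannot fail */
                return mempool_alloc(example_pool, gfp);
        }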
@@ -433,13 +402,31 @@ static void bio_alloc_rescue(struct work_struct *work)
        }
 }
 
+/*
+ * submit_bio_noacct() converts recursion to iteration; this means if we're
+ * running beneath it, any bios we allocate and submit will not be submitted
+ * (and thus freed) until after we return.
+ *
+ * This exposes us to a potential deadlock if we allocate multiple bios from the
+ * same bio_set while running underneath submit_bio_noacct(). If we were to
+ * allocate multiple bios (say a stacking block driver that was splitting bios),
+ * we would deadlock if we exhausted the mempool's reserve.
+ *
+ * We solve this, and guarantee forward progress, by punting the bios on
+ * current->bio_list to a per bio_set rescuer workqueue before blocking to wait
+ * for elements being returned to the mempool.
+ */
 static void punt_bios_to_rescuer(struct bio_set *bs)
 {
        struct bio_list punt, nopunt;
        struct bio *bio;
 
-       if (WARN_ON_ONCE(!bs->rescue_workqueue))
+       if (!current->bio_list || !bs->rescue_workqueue)
                return;
+       if (bio_list_empty(&current->bio_list[0]) &&
+           bio_list_empty(&current->bio_list[1]))
+               return;
+
        /*
         * In order to guarantee forward progress we must punt only bios that
         * were allocated from this bio_set; otherwise, if there was a bio on
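
Since the hunk cuts off mid-function, here is a hedged sketch of how the remainder of punt_bios_to_rescuer() proceeds, following the longstanding upstream implementation (the rescue_lock, rescue_list, rescue_work, and rescue_workqueue fields live in struct bio_set):

        bio_list_init(&punt);
        bio_list_init(&nopunt);

        /* keep bios from other bio_sets; punt only our own */
        while ((bio = bio_list_pop(&current->bio_list[0])))
                bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
        current->bio_list[0] = nopunt;
        /* ...the same loop runs again for current->bio_list[1]... */

        spin_lock(&bs->rescue_lock);
        bio_list_merge(&bs->rescue_list, &punt);
        spin_unlock(&bs->rescue_lock);
        queue_work(bs->rescue_workqueue, &bs->rescue_work);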
@@ -486,9 +473,7 @@ static void bio_alloc_irq_cache_splice(struct bio_alloc_cache *cache)
        local_irq_restore(flags);
 }
 
-static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
-               unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp,
-               struct bio_set *bs)
+static struct bio *bio_alloc_percpu_cache(struct bio_set *bs)
 {
        struct bio_alloc_cache *cache;
        struct bio *bio;
@@ -506,11 +491,6 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
        cache->free_list = bio->bi_next;
        cache->nr--;
        put_cpu();
-
-       if (nr_vecs)
-               bio_init_inline(bio, bdev, nr_vecs, opf);
-       else
-               bio_init(bio, bdev, NULL, nr_vecs, opf);
        bio->bi_pool = bs;
        return bio;
 }
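
For context (the top of the function is elided between the two hunks above), a hedged sketch of the whole simplified allocator: the per-CPU cache is a singly linked free list popped with preemption disabled, as in the kernel's bio_alloc_cache.

        static struct bio *bio_alloc_percpu_cache(struct bio_set *bs)
        {
                struct bio_alloc_cache *cache = per_cpu_ptr(bs->cache, get_cpu());
                struct bio *bio = cache->free_list;

                if (!bio) {
                        put_cpu();
                        return NULL;
                }
                cache->free_list = bio->bi_next;        /* pop the head */
                cache->nr--;
                put_cpu();
                bio->bi_pool = bs;
                return bio;
        }

With bio_init moved out of this helper, the cache only hands back raw bios; the single initialization path at the bottom of bio_alloc_bioset() now covers cached, slab, and mempool allocations alike.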
@@ -520,7 +500,7 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
  * @bdev: block device to allocate the bio for (can be %NULL)
  * @nr_vecs: number of bvecs to pre-allocate
  * @opf: operation and flags for bio
- * @gfp_mask: the GFP_* mask given to the slab allocator
+ * @gfp: the GFP_* mask given to the slab allocator
  * @bs: the bio_set to allocate from.
  *
  * Allocate a bio from the mempools in @bs.
@@ -550,91 +530,77 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
  * Returns: Pointer to new bio on success, NULL on failure.
  */
 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
-                            blk_opf_t opf, gfp_t gfp_mask,
-                            struct bio_set *bs)
+                            blk_opf_t opf, gfp_t gfp, struct bio_set *bs)
 {
-       gfp_t saved_gfp = gfp_mask;
-       struct bio *bio;
+       struct bio_vec *bvecs = NULL;
+       struct bio *bio = NULL;
+       gfp_t saved_gfp = gfp;
        void *p;
 
        /* should not use nobvec bioset for nr_vecs > 0 */
        if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
                return NULL;
 
+       gfp = try_alloc_gfp(gfp);
        if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
-               opf |= REQ_ALLOC_CACHE;
-               bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
-                                            gfp_mask, bs);
-               if (bio)
-                       return bio;
                /*
-                * No cached bio available, bio returned below marked with
-                * REQ_ALLOC_CACHE to participate in per-cpu alloc cache.
+                * Set REQ_ALLOC_CACHE even if no cached bio is available to
+                * return the allocated bio to the percpu cache when done.
                 */
-       } else
+               opf |= REQ_ALLOC_CACHE;
+               bio = bio_alloc_percpu_cache(bs);
+       } else {
                opf &= ~REQ_ALLOC_CACHE;
-
-       /*
-        * submit_bio_noacct() converts recursion to iteration; this means if
-        * we're running beneath it, any bios we allocate and submit will not be
-        * submitted (and thus freed) until after we return.
-        *
-        * This exposes us to a potential deadlock if we allocate multiple bios
-        * from the same bio_set() while running underneath submit_bio_noacct().
-        * If we were to allocate multiple bios (say a stacking block driver
-        * that was splitting bios), we would deadlock if we exhausted the
-        * mempool's reserve.
-        *
-        * We solve this, and guarantee forward progress, with a rescuer
-        * workqueue per bio_set. If we go to allocate and there are bios on
-        * current->bio_list, we first try the allocation without
-        * __GFP_DIRECT_RECLAIM; if that fails, we punt those bios we would be
-        * blocking to the rescuer workqueue before we retry with the original
-        * gfp_flags.
-        */
-       if (current->bio_list &&
-           (!bio_list_empty(&current->bio_list[0]) ||
-            !bio_list_empty(&current->bio_list[1])) &&
-           bs->rescue_workqueue)
-               gfp_mask &= ~__GFP_DIRECT_RECLAIM;
-
-       p = mempool_alloc(&bs->bio_pool, gfp_mask);
-       if (!p && gfp_mask != saved_gfp) {
-               punt_bios_to_rescuer(bs);
-               gfp_mask = saved_gfp;
-               p = mempool_alloc(&bs->bio_pool, gfp_mask);
+               p = kmem_cache_alloc(bs->bio_slab, gfp);
+               if (p)
+                       bio = p + bs->front_pad;
        }
-       if (unlikely(!p))
-               return NULL;
-       if (!mempool_is_saturated(&bs->bio_pool))
-               opf &= ~REQ_ALLOC_CACHE;
 
-       bio = p + bs->front_pad;
-       if (nr_vecs > BIO_INLINE_VECS) {
-               struct bio_vec *bvl = NULL;
+       if (bio && nr_vecs > BIO_INLINE_VECS) {
+               struct biovec_slab *bvs = biovec_slab(nr_vecs);
 
-               bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
-               if (!bvl && gfp_mask != saved_gfp) {
-                       punt_bios_to_rescuer(bs);
-                       gfp_mask = saved_gfp;
-                       bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
+               /*
+                * Upgrade nr_vecs to take full advantage of the allocation.
+                * We also rely on this in bvec_free().
+                */
+               nr_vecs = bvs->nr_vecs;
+               bvecs = kmem_cache_alloc(bvs->slab, gfp);
+               if (unlikely(!bvecs)) {
+                       kmem_cache_free(bs->bio_slab, p);
+                       bio = NULL;
                }
-               if (unlikely(!bvl))
-                       goto err_free;
+       }
 
-               bio_init(bio, bdev, bvl, nr_vecs, opf);
-       } else if (nr_vecs) {
-               bio_init_inline(bio, bdev, BIO_INLINE_VECS, opf);
-       } else {
-               bio_init(bio, bdev, NULL, 0, opf);
+       if (unlikely(!bio)) {
+               /*
+                * Give up if we are not allowed to sleep, as non-blocking
+                * mempool allocations just fall back to the slab allocation.
+                */
+               if (!(saved_gfp & __GFP_DIRECT_RECLAIM))
+                       return NULL;
+
+               punt_bios_to_rescuer(bs);
+
+               /*
+                * Don't rob the mempools by returning to the per-CPU cache if
+                * we're tight on memory.
+                */
+               opf &= ~REQ_ALLOC_CACHE;
+
+               p = mempool_alloc(&bs->bio_pool, saved_gfp);
+               bio = p + bs->front_pad;
+               if (nr_vecs > BIO_INLINE_VECS) {
+                       nr_vecs = BIO_MAX_VECS;
+                       bvecs = mempool_alloc(&bs->bvec_pool, saved_gfp);
+               }
        }
 
+       if (nr_vecs && nr_vecs <= BIO_INLINE_VECS)
+               bio_init_inline(bio, bdev, nr_vecs, opf);
+       else
+               bio_init(bio, bdev, bvecs, nr_vecs, opf);
        bio->bi_pool = bs;
        return bio;
-
-err_free:
-       mempool_free(p, &bs->bio_pool);
-       return NULL;
 }
 EXPORT_SYMBOL(bio_alloc_bioset);
 
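The caller-visible contract is unchanged by the rework. A hedged usage sketch (my_bio_set, sector, and my_end_io are hypothetical): a driver initialises its own bio_set once, and with a blocking mask such as GFP_NOIO the allocation cannot fail.

        static struct bio_set my_bio_set;       /* hypothetical driver-owned set */

        /* once, at driver init: reserve bios and bvecs for forward progress */
        if (bioset_init(&my_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
                return -ENOMEM;

        /* per I/O: cannot return NULL because GFP_NOIO allows direct reclaim */
        struct bio *bio = bio_alloc_bioset(bdev, 4, REQ_OP_READ, GFP_NOIO,
                                           &my_bio_set);
        bio->bi_iter.bi_sector = sector;
        bio->bi_end_io = my_end_io;             /* hypothetical completion handler */
        submit_bio(bio);

Stacking drivers that may allocate more than one bio from the same set while running under submit_bio_noacct() should also pass BIOSET_NEED_RESCUER to bioset_init(), so punt_bios_to_rescuer() has a workqueue to punt to.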