@@ -34,6 +34,8 @@ struct bio_alloc_cache {
 	unsigned int nr_irq;
 };
 
+#define BIO_INLINE_VECS 4
+
 static struct biovec_slab {
	int nr_vecs;
	char *name;
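BIO_INLINE_VECS is the number of bio_vecs embedded directly in every bio, so requests of up to four segments need no separate vector allocation; anything larger is rounded up to one of the biovec_slab size classes declared above. Below is a minimal userspace model of that rounding, assuming the upstream classes of 16, 64, 128, and BIO_MAX_VECS (256) vectors; it sketches the lookup biovec_slab() performs, not the kernel code itself.

#include <stdio.h>

/* Assumed size classes, modeled on the kernel's biovec-16/64/128/max slabs. */
static const int size_classes[] = { 16, 64, 128, 256 };

/* Round a vector count up to the smallest class that can hold it, the way
 * biovec_slab() picks a kmem_cache for requests above BIO_INLINE_VECS. */
static int vec_class(int nr_vecs)
{
	for (size_t i = 0; i < sizeof(size_classes) / sizeof(*size_classes); i++)
		if (nr_vecs <= size_classes[i])
			return size_classes[i];
	return -1;	/* larger than BIO_MAX_VECS: the caller must split */
}

int main(void)
{
	/* 5..16 share one slab, 17..64 the next, and so on. */
	printf("%d %d %d\n", vec_class(5), vec_class(17), vec_class(200));
	return 0;
}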
@@ -159,57 +161,16 @@ static void bio_put_slab(struct bio_set *bs)
 	mutex_unlock(&bio_slab_lock);
 }
 
-void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs)
-{
-	BUG_ON(nr_vecs > BIO_MAX_VECS);
-
-	if (nr_vecs == BIO_MAX_VECS)
-		mempool_free(bv, pool);
-	else if (nr_vecs > BIO_INLINE_VECS)
-		kmem_cache_free(biovec_slab(nr_vecs)->slab, bv);
-}
-
 /*
  * Make the first allocation restricted and don't dump info on allocation
  * failures, since we'll fall back to the mempool in case of failure.
  */
-static inline gfp_t bvec_alloc_gfp(gfp_t gfp)
+static inline gfp_t try_alloc_gfp(gfp_t gfp)
 {
 	return (gfp & ~(__GFP_DIRECT_RECLAIM | __GFP_IO)) |
 		__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
 }
 
-struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
-		gfp_t gfp_mask)
-{
-	struct biovec_slab *bvs = biovec_slab(*nr_vecs);
-
-	if (WARN_ON_ONCE(!bvs))
-		return NULL;
-
-	/*
-	 * Upgrade the nr_vecs request to take full advantage of the allocation.
-	 * We also rely on this in the bvec_free path.
-	 */
-	*nr_vecs = bvs->nr_vecs;
-
-	/*
-	 * Try a slab allocation first for all smaller allocations.  If that
-	 * fails and __GFP_DIRECT_RECLAIM is set retry with the mempool.
-	 * The mempool is sized to handle up to BIO_MAX_VECS entries.
-	 */
-	if (*nr_vecs < BIO_MAX_VECS) {
-		struct bio_vec *bvl;
-
-		bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask));
-		if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM))
-			return bvl;
-		*nr_vecs = BIO_MAX_VECS;
-	}
-
-	return mempool_alloc(pool, gfp_mask);
-}
-
 void bio_uninit(struct bio *bio)
 {
 #ifdef CONFIG_BLK_CGROUP
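The helper keeps its behavior under its new name: the first attempt must not block, recurse into the I/O path, dip into emergency reserves, retry, or warn, because the mempool fallback handles the hard cases. A small sketch of the same flag arithmetic, using made-up flag values rather than the real GFP bit definitions:

#include <stdio.h>

/* Illustrative flag bits standing in for the GFP flags named above. */
#define GFP_DIRECT_RECLAIM	0x01
#define GFP_IO			0x02
#define GFP_NOMEMALLOC		0x04
#define GFP_NORETRY		0x08
#define GFP_NOWARN		0x10

/* Same shape as try_alloc_gfp(): drop the bits that let the allocation
 * block or recurse into I/O, and add the "fail fast and quietly" bits,
 * because a guaranteed fallback handles the hard case. */
static unsigned int try_alloc_flags(unsigned int gfp)
{
	return (gfp & ~(GFP_DIRECT_RECLAIM | GFP_IO)) |
		GFP_NOMEMALLOC | GFP_NORETRY | GFP_NOWARN;
}

int main(void)
{
	/* A blocking request (reclaim + IO allowed) becomes a cheap try. */
	printf("%#x -> %#x\n", GFP_DIRECT_RECLAIM | GFP_IO,
	       try_alloc_flags(GFP_DIRECT_RECLAIM | GFP_IO));
	return 0;
}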
@@ -231,9 +192,14 @@ static void bio_free(struct bio *bio)
 	void *p = bio;
 
 	WARN_ON_ONCE(!bs);
+	WARN_ON_ONCE(bio->bi_max_vecs > BIO_MAX_VECS);
 
 	bio_uninit(bio);
-	bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
+	if (bio->bi_max_vecs == BIO_MAX_VECS)
+		mempool_free(bio->bi_io_vec, &bs->bvec_pool);
+	else if (bio->bi_max_vecs > BIO_INLINE_VECS)
+		kmem_cache_free(biovec_slab(bio->bi_max_vecs)->slab,
+				bio->bi_io_vec);
 	mempool_free(p - bs->front_pad, &bs->bio_pool);
 }
 
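With bvec_free() folded in, bio_free() derives the release path from bi_max_vecs alone: a BIO_MAX_VECS array came from the mempool, anything above BIO_INLINE_VECS came from a size-class slab, and an inline array needs no separate free. A sketch of that dispatch, with reserve_free() and class_free() as hypothetical stand-ins for mempool_free() and kmem_cache_free():

#include <stdio.h>

#define BIO_INLINE_VECS	4
#define BIO_MAX_VECS	256	/* assumed upstream value */

static void reserve_free(void *vecs) { printf("to mempool: %p\n", vecs); }
static void class_free(void *vecs)   { printf("to slab: %p\n", vecs); }

/* Mirror of the new bio_free() dispatch: the capacity alone encodes where
 * the vector array came from, so no separate origin flag is needed. */
static void vec_free(unsigned short max_vecs, void *vecs)
{
	if (max_vecs == BIO_MAX_VECS)
		reserve_free(vecs);	/* mempool-backed */
	else if (max_vecs > BIO_INLINE_VECS)
		class_free(vecs);	/* slab size class */
	/* else: inline storage, freed along with the bio itself */
}

int main(void)
{
	char buf[1];

	vec_free(256, buf);	/* mempool path */
	vec_free(64, buf);	/* slab path */
	vec_free(4, buf);	/* inline: nothing to do */
	return 0;
}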
@@ -430,13 +396,31 @@ static void bio_alloc_rescue(struct work_struct *work)
 	}
 }
 
+/*
+ * submit_bio_noacct() converts recursion to iteration; this means if we're
+ * running beneath it, any bios we allocate and submit will not be submitted
+ * (and thus freed) until after we return.
+ *
+ * This exposes us to a potential deadlock if we allocate multiple bios from
+ * the same bio_set while running underneath submit_bio_noacct().  If we were
+ * to allocate multiple bios (say a stacking block driver that was splitting
+ * bios), we would deadlock if we exhausted the mempool's reserve.
+ *
+ * We solve this, and guarantee forward progress, by punting the bios on
+ * current->bio_list to a per-bio_set rescuer workqueue before blocking to
+ * wait for elements being returned to the mempool.
+ */
 static void punt_bios_to_rescuer(struct bio_set *bs)
 {
 	struct bio_list punt, nopunt;
 	struct bio *bio;
 
-	if (WARN_ON_ONCE(!bs->rescue_workqueue))
+	if (!current->bio_list || !bs->rescue_workqueue)
 		return;
+	if (bio_list_empty(&current->bio_list[0]) &&
+	    bio_list_empty(&current->bio_list[1]))
+		return;
+
 	/*
 	 * In order to guarantee forward progress we must punt only bios that
 	 * were allocated from this bio_set; otherwise, if there was a bio on
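The selective punt this comment describes (it continues past the hunk: only bios allocated from this bio_set may be handed to the rescuer) boils down to partitioning an intrusive list by owner while preserving order. A userspace sketch of that partition step, with struct node standing in for struct bio and owner for bio->bi_pool:

#include <stdio.h>

struct node {
	struct node *next;
	const void *owner;	/* stand-in for bio->bi_pool */
};

/* Split a pending list the way punt_bios_to_rescuer() does: entries owned
 * by this set go to "punt" (handed to the rescuer), everything else stays
 * on "nopunt", with order preserved via tail pointers. */
static void split_pending(struct node *pending, const void *set,
			  struct node **punt, struct node **nopunt)
{
	struct node **ptail = punt, **ntail = nopunt;

	*punt = *nopunt = NULL;
	while (pending) {
		struct node *n = pending;

		pending = n->next;
		n->next = NULL;
		if (n->owner == set) {
			*ptail = n;
			ptail = &n->next;
		} else {
			*ntail = n;
			ntail = &n->next;
		}
	}
}

int main(void)
{
	int set_a, set_b;
	struct node n2 = { NULL, &set_b }, n1 = { &n2, &set_a };
	struct node *punt, *nopunt;

	split_pending(&n1, &set_a, &punt, &nopunt);
	printf("punt owns a: %d, nopunt owns b: %d\n",
	       punt->owner == &set_a, nopunt->owner == &set_b);
	return 0;
}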
@@ -483,9 +467,7 @@ static void bio_alloc_irq_cache_splice(struct bio_alloc_cache *cache)
 	local_irq_restore(flags);
 }
 
-static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
-		unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp,
-		struct bio_set *bs)
+static struct bio *bio_alloc_percpu_cache(struct bio_set *bs)
 {
 	struct bio_alloc_cache *cache;
 	struct bio *bio;
@@ -503,11 +485,6 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
 	cache->free_list = bio->bi_next;
 	cache->nr--;
 	put_cpu();
-
-	if (nr_vecs)
-		bio_init_inline(bio, bdev, nr_vecs, opf);
-	else
-		bio_init(bio, bdev, NULL, nr_vecs, opf);
 	bio->bi_pool = bs;
 	return bio;
 }
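After this change bio_alloc_percpu_cache() only pops a bio off the per-CPU free list; initialization is left to the common bio_init path in bio_alloc_bioset(). The cache hit itself is a plain intrusive-stack pop, sketched below in userspace form (the get_cpu()/put_cpu() bracketing that makes it per-CPU safe is omitted):

#include <stdio.h>

struct obj {
	struct obj *next;	/* chained like bio->bi_next on the free list */
};

struct alloc_cache {
	struct obj *free_list;
	unsigned int nr;
};

/* The cache-hit path that remains in bio_alloc_percpu_cache(): unlink the
 * head of an intrusive free list and adjust the count. */
static struct obj *cache_pop(struct alloc_cache *cache)
{
	struct obj *obj = cache->free_list;

	if (!obj)
		return NULL;
	cache->free_list = obj->next;
	cache->nr--;
	return obj;
}

int main(void)
{
	struct obj b = { NULL }, a = { &b };
	struct alloc_cache cache = { &a, 2 };

	printf("popped a: %d, left: %u\n", cache_pop(&cache) == &a, cache.nr);
	return 0;
}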
@@ -517,7 +494,7 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
  * @bdev: block device to allocate the bio for (can be %NULL)
  * @nr_vecs: number of bvecs to pre-allocate
  * @opf: operation and flags for bio
- * @gfp_mask: the GFP_* mask given to the slab allocator
+ * @gfp: the GFP_* mask given to the slab allocator
  * @bs: the bio_set to allocate from.
  *
  * Allocate a bio from the mempools in @bs.
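For reference, a minimal caller sketch against the updated signature. It assumes a kernel-module context with a valid bdev and page, and my_endio is a hypothetical completion handler, so treat it as an illustration rather than a drop-in:

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Allocate a one-segment read bio from fs_bio_set, attach a page, submit. */
static void my_submit_read(struct block_device *bdev, struct page *page,
			   sector_t sector, bio_end_io_t *my_endio)
{
	struct bio *bio;

	bio = bio_alloc_bioset(bdev, 1, REQ_OP_READ, GFP_KERNEL, &fs_bio_set);
	if (!bio)
		return;	/* defensive: NULL only without __GFP_DIRECT_RECLAIM */

	bio->bi_iter.bi_sector = sector;
	bio->bi_end_io = my_endio;
	__bio_add_page(bio, page, PAGE_SIZE, 0);
	submit_bio(bio);	/* my_endio() runs on completion */
}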
@@ -547,91 +524,77 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
  * Returns: Pointer to new bio on success, NULL on failure.
  */
 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
-		blk_opf_t opf, gfp_t gfp_mask,
-		struct bio_set *bs)
+		blk_opf_t opf, gfp_t gfp, struct bio_set *bs)
 {
-	gfp_t saved_gfp = gfp_mask;
-	struct bio *bio;
+	struct bio_vec *bvecs = NULL;
+	struct bio *bio = NULL;
+	gfp_t saved_gfp = gfp;
 	void *p;
 
 	/* should not use nobvec bioset for nr_vecs > 0 */
 	if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
 		return NULL;
 
+	gfp = try_alloc_gfp(gfp);
 	if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
-		opf |= REQ_ALLOC_CACHE;
-		bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
-					     gfp_mask, bs);
-		if (bio)
-			return bio;
 		/*
-		 * No cached bio available, bio returned below marked with
-		 * REQ_ALLOC_CACHE to participate in per-cpu alloc cache.
+		 * Set REQ_ALLOC_CACHE even if no cached bio is available to
+		 * return the allocated bio to the percpu cache when done.
 		 */
-	} else
+		opf |= REQ_ALLOC_CACHE;
+		bio = bio_alloc_percpu_cache(bs);
+	} else {
 		opf &= ~REQ_ALLOC_CACHE;
-
-	/*
-	 * submit_bio_noacct() converts recursion to iteration; this means if
-	 * we're running beneath it, any bios we allocate and submit will not be
-	 * submitted (and thus freed) until after we return.
-	 *
-	 * This exposes us to a potential deadlock if we allocate multiple bios
-	 * from the same bio_set() while running underneath submit_bio_noacct().
-	 * If we were to allocate multiple bios (say a stacking block driver
-	 * that was splitting bios), we would deadlock if we exhausted the
-	 * mempool's reserve.
-	 *
-	 * We solve this, and guarantee forward progress, with a rescuer
-	 * workqueue per bio_set. If we go to allocate and there are bios on
-	 * current->bio_list, we first try the allocation without
-	 * __GFP_DIRECT_RECLAIM; if that fails, we punt those bios we would be
-	 * blocking to the rescuer workqueue before we retry with the original
-	 * gfp_flags.
-	 */
-	if (current->bio_list &&
-	    (!bio_list_empty(&current->bio_list[0]) ||
-	     !bio_list_empty(&current->bio_list[1])) &&
-	    bs->rescue_workqueue)
-		gfp_mask &= ~__GFP_DIRECT_RECLAIM;
-
-	p = mempool_alloc(&bs->bio_pool, gfp_mask);
-	if (!p && gfp_mask != saved_gfp) {
-		punt_bios_to_rescuer(bs);
-		gfp_mask = saved_gfp;
-		p = mempool_alloc(&bs->bio_pool, gfp_mask);
+		p = kmem_cache_alloc(bs->bio_slab, gfp);
+		if (p)
+			bio = p + bs->front_pad;
 	}
-	if (unlikely(!p))
-		return NULL;
-	if (!mempool_is_saturated(&bs->bio_pool))
-		opf &= ~REQ_ALLOC_CACHE;
 
-	bio = p + bs->front_pad;
-	if (nr_vecs > BIO_INLINE_VECS) {
-		struct bio_vec *bvl = NULL;
+	if (bio && nr_vecs > BIO_INLINE_VECS) {
+		struct biovec_slab *bvs = biovec_slab(nr_vecs);
 
-		bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
-		if (!bvl && gfp_mask != saved_gfp) {
-			punt_bios_to_rescuer(bs);
-			gfp_mask = saved_gfp;
-			bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
+		/*
+		 * Upgrade nr_vecs to take full advantage of the allocation.
+		 * We also rely on this in bio_free().
+		 */
+		nr_vecs = bvs->nr_vecs;
+		bvecs = kmem_cache_alloc(bvs->slab, gfp);
+		if (unlikely(!bvecs)) {
+			kmem_cache_free(bs->bio_slab, p);
+			bio = NULL;
 		}
-		if (unlikely(!bvl))
-			goto err_free;
+	}
 
-		bio_init(bio, bdev, bvl, nr_vecs, opf);
-	} else if (nr_vecs) {
-		bio_init_inline(bio, bdev, BIO_INLINE_VECS, opf);
-	} else {
-		bio_init(bio, bdev, NULL, 0, opf);
+	if (unlikely(!bio)) {
+		/*
+		 * Give up if we are not allowed to sleep: a non-blocking
+		 * mempool allocation would just retry the slab allocator.
+		 */
+		if (!(saved_gfp & __GFP_DIRECT_RECLAIM))
+			return NULL;
+
+		punt_bios_to_rescuer(bs);
+
+		/*
+		 * Don't rob the mempools by returning to the per-CPU cache if
+		 * we're tight on memory.
+		 */
+		opf &= ~REQ_ALLOC_CACHE;
+
+		p = mempool_alloc(&bs->bio_pool, saved_gfp);
+		bio = p + bs->front_pad;
+		if (nr_vecs > BIO_INLINE_VECS) {
+			nr_vecs = BIO_MAX_VECS;
+			bvecs = mempool_alloc(&bs->bvec_pool, saved_gfp);
+		}
 	}
 
+	if (nr_vecs && nr_vecs <= BIO_INLINE_VECS)
+		bio_init_inline(bio, bdev, nr_vecs, opf);
+	else
+		bio_init(bio, bdev, bvecs, nr_vecs, opf);
 	bio->bi_pool = bs;
 	return bio;
-
-err_free:
-	mempool_free(p, &bs->bio_pool);
-	return NULL;
 }
 EXPORT_SYMBOL(bio_alloc_bioset);
 
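Taken together, the rewritten bio_alloc_bioset() is: one cheap, non-blocking try (per-CPU cache or slab, under try_alloc_gfp()), and on failure, if the caller may sleep, a punt of pending bios followed by a mempool allocation that can block until an element returns and therefore cannot fail. A self-contained userspace model of that control flow; the reserve and punt hook are toy stand-ins for the mempool and rescuer workqueue:

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

#define OBJ_SIZE 128
#define RESERVE_SLOTS 2

/* A toy reserve standing in for the bio mempool. */
static char reserve_mem[RESERVE_SLOTS][OBJ_SIZE];
static int reserve_nr = RESERVE_SLOTS;

/* Stand-in for punt_bios_to_rescuer(): in the kernel this hands
 * current->bio_list to a workqueue so reserve objects can drain. */
static void punt_pending_work(void)
{
	puts("punting pending work before blocking");
}

static void *reserve_alloc(void)
{
	/* The real mempool_alloc() would sleep here until an element
	 * returns; the toy version just fails loudly when empty. */
	if (reserve_nr == 0) {
		puts("reserve empty (the kernel would block)");
		return NULL;
	}
	return reserve_mem[--reserve_nr];
}

/* Model of the rewritten bio_alloc_bioset() control flow: one cheap
 * non-blocking try, then punt, then the guaranteed fallback. */
static void *alloc_object(bool may_block)
{
	void *p = malloc(OBJ_SIZE);	/* cheap try, like try_alloc_gfp() */

	if (p)
		return p;
	if (!may_block)			/* no __GFP_DIRECT_RECLAIM: give up */
		return NULL;
	punt_pending_work();
	return reserve_alloc();
}

int main(void)
{
	void *p = alloc_object(true);

	if (p)
		puts("allocated");
	free(p);	/* safe: the toy happy path came from malloc() */
	return 0;
}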