Skip to content

Commit 7ae37b2

Browse files
JP Kobryn (Meta) authored and kdave committed
btrfs: prevent direct reclaim during compressed readahead
Under memory pressure, direct reclaim can kick in during compressed readahead. This puts the associated task into D-state. Then shrink_lruvec() disables interrupts when acquiring the LRU lock. Under heavy pressure, we've observed reclaim can run long enough that the CPU becomes prone to CSD lock stalls since it cannot service incoming IPIs. Although the CSD lock stalls are the worst case scenario, we have found many more subtle occurrences of this latency on the order of seconds, over a minute in some cases.

Prevent direct reclaim during compressed readahead. This is achieved by using different GFP flags at key points when the bio is marked for readahead.

There are two functions that allocate during compressed readahead: btrfs_alloc_compr_folio() and add_ra_bio_pages(). Both currently use GFP_NOFS which includes __GFP_DIRECT_RECLAIM.

For the internal API call btrfs_alloc_compr_folio(), the signature changes to accept an additional gfp_t parameter. At the readahead call site, it gets flags similar to GFP_NOFS but stripped of __GFP_DIRECT_RECLAIM. __GFP_NOWARN is added since these allocations are allowed to fail. Demand reads still use full GFP_NOFS and will enter reclaim if needed. All other existing call sites of btrfs_alloc_compr_folio() now explicitly pass GFP_NOFS to retain their current behavior.

add_ra_bio_pages() gains a bool parameter which allows callers to specify if they want to allow direct reclaim or not. In either case, the __GFP_NOWARN flag was added unconditionally since the allocations are speculative.

There has been some previous work done on calling add_ra_bio_pages() [0]. This patch is complementary: where that patch reduces call frequency, this patch reduces the latency associated with those calls.
[0] https://lore.kernel.org/linux-btrfs/656838ec1232314a2657716e59f4f15a8eadba64.1751492111.git.boris@bur.io/

Reviewed-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: JP Kobryn (Meta) <jp.kobryn@linux.dev>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 30d537f commit 7ae37b2

6 files changed

Lines changed: 45 additions & 18 deletions

File tree

fs/btrfs/compression.c

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ static unsigned long btrfs_compr_pool_scan(struct shrinker *sh, struct shrink_co
180180
/*
181181
* Common wrappers for page allocation from compression wrappers
182182
*/
183-
struct folio *btrfs_alloc_compr_folio(struct btrfs_fs_info *fs_info)
183+
struct folio *btrfs_alloc_compr_folio(struct btrfs_fs_info *fs_info, gfp_t gfp)
184184
{
185185
struct folio *folio = NULL;
186186

@@ -200,7 +200,7 @@ struct folio *btrfs_alloc_compr_folio(struct btrfs_fs_info *fs_info)
200200
return folio;
201201

202202
alloc:
203-
return folio_alloc(GFP_NOFS, fs_info->block_min_order);
203+
return folio_alloc(gfp, fs_info->block_min_order);
204204
}
205205

206206
void btrfs_free_compr_folio(struct folio *folio)
@@ -368,14 +368,16 @@ struct compressed_bio *btrfs_alloc_compressed_write(struct btrfs_inode *inode,
368368
static noinline int add_ra_bio_pages(struct inode *inode,
369369
u64 compressed_end,
370370
struct compressed_bio *cb,
371-
int *memstall, unsigned long *pflags)
371+
int *memstall, unsigned long *pflags,
372+
bool direct_reclaim)
372373
{
373374
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
374375
pgoff_t end_index;
375376
struct bio *orig_bio = &cb->orig_bbio->bio;
376377
u64 cur = cb->orig_bbio->file_offset + orig_bio->bi_iter.bi_size;
377378
u64 isize = i_size_read(inode);
378379
int ret;
380+
gfp_t constraint_gfp, cache_gfp;
379381
struct folio *folio;
380382
struct extent_map *em;
381383
struct address_space *mapping = inode->i_mapping;
@@ -405,6 +407,19 @@ static noinline int add_ra_bio_pages(struct inode *inode,
405407

406408
end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
407409

410+
/*
411+
* Avoid direct reclaim when the caller does not allow it. Since
412+
* add_ra_bio_pages() is always speculative, suppress allocation warnings
413+
* in either case.
414+
*/
415+
if (!direct_reclaim) {
416+
constraint_gfp = ~(__GFP_FS | __GFP_DIRECT_RECLAIM) | __GFP_NOWARN;
417+
cache_gfp = (GFP_NOFS & ~__GFP_DIRECT_RECLAIM) | __GFP_NOWARN;
418+
} else {
419+
constraint_gfp = (~__GFP_FS) | __GFP_NOWARN;
420+
cache_gfp = GFP_NOFS | __GFP_NOWARN;
421+
}
422+
408423
while (cur < compressed_end) {
409424
pgoff_t page_end;
410425
pgoff_t pg_index = cur >> PAGE_SHIFT;
@@ -434,12 +449,12 @@ static noinline int add_ra_bio_pages(struct inode *inode,
434449
continue;
435450
}
436451

437-
folio = filemap_alloc_folio(mapping_gfp_constraint(mapping, ~__GFP_FS),
452+
folio = filemap_alloc_folio(mapping_gfp_constraint(mapping, constraint_gfp),
438453
0, NULL);
439454
if (!folio)
440455
break;
441456

442-
if (filemap_add_folio(mapping, folio, pg_index, GFP_NOFS)) {
457+
if (filemap_add_folio(mapping, folio, pg_index, cache_gfp)) {
443458
/* There is already a page, skip to page end */
444459
cur += folio_size(folio);
445460
folio_put(folio);
@@ -532,13 +547,25 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
532547
unsigned int compressed_len;
533548
const u32 min_folio_size = btrfs_min_folio_size(fs_info);
534549
u64 file_offset = bbio->file_offset;
550+
gfp_t gfp;
535551
u64 em_len;
536552
u64 em_start;
537553
struct extent_map *em;
538554
unsigned long pflags;
539555
int memstall = 0;
540556
int ret;
541557

558+
/*
559+
* If this is a readahead bio, prevent direct reclaim. This is done to
560+
* avoid stalling on speculative allocations when memory pressure is
561+
* high. The demand fault will retry with GFP_NOFS and enter direct
562+
* reclaim if needed.
563+
*/
564+
if (bbio->bio.bi_opf & REQ_RAHEAD)
565+
gfp = (GFP_NOFS & ~__GFP_DIRECT_RECLAIM) | __GFP_NOWARN;
566+
else
567+
gfp = GFP_NOFS;
568+
542569
/* we need the actual starting offset of this extent in the file */
543570
read_lock(&em_tree->lock);
544571
em = btrfs_lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize);
@@ -569,7 +596,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
569596
struct folio *folio;
570597
u32 cur_len = min(compressed_len - i * min_folio_size, min_folio_size);
571598

572-
folio = btrfs_alloc_compr_folio(fs_info);
599+
folio = btrfs_alloc_compr_folio(fs_info, gfp);
573600
if (!folio) {
574601
ret = -ENOMEM;
575602
goto out_free_bio;
@@ -585,7 +612,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
585612
ASSERT(cb->bbio.bio.bi_iter.bi_size == compressed_len);
586613

587614
add_ra_bio_pages(&inode->vfs_inode, em_start + em_len, cb, &memstall,
588-
&pflags);
615+
&pflags, !(bbio->bio.bi_opf & REQ_RAHEAD));
589616

590617
cb->len = bbio->bio.bi_iter.bi_size;
591618
cb->bbio.bio.bi_iter.bi_sector = bbio->bio.bi_iter.bi_sector;

fs/btrfs/compression.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio);
9898

9999
int btrfs_compress_str2level(unsigned int type, const char *str, int *level_ret);
100100

101-
struct folio *btrfs_alloc_compr_folio(struct btrfs_fs_info *fs_info);
101+
struct folio *btrfs_alloc_compr_folio(struct btrfs_fs_info *fs_info, gfp_t gfp);
102102
void btrfs_free_compr_folio(struct folio *folio);
103103

104104
struct workspace_manager {

fs/btrfs/inode.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9980,7 +9980,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
99809980
size_t bytes = min(min_folio_size, iov_iter_count(from));
99819981
char *kaddr;
99829982

9983-
folio = btrfs_alloc_compr_folio(fs_info);
9983+
folio = btrfs_alloc_compr_folio(fs_info, GFP_NOFS);
99849984
if (!folio) {
99859985
ret = -ENOMEM;
99869986
goto out_cb;

fs/btrfs/lzo.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ static int copy_compressed_data_to_bio(struct btrfs_fs_info *fs_info,
202202
ASSERT((old_size >> sectorsize_bits) == (old_size + LZO_LEN - 1) >> sectorsize_bits);
203203

204204
if (!*out_folio) {
205-
*out_folio = btrfs_alloc_compr_folio(fs_info);
205+
*out_folio = btrfs_alloc_compr_folio(fs_info, GFP_NOFS);
206206
if (!*out_folio)
207207
return -ENOMEM;
208208
}
@@ -229,7 +229,7 @@ static int copy_compressed_data_to_bio(struct btrfs_fs_info *fs_info,
229229
return -E2BIG;
230230

231231
if (!*out_folio) {
232-
*out_folio = btrfs_alloc_compr_folio(fs_info);
232+
*out_folio = btrfs_alloc_compr_folio(fs_info, GFP_NOFS);
233233
if (!*out_folio)
234234
return -ENOMEM;
235235
}
@@ -280,7 +280,7 @@ int lzo_compress_bio(struct list_head *ws, struct compressed_bio *cb)
280280
ASSERT(bio->bi_iter.bi_size == 0);
281281
ASSERT(len);
282282

283-
folio_out = btrfs_alloc_compr_folio(fs_info);
283+
folio_out = btrfs_alloc_compr_folio(fs_info, GFP_NOFS);
284284
if (!folio_out)
285285
return -ENOMEM;
286286

fs/btrfs/zlib.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ int zlib_compress_bio(struct list_head *ws, struct compressed_bio *cb)
172172
workspace->strm.total_in = 0;
173173
workspace->strm.total_out = 0;
174174

175-
out_folio = btrfs_alloc_compr_folio(fs_info);
175+
out_folio = btrfs_alloc_compr_folio(fs_info, GFP_NOFS);
176176
if (out_folio == NULL) {
177177
ret = -ENOMEM;
178178
goto out;
@@ -254,7 +254,7 @@ int zlib_compress_bio(struct list_head *ws, struct compressed_bio *cb)
254254
goto out;
255255
}
256256

257-
out_folio = btrfs_alloc_compr_folio(fs_info);
257+
out_folio = btrfs_alloc_compr_folio(fs_info, GFP_NOFS);
258258
if (out_folio == NULL) {
259259
ret = -ENOMEM;
260260
goto out;
@@ -291,7 +291,7 @@ int zlib_compress_bio(struct list_head *ws, struct compressed_bio *cb)
291291
goto out;
292292
}
293293
/* Get another folio for the stream end. */
294-
out_folio = btrfs_alloc_compr_folio(fs_info);
294+
out_folio = btrfs_alloc_compr_folio(fs_info, GFP_NOFS);
295295
if (out_folio == NULL) {
296296
ret = -ENOMEM;
297297
goto out;

fs/btrfs/zstd.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,7 @@ int zstd_compress_bio(struct list_head *ws, struct compressed_bio *cb)
437437
workspace->in_buf.size = btrfs_calc_input_length(in_folio, end, start);
438438

439439
/* Allocate and map in the output buffer. */
440-
out_folio = btrfs_alloc_compr_folio(fs_info);
440+
out_folio = btrfs_alloc_compr_folio(fs_info, GFP_NOFS);
441441
if (out_folio == NULL) {
442442
ret = -ENOMEM;
443443
goto out;
@@ -480,7 +480,7 @@ int zstd_compress_bio(struct list_head *ws, struct compressed_bio *cb)
480480
goto out;
481481
}
482482

483-
out_folio = btrfs_alloc_compr_folio(fs_info);
483+
out_folio = btrfs_alloc_compr_folio(fs_info, GFP_NOFS);
484484
if (out_folio == NULL) {
485485
ret = -ENOMEM;
486486
goto out;
@@ -553,7 +553,7 @@ int zstd_compress_bio(struct list_head *ws, struct compressed_bio *cb)
553553
ret = -E2BIG;
554554
goto out;
555555
}
556-
out_folio = btrfs_alloc_compr_folio(fs_info);
556+
out_folio = btrfs_alloc_compr_folio(fs_info, GFP_NOFS);
557557
if (out_folio == NULL) {
558558
ret = -ENOMEM;
559559
goto out;

0 commit comments

Comments
 (0)