Skip to content

Commit 1b63f91

Browse files
committed
Merge patch series "support file system generated / verified integrity information v4"
Christoph Hellwig <hch@lst.de> says: This series adds support to generate and verify integrity information (aka T10 PI) in the file system, instead of the automatic below the covers support that is currently used. There are two reasons for this: a) to increase the protection envelope. Right now this is just a minor step from the bottom of the block layer to the file system, but it is required to support io_uring integrity data passthrough in the file system similar to the currently existing support for block devices, which will follow next. It also allows the file system to directly see the integrity error and act upon it, e.g. when using RAID either integrated (as in btrfs) or by supporting reading redundant copies through the block layer. b) to make the PI processing more efficient. This is primarily a concern for reads, where the block layer auto PI has to schedule a work item for each bio, and the file system then has to do it again for bounce buffering. Additionally the current iomap post-I/O workqueue handling is a lot more efficient by supporting merging and avoiding workqueue scheduling storms. The implementation is based on refactoring the existing block layer PI code to be reusable for this use case, and then adding relatively small wrappers for the file system use case. These are then used in iomap to implement the semantics, and wired up in XFS with a small amount of glue code. Compared to the baseline (iomap-bounce branch), this does not change performance for writes, but increases read performance up to 15% for 4k I/O, with the benefit decreasing with larger I/O sizes as even the baseline maxes out the device quickly on my older enterprise SSD. Anuj Gupta also measured a large decrease in QD1 latency on an Intel Optane device for small I/O sizes, but also an increase for very large ones. Note that the upcoming XFS fsverity support also depends on some infrastructure in this series.
* patches from https://patch.msgid.link/20260223132021.292832-1-hch@lst.de: xfs: support T10 protection information iomap: support T10 protection information iomap: support ioends for buffered reads iomap: add a bioset pointer to iomap_read_folio_ops ntfs3: remove copy and pasted iomap code iomap: allow file systems to hook into buffered read bio submission iomap: only call into ->submit_read when there is a read_ctx iomap: pass the iomap_iter to ->submit_read iomap: refactor iomap_bio_read_folio_range Link: https://patch.msgid.link/20260223132021.292832-1-hch@lst.de Signed-off-by: Christian Brauner <brauner@kernel.org>
2 parents 969ebeb + 6bbb4d9 commit 1b63f91

10 files changed

Lines changed: 224 additions & 116 deletions

File tree

fs/fuse/file.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -947,7 +947,8 @@ static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
947947
return ret;
948948
}
949949

950-
static void fuse_iomap_read_submit(struct iomap_read_folio_ctx *ctx)
950+
static void fuse_iomap_submit_read(const struct iomap_iter *iter,
951+
struct iomap_read_folio_ctx *ctx)
951952
{
952953
struct fuse_fill_read_data *data = ctx->read_ctx;
953954

@@ -958,7 +959,7 @@ static void fuse_iomap_read_submit(struct iomap_read_folio_ctx *ctx)
958959

959960
static const struct iomap_read_ops fuse_iomap_read_ops = {
960961
.read_folio_range = fuse_iomap_read_folio_range_async,
961-
.submit_read = fuse_iomap_read_submit,
962+
.submit_read = fuse_iomap_submit_read,
962963
};
963964

964965
static int fuse_read_folio(struct file *file, struct folio *folio)

fs/iomap/bio.c

Lines changed: 91 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -3,86 +3,133 @@
33
* Copyright (C) 2010 Red Hat, Inc.
44
* Copyright (C) 2016-2023 Christoph Hellwig.
55
*/
6+
#include <linux/bio-integrity.h>
67
#include <linux/iomap.h>
78
#include <linux/pagemap.h>
89
#include "internal.h"
910
#include "trace.h"
1011

11-
static void iomap_read_end_io(struct bio *bio)
12+
static u32 __iomap_read_end_io(struct bio *bio, int error)
1213
{
13-
int error = blk_status_to_errno(bio->bi_status);
1414
struct folio_iter fi;
15+
u32 folio_count = 0;
1516

16-
bio_for_each_folio_all(fi, bio)
17+
bio_for_each_folio_all(fi, bio) {
1718
iomap_finish_folio_read(fi.folio, fi.offset, fi.length, error);
19+
folio_count++;
20+
}
21+
if (bio_integrity(bio))
22+
fs_bio_integrity_free(bio);
1823
bio_put(bio);
24+
return folio_count;
25+
}
26+
27+
static void iomap_read_end_io(struct bio *bio)
28+
{
29+
__iomap_read_end_io(bio, blk_status_to_errno(bio->bi_status));
1930
}
2031

21-
static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx)
32+
u32 iomap_finish_ioend_buffered_read(struct iomap_ioend *ioend)
33+
{
34+
return __iomap_read_end_io(&ioend->io_bio, ioend->io_error);
35+
}
36+
37+
static void iomap_bio_submit_read(const struct iomap_iter *iter,
38+
struct iomap_read_folio_ctx *ctx)
2239
{
2340
struct bio *bio = ctx->read_ctx;
2441

25-
if (bio)
26-
submit_bio(bio);
42+
if (iter->iomap.flags & IOMAP_F_INTEGRITY)
43+
fs_bio_integrity_alloc(bio);
44+
submit_bio(bio);
45+
}
46+
47+
static struct bio_set *iomap_read_bio_set(struct iomap_read_folio_ctx *ctx)
48+
{
49+
if (ctx->ops && ctx->ops->bio_set)
50+
return ctx->ops->bio_set;
51+
return &fs_bio_set;
52+
}
53+
54+
static void iomap_read_alloc_bio(const struct iomap_iter *iter,
55+
struct iomap_read_folio_ctx *ctx, size_t plen)
56+
{
57+
const struct iomap *iomap = &iter->iomap;
58+
unsigned int nr_vecs = DIV_ROUND_UP(iomap_length(iter), PAGE_SIZE);
59+
struct bio_set *bio_set = iomap_read_bio_set(ctx);
60+
struct folio *folio = ctx->cur_folio;
61+
gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL);
62+
gfp_t orig_gfp = gfp;
63+
struct bio *bio;
64+
65+
/* Submit the existing range if there was one. */
66+
if (ctx->read_ctx)
67+
ctx->ops->submit_read(iter, ctx);
68+
69+
/* Same as readahead_gfp_mask: */
70+
if (ctx->rac)
71+
gfp |= __GFP_NORETRY | __GFP_NOWARN;
72+
73+
/*
74+
* If the bio_alloc fails, try it again for a single page to avoid
75+
* having to deal with partial page reads. This emulates what
76+
* do_mpage_read_folio does.
77+
*/
78+
bio = bio_alloc_bioset(iomap->bdev, bio_max_segs(nr_vecs), REQ_OP_READ,
79+
gfp, bio_set);
80+
if (!bio)
81+
bio = bio_alloc_bioset(iomap->bdev, 1, REQ_OP_READ, orig_gfp,
82+
bio_set);
83+
if (ctx->rac)
84+
bio->bi_opf |= REQ_RAHEAD;
85+
bio->bi_iter.bi_sector = iomap_sector(iomap, iter->pos);
86+
bio->bi_end_io = iomap_read_end_io;
87+
bio_add_folio_nofail(bio, folio, plen,
88+
offset_in_folio(folio, iter->pos));
89+
ctx->read_ctx = bio;
90+
ctx->read_ctx_file_offset = iter->pos;
2791
}
2892

29-
static int iomap_bio_read_folio_range(const struct iomap_iter *iter,
93+
int iomap_bio_read_folio_range(const struct iomap_iter *iter,
3094
struct iomap_read_folio_ctx *ctx, size_t plen)
3195
{
3296
struct folio *folio = ctx->cur_folio;
33-
const struct iomap *iomap = &iter->iomap;
34-
loff_t pos = iter->pos;
35-
size_t poff = offset_in_folio(folio, pos);
36-
loff_t length = iomap_length(iter);
37-
sector_t sector;
3897
struct bio *bio = ctx->read_ctx;
3998

40-
sector = iomap_sector(iomap, pos);
41-
if (!bio || bio_end_sector(bio) != sector ||
42-
!bio_add_folio(bio, folio, plen, poff)) {
43-
gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL);
44-
gfp_t orig_gfp = gfp;
45-
unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE);
46-
47-
if (bio)
48-
submit_bio(bio);
49-
50-
if (ctx->rac) /* same as readahead_gfp_mask */
51-
gfp |= __GFP_NORETRY | __GFP_NOWARN;
52-
bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs), REQ_OP_READ,
53-
gfp);
54-
/*
55-
* If the bio_alloc fails, try it again for a single page to
56-
* avoid having to deal with partial page reads. This emulates
57-
* what do_mpage_read_folio does.
58-
*/
59-
if (!bio)
60-
bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ, orig_gfp);
61-
if (ctx->rac)
62-
bio->bi_opf |= REQ_RAHEAD;
63-
bio->bi_iter.bi_sector = sector;
64-
bio->bi_end_io = iomap_read_end_io;
65-
bio_add_folio_nofail(bio, folio, plen, poff);
66-
ctx->read_ctx = bio;
67-
}
99+
if (!bio ||
100+
bio_end_sector(bio) != iomap_sector(&iter->iomap, iter->pos) ||
101+
bio->bi_iter.bi_size > iomap_max_bio_size(&iter->iomap) - plen ||
102+
!bio_add_folio(bio, folio, plen, offset_in_folio(folio, iter->pos)))
103+
iomap_read_alloc_bio(iter, ctx, plen);
68104
return 0;
69105
}
106+
EXPORT_SYMBOL_GPL(iomap_bio_read_folio_range);
70107

71108
const struct iomap_read_ops iomap_bio_read_ops = {
72-
.read_folio_range = iomap_bio_read_folio_range,
73-
.submit_read = iomap_bio_submit_read,
109+
.read_folio_range = iomap_bio_read_folio_range,
110+
.submit_read = iomap_bio_submit_read,
74111
};
75112
EXPORT_SYMBOL_GPL(iomap_bio_read_ops);
76113

77114
int iomap_bio_read_folio_range_sync(const struct iomap_iter *iter,
78115
struct folio *folio, loff_t pos, size_t len)
79116
{
80117
const struct iomap *srcmap = iomap_iter_srcmap(iter);
118+
sector_t sector = iomap_sector(srcmap, pos);
81119
struct bio_vec bvec;
82120
struct bio bio;
121+
int error;
83122

84123
bio_init(&bio, srcmap->bdev, &bvec, 1, REQ_OP_READ);
85-
bio.bi_iter.bi_sector = iomap_sector(srcmap, pos);
124+
bio.bi_iter.bi_sector = sector;
86125
bio_add_folio_nofail(&bio, folio, len, offset_in_folio(folio, pos));
87-
return submit_bio_wait(&bio);
126+
if (srcmap->flags & IOMAP_F_INTEGRITY)
127+
fs_bio_integrity_alloc(&bio);
128+
error = submit_bio_wait(&bio);
129+
if (srcmap->flags & IOMAP_F_INTEGRITY) {
130+
if (!error)
131+
error = fs_bio_integrity_verify(&bio, sector, len);
132+
fs_bio_integrity_free(&bio);
133+
}
134+
return error;
88135
}

fs/iomap/buffered-io.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -596,8 +596,8 @@ void iomap_read_folio(const struct iomap_ops *ops,
596596
iter.status = iomap_read_folio_iter(&iter, ctx,
597597
&bytes_submitted);
598598

599-
if (ctx->ops->submit_read)
600-
ctx->ops->submit_read(ctx);
599+
if (ctx->read_ctx && ctx->ops->submit_read)
600+
ctx->ops->submit_read(&iter, ctx);
601601

602602
if (ctx->cur_folio)
603603
iomap_read_end(ctx->cur_folio, bytes_submitted);
@@ -663,8 +663,8 @@ void iomap_readahead(const struct iomap_ops *ops,
663663
iter.status = iomap_readahead_iter(&iter, ctx,
664664
&cur_bytes_submitted);
665665

666-
if (ctx->ops->submit_read)
667-
ctx->ops->submit_read(ctx);
666+
if (ctx->read_ctx && ctx->ops->submit_read)
667+
ctx->ops->submit_read(&iter, ctx);
668668

669669
if (ctx->cur_folio)
670670
iomap_read_end(ctx->cur_folio, cur_bytes_submitted);

fs/iomap/direct-io.c

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
* Copyright (C) 2010 Red Hat, Inc.
44
* Copyright (c) 2016-2025 Christoph Hellwig.
55
*/
6+
#include <linux/bio-integrity.h>
67
#include <linux/blk-crypto.h>
78
#include <linux/fscrypt.h>
89
#include <linux/pagemap.h>
@@ -240,6 +241,9 @@ static void __iomap_dio_bio_end_io(struct bio *bio, bool inline_completion)
240241
{
241242
struct iomap_dio *dio = bio->bi_private;
242243

244+
if (bio_integrity(bio))
245+
fs_bio_integrity_free(bio);
246+
243247
if (dio->flags & IOMAP_DIO_BOUNCE) {
244248
bio_iov_iter_unbounce(bio, !!dio->error,
245249
dio->flags & IOMAP_DIO_USER_BACKED);
@@ -350,8 +354,10 @@ static ssize_t iomap_dio_bio_iter_one(struct iomap_iter *iter,
350354
bio->bi_private = dio;
351355
bio->bi_end_io = iomap_dio_bio_end_io;
352356

357+
353358
if (dio->flags & IOMAP_DIO_BOUNCE)
354-
ret = bio_iov_iter_bounce(bio, dio->submit.iter, BIO_MAX_SIZE);
359+
ret = bio_iov_iter_bounce(bio, dio->submit.iter,
360+
iomap_max_bio_size(&iter->iomap));
355361
else
356362
ret = bio_iov_iter_get_pages(bio, dio->submit.iter,
357363
alignment - 1);
@@ -368,6 +374,13 @@ static ssize_t iomap_dio_bio_iter_one(struct iomap_iter *iter,
368374
goto out_put_bio;
369375
}
370376

377+
if (iter->iomap.flags & IOMAP_F_INTEGRITY) {
378+
if (dio->flags & IOMAP_DIO_WRITE)
379+
fs_bio_integrity_generate(bio);
380+
else
381+
fs_bio_integrity_alloc(bio);
382+
}
383+
371384
if (dio->flags & IOMAP_DIO_WRITE)
372385
task_io_account_write(ret);
373386
else if ((dio->flags & IOMAP_DIO_USER_BACKED) &&

fs/iomap/internal.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,20 @@
44

55
#define IOEND_BATCH_SIZE 4096
66

7+
/*
8+
* Normally we can build bios as big as the data structure supports.
9+
*
10+
* But for integrity protected I/O we need to respect the maximum size of the
11+
* single contiguous allocation for the integrity buffer.
12+
*/
13+
static inline size_t iomap_max_bio_size(const struct iomap *iomap)
14+
{
15+
if (iomap->flags & IOMAP_F_INTEGRITY)
16+
return max_integrity_io_size(bdev_limits(iomap->bdev));
17+
return BIO_MAX_SIZE;
18+
}
19+
20+
u32 iomap_finish_ioend_buffered_read(struct iomap_ioend *ioend);
721
u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend);
822

923
#ifdef CONFIG_BLOCK

fs/iomap/ioend.c

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
/*
33
* Copyright (c) 2016-2025 Christoph Hellwig.
44
*/
5+
#include <linux/bio-integrity.h>
56
#include <linux/iomap.h>
67
#include <linux/list_sort.h>
78
#include <linux/pagemap.h>
@@ -37,7 +38,7 @@ EXPORT_SYMBOL_GPL(iomap_init_ioend);
3738
* state, release holds on bios, and finally free up memory. Do not use the
3839
* ioend after this.
3940
*/
40-
static u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend)
41+
static u32 iomap_finish_ioend_buffered_write(struct iomap_ioend *ioend)
4142
{
4243
struct inode *inode = ioend->io_inode;
4344
struct bio *bio = &ioend->io_bio;
@@ -65,6 +66,8 @@ static u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend)
6566
folio_count++;
6667
}
6768

69+
if (bio_integrity(bio))
70+
fs_bio_integrity_free(bio);
6871
bio_put(bio); /* frees the ioend */
6972
return folio_count;
7073
}
@@ -87,7 +90,7 @@ iomap_fail_ioends(
8790
while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend,
8891
io_list))) {
8992
list_del_init(&ioend->io_list);
90-
iomap_finish_ioend_buffered(ioend);
93+
iomap_finish_ioend_buffered_write(ioend);
9194
cond_resched();
9295
}
9396
}
@@ -120,7 +123,7 @@ static void ioend_writeback_end_bio(struct bio *bio)
120123
return;
121124
}
122125

123-
iomap_finish_ioend_buffered(ioend);
126+
iomap_finish_ioend_buffered_write(ioend);
124127
}
125128

126129
/*
@@ -144,6 +147,8 @@ int iomap_ioend_writeback_submit(struct iomap_writepage_ctx *wpc, int error)
144147
return error;
145148
}
146149

150+
if (wpc->iomap.flags & IOMAP_F_INTEGRITY)
151+
fs_bio_integrity_generate(&ioend->io_bio);
147152
submit_bio(&ioend->io_bio);
148153
return 0;
149154
}
@@ -165,10 +170,13 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc,
165170
}
166171

167172
static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos,
168-
u16 ioend_flags)
173+
unsigned int map_len, u16 ioend_flags)
169174
{
170175
struct iomap_ioend *ioend = wpc->wb_ctx;
171176

177+
if (ioend->io_bio.bi_iter.bi_size >
178+
iomap_max_bio_size(&wpc->iomap) - map_len)
179+
return false;
172180
if (ioend_flags & IOMAP_IOEND_BOUNDARY)
173181
return false;
174182
if ((ioend_flags & IOMAP_IOEND_NOMERGE_FLAGS) !=
@@ -234,7 +242,7 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio,
234242
if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY))
235243
ioend_flags |= IOMAP_IOEND_BOUNDARY;
236244

237-
if (!ioend || !iomap_can_add_to_ioend(wpc, pos, ioend_flags)) {
245+
if (!ioend || !iomap_can_add_to_ioend(wpc, pos, map_len, ioend_flags)) {
238246
new_ioend:
239247
if (ioend) {
240248
error = wpc->ops->writeback_submit(wpc, 0);
@@ -311,9 +319,19 @@ static u32 iomap_finish_ioend(struct iomap_ioend *ioend, int error)
311319

312320
if (!atomic_dec_and_test(&ioend->io_remaining))
313321
return 0;
322+
323+
if (!ioend->io_error &&
324+
bio_integrity(&ioend->io_bio) &&
325+
bio_op(&ioend->io_bio) == REQ_OP_READ) {
326+
ioend->io_error = fs_bio_integrity_verify(&ioend->io_bio,
327+
ioend->io_sector, ioend->io_size);
328+
}
329+
314330
if (ioend->io_flags & IOMAP_IOEND_DIRECT)
315331
return iomap_finish_ioend_direct(ioend);
316-
return iomap_finish_ioend_buffered(ioend);
332+
if (bio_op(&ioend->io_bio) == REQ_OP_READ)
333+
return iomap_finish_ioend_buffered_read(ioend);
334+
return iomap_finish_ioend_buffered_write(ioend);
317335
}
318336

319337
/*

0 commit comments

Comments
 (0)