Skip to content

Commit 0f00132

Browse files
committed
Merge tag 'vfs-7.1-rc1.integrity' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs integrity updates from Christian Brauner:

"This adds support to generate and verify integrity information (aka T10 PI) in the file system, instead of the automatic below the covers support that is currently used.

The implementation is based on refactoring the existing block layer PI code to be reusable for this use case, and then adding relatively small wrappers for the file system use case. These are then used in iomap to implement the semantics, and wired up in XFS with a small amount of glue code.

Compared to the baseline this does not change performance for writes, but increases read performance up to 15% for 4k I/O, with the benefit decreasing with larger I/O sizes as even the baseline maxes out the device quickly on my older enterprise SSD."

* tag 'vfs-7.1-rc1.integrity' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  - xfs: support T10 protection information
  - iomap: support T10 protection information
  - iomap: support ioends for buffered reads
  - iomap: add a bioset pointer to iomap_read_folio_ops
  - ntfs3: remove copy and pasted iomap code
  - iomap: allow file systems to hook into buffered read bio submission
  - iomap: only call into ->submit_read when there is a read_ctx
  - iomap: pass the iomap_iter to ->submit_read
  - iomap: refactor iomap_bio_read_folio_range
  - block: pass a maxlen argument to bio_iov_iter_bounce
  - block: add fs_bio_integrity helpers
  - block: make max_integrity_io_size public
  - block: prepare generation / verification helpers for fs usage
  - block: add a bdev_has_integrity_csum helper
  - block: factor out a bio_integrity_setup_default helper
  - block: factor out a bio_integrity_action helper
2 parents 3383589 + 1b63f91 commit 0f00132

24 files changed

Lines changed: 467 additions & 235 deletions

block/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
2626
obj-$(CONFIG_IOSCHED_BFQ) += bfq.o
2727

2828
obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o \
29-
bio-integrity-auto.o
29+
bio-integrity-auto.o bio-integrity-fs.o
3030
obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
3131
obj-$(CONFIG_BLK_WBT) += blk-wbt.o
3232
obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o

block/bio-integrity-auto.c

Lines changed: 11 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
3939
container_of(work, struct bio_integrity_data, work);
4040
struct bio *bio = bid->bio;
4141

42-
blk_integrity_verify_iter(bio, &bid->saved_bio_iter);
42+
bio->bi_status = bio_integrity_verify(bio, &bid->saved_bio_iter);
4343
bio_integrity_finish(bid);
4444
bio_endio(bio);
4545
}
@@ -50,11 +50,6 @@ static bool bip_should_check(struct bio_integrity_payload *bip)
5050
return bip->bip_flags & BIP_CHECK_FLAGS;
5151
}
5252

53-
static bool bi_offload_capable(struct blk_integrity *bi)
54-
{
55-
return bi->metadata_size == bi->pi_tuple_size;
56-
}
57-
5853
/**
5954
* __bio_integrity_endio - Integrity I/O completion function
6055
* @bio: Protected bio
@@ -84,83 +79,30 @@ bool __bio_integrity_endio(struct bio *bio)
8479
/**
8580
* bio_integrity_prep - Prepare bio for integrity I/O
8681
* @bio: bio to prepare
82+
* @action: preparation action needed (BI_ACT_*)
8783
*
88-
* Checks if the bio already has an integrity payload attached. If it does, the
89-
* payload has been generated by another kernel subsystem, and we just pass it
90-
* through.
91-
* Otherwise allocates integrity payload and for writes the integrity metadata
92-
* will be generated. For reads, the completion handler will verify the
93-
* metadata.
84+
* Allocate the integrity payload. For writes, generate the integrity metadata
85+
* and for reads, set up the completion handler to verify the metadata.
86+
*
87+
* This is used for bios that do not have user integrity payloads attached.
9488
*/
95-
bool bio_integrity_prep(struct bio *bio)
89+
void bio_integrity_prep(struct bio *bio, unsigned int action)
9690
{
97-
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
9891
struct bio_integrity_data *bid;
99-
bool set_flags = true;
100-
gfp_t gfp = GFP_NOIO;
101-
102-
if (!bi)
103-
return true;
104-
105-
if (!bio_sectors(bio))
106-
return true;
107-
108-
/* Already protected? */
109-
if (bio_integrity(bio))
110-
return true;
111-
112-
switch (bio_op(bio)) {
113-
case REQ_OP_READ:
114-
if (bi->flags & BLK_INTEGRITY_NOVERIFY) {
115-
if (bi_offload_capable(bi))
116-
return true;
117-
set_flags = false;
118-
}
119-
break;
120-
case REQ_OP_WRITE:
121-
/*
122-
* Zero the memory allocated to not leak uninitialized kernel
123-
* memory to disk for non-integrity metadata where nothing else
124-
* initializes the memory.
125-
*/
126-
if (bi->flags & BLK_INTEGRITY_NOGENERATE) {
127-
if (bi_offload_capable(bi))
128-
return true;
129-
set_flags = false;
130-
gfp |= __GFP_ZERO;
131-
} else if (bi->metadata_size > bi->pi_tuple_size)
132-
gfp |= __GFP_ZERO;
133-
break;
134-
default:
135-
return true;
136-
}
137-
138-
if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
139-
return true;
14092

14193
bid = mempool_alloc(&bid_pool, GFP_NOIO);
14294
bio_integrity_init(bio, &bid->bip, &bid->bvec, 1);
14395
bid->bio = bio;
14496
bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
145-
bio_integrity_alloc_buf(bio, gfp & __GFP_ZERO);
146-
147-
bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);
148-
149-
if (set_flags) {
150-
if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
151-
bid->bip.bip_flags |= BIP_IP_CHECKSUM;
152-
if (bi->csum_type)
153-
bid->bip.bip_flags |= BIP_CHECK_GUARD;
154-
if (bi->flags & BLK_INTEGRITY_REF_TAG)
155-
bid->bip.bip_flags |= BIP_CHECK_REFTAG;
156-
}
97+
bio_integrity_alloc_buf(bio, action & BI_ACT_ZERO);
98+
if (action & BI_ACT_CHECK)
99+
bio_integrity_setup_default(bio);
157100

158101
/* Auto-generate integrity metadata if this is a write */
159102
if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip))
160-
blk_integrity_generate(bio);
103+
bio_integrity_generate(bio);
161104
else
162105
bid->saved_bio_iter = bio->bi_iter;
163-
return true;
164106
}
165107
EXPORT_SYMBOL(bio_integrity_prep);
166108

block/bio-integrity-fs.c

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Copyright (c) 2025 Christoph Hellwig.
4+
*/
5+
#include <linux/blk-integrity.h>
6+
#include <linux/bio-integrity.h>
7+
#include "blk.h"
8+
9+
struct fs_bio_integrity_buf {
10+
struct bio_integrity_payload bip;
11+
struct bio_vec bvec;
12+
};
13+
14+
static struct kmem_cache *fs_bio_integrity_cache;
15+
static mempool_t fs_bio_integrity_pool;
16+
17+
unsigned int fs_bio_integrity_alloc(struct bio *bio)
18+
{
19+
struct fs_bio_integrity_buf *iib;
20+
unsigned int action;
21+
22+
action = bio_integrity_action(bio);
23+
if (!action)
24+
return 0;
25+
26+
iib = mempool_alloc(&fs_bio_integrity_pool, GFP_NOIO);
27+
bio_integrity_init(bio, &iib->bip, &iib->bvec, 1);
28+
29+
bio_integrity_alloc_buf(bio, action & BI_ACT_ZERO);
30+
if (action & BI_ACT_CHECK)
31+
bio_integrity_setup_default(bio);
32+
return action;
33+
}
34+
35+
void fs_bio_integrity_free(struct bio *bio)
36+
{
37+
struct bio_integrity_payload *bip = bio_integrity(bio);
38+
39+
bio_integrity_free_buf(bip);
40+
mempool_free(container_of(bip, struct fs_bio_integrity_buf, bip),
41+
&fs_bio_integrity_pool);
42+
43+
bio->bi_integrity = NULL;
44+
bio->bi_opf &= ~REQ_INTEGRITY;
45+
}
46+
47+
void fs_bio_integrity_generate(struct bio *bio)
48+
{
49+
if (fs_bio_integrity_alloc(bio))
50+
bio_integrity_generate(bio);
51+
}
52+
EXPORT_SYMBOL_GPL(fs_bio_integrity_generate);
53+
54+
int fs_bio_integrity_verify(struct bio *bio, sector_t sector, unsigned int size)
55+
{
56+
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
57+
struct bio_integrity_payload *bip = bio_integrity(bio);
58+
59+
/*
60+
* Reinitialize bip->bip_iter.
61+
*
62+
* This is for use in the submitter after the driver is done with the
63+
* bio. Requires the submitter to remember the sector and the size.
64+
*/
65+
memset(&bip->bip_iter, 0, sizeof(bip->bip_iter));
66+
bip->bip_iter.bi_sector = sector;
67+
bip->bip_iter.bi_size = bio_integrity_bytes(bi, size >> SECTOR_SHIFT);
68+
return blk_status_to_errno(bio_integrity_verify(bio, &bip->bip_iter));
69+
}
70+
71+
static int __init fs_bio_integrity_init(void)
72+
{
73+
fs_bio_integrity_cache = kmem_cache_create("fs_bio_integrity",
74+
sizeof(struct fs_bio_integrity_buf), 0,
75+
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
76+
if (mempool_init_slab_pool(&fs_bio_integrity_pool, BIO_POOL_SIZE,
77+
fs_bio_integrity_cache))
78+
panic("fs_bio_integrity: can't create pool\n");
79+
return 0;
80+
}
81+
fs_initcall(fs_bio_integrity_init);

block/bio-integrity.c

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*/
88

99
#include <linux/blk-integrity.h>
10+
#include <linux/t10-pi.h>
1011
#include "blk.h"
1112

1213
struct bio_integrity_alloc {
@@ -16,6 +17,53 @@ struct bio_integrity_alloc {
1617

1718
static mempool_t integrity_buf_pool;
1819

20+
static bool bi_offload_capable(struct blk_integrity *bi)
21+
{
22+
return bi->metadata_size == bi->pi_tuple_size;
23+
}
24+
25+
unsigned int __bio_integrity_action(struct bio *bio)
26+
{
27+
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
28+
29+
if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
30+
return 0;
31+
32+
switch (bio_op(bio)) {
33+
case REQ_OP_READ:
34+
if (bi->flags & BLK_INTEGRITY_NOVERIFY) {
35+
if (bi_offload_capable(bi))
36+
return 0;
37+
return BI_ACT_BUFFER;
38+
}
39+
return BI_ACT_BUFFER | BI_ACT_CHECK;
40+
case REQ_OP_WRITE:
41+
/*
42+
* Flush masquerading as write?
43+
*/
44+
if (!bio_sectors(bio))
45+
return 0;
46+
47+
/*
48+
* Zero the memory allocated to not leak uninitialized kernel
49+
* memory to disk for non-integrity metadata where nothing else
50+
* initializes the memory.
51+
*/
52+
if (bi->flags & BLK_INTEGRITY_NOGENERATE) {
53+
if (bi_offload_capable(bi))
54+
return 0;
55+
return BI_ACT_BUFFER | BI_ACT_ZERO;
56+
}
57+
58+
if (bi->metadata_size > bi->pi_tuple_size)
59+
return BI_ACT_BUFFER | BI_ACT_CHECK | BI_ACT_ZERO;
60+
return BI_ACT_BUFFER | BI_ACT_CHECK;
61+
default:
62+
return 0;
63+
}
64+
}
65+
EXPORT_SYMBOL_GPL(__bio_integrity_action);
66+
1967
void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer)
2068
{
2169
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
@@ -53,6 +101,22 @@ void bio_integrity_free_buf(struct bio_integrity_payload *bip)
53101
kfree(bvec_virt(bv));
54102
}
55103

104+
void bio_integrity_setup_default(struct bio *bio)
105+
{
106+
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
107+
struct bio_integrity_payload *bip = bio_integrity(bio);
108+
109+
bip_set_seed(bip, bio->bi_iter.bi_sector);
110+
111+
if (bi->csum_type) {
112+
bip->bip_flags |= BIP_CHECK_GUARD;
113+
if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
114+
bip->bip_flags |= BIP_IP_CHECKSUM;
115+
}
116+
if (bi->flags & BLK_INTEGRITY_REF_TAG)
117+
bip->bip_flags |= BIP_CHECK_REFTAG;
118+
}
119+
56120
/**
57121
* bio_integrity_free - Free bio integrity payload
58122
* @bio: bio containing bip to be freed

block/bio.c

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1327,9 +1327,10 @@ static void bio_free_folios(struct bio *bio)
13271327
}
13281328
}
13291329

1330-
static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter)
1330+
static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter,
1331+
size_t maxlen)
13311332
{
1332-
size_t total_len = iov_iter_count(iter);
1333+
size_t total_len = min(maxlen, iov_iter_count(iter));
13331334

13341335
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
13351336
return -EINVAL;
@@ -1367,9 +1368,10 @@ static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter)
13671368
return 0;
13681369
}
13691370

1370-
static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter)
1371+
static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter,
1372+
size_t maxlen)
13711373
{
1372-
size_t len = min(iov_iter_count(iter), SZ_1M);
1374+
size_t len = min3(iov_iter_count(iter), maxlen, SZ_1M);
13731375
struct folio *folio;
13741376

13751377
folio = folio_alloc_greedy(GFP_KERNEL, &len);
@@ -1408,18 +1410,19 @@ static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter)
14081410
* bio_iov_iter_bounce - bounce buffer data from an iter into a bio
14091411
* @bio: bio to send
14101412
* @iter: iter to read from / write into
1413+
* @maxlen: maximum size to bounce
14111414
*
14121415
* Helper for direct I/O implementations that need to bounce buffer because
14131416
* we need to checksum the data or perform other operations that require
14141417
* consistency. Allocates folios to back the bounce buffer, and for writes
14151418
* copies the data into it. Needs to be paired with bio_iov_iter_unbounce()
14161419
* called on completion.
14171420
*/
1418-
int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter)
1421+
int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter, size_t maxlen)
14191422
{
14201423
if (op_is_write(bio_op(bio)))
1421-
return bio_iov_iter_bounce_write(bio, iter);
1422-
return bio_iov_iter_bounce_read(bio, iter);
1424+
return bio_iov_iter_bounce_write(bio, iter, maxlen);
1425+
return bio_iov_iter_bounce_read(bio, iter, maxlen);
14231426
}
14241427

14251428
static void bvec_unpin(struct bio_vec *bv, bool mark_dirty)

block/blk-mq.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3143,6 +3143,7 @@ void blk_mq_submit_bio(struct bio *bio)
31433143
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
31443144
struct blk_plug *plug = current->plug;
31453145
const int is_sync = op_is_sync(bio->bi_opf);
3146+
unsigned int integrity_action;
31463147
struct blk_mq_hw_ctx *hctx;
31473148
unsigned int nr_segs;
31483149
struct request *rq;
@@ -3195,8 +3196,9 @@ void blk_mq_submit_bio(struct bio *bio)
31953196
if (!bio)
31963197
goto queue_exit;
31973198

3198-
if (!bio_integrity_prep(bio))
3199-
goto queue_exit;
3199+
integrity_action = bio_integrity_action(bio);
3200+
if (integrity_action)
3201+
bio_integrity_prep(bio, integrity_action);
32003202

32013203
blk_mq_bio_issue_init(q, bio);
32023204
if (blk_mq_attempt_bio_merge(q, bio, nr_segs))

block/blk-settings.c

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -123,19 +123,6 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
123123
return 0;
124124
}
125125

126-
/*
127-
* Maximum size of I/O that needs a block layer integrity buffer. Limited
128-
* by the number of intervals for which we can fit the integrity buffer into
129-
* the buffer size. Because the buffer is a single segment it is also limited
130-
* by the maximum segment size.
131-
*/
132-
static inline unsigned int max_integrity_io_size(struct queue_limits *lim)
133-
{
134-
return min_t(unsigned int, lim->max_segment_size,
135-
(BLK_INTEGRITY_MAX_SIZE / lim->integrity.metadata_size) <<
136-
lim->integrity.interval_exp);
137-
}
138-
139126
static int blk_validate_integrity_limits(struct queue_limits *lim)
140127
{
141128
struct blk_integrity *bi = &lim->integrity;

block/blk.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -699,8 +699,10 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
699699
const struct blk_holder_ops *hops, struct file *bdev_file);
700700
int bdev_permission(dev_t dev, blk_mode_t mode, void *holder);
701701

702-
void blk_integrity_generate(struct bio *bio);
703-
void blk_integrity_verify_iter(struct bio *bio, struct bvec_iter *saved_iter);
702+
void bio_integrity_generate(struct bio *bio);
703+
blk_status_t bio_integrity_verify(struct bio *bio,
704+
struct bvec_iter *saved_iter);
705+
704706
void blk_integrity_prepare(struct request *rq);
705707
void blk_integrity_complete(struct request *rq, unsigned int nr_bytes);
706708

0 commit comments

Comments
 (0)