Skip to content

Commit 3f60efd

Browse files
zhangyi089 and tytso
authored and committed
ext4: zero post-EOF partial block before appending write
When an appending write starts beyond EOF, ext4_block_zero_eof() is called within ext4_*_write_end() to zero out the partial block beyond the old EOF. This prevents exposing stale data that might have been written through mmap.

However, supporting only the regular buffered write path is insufficient: the DAX path and the upcoming iomap buffered write path must be covered as well. Therefore, move this operation to ext4_write_checks().

In addition, this may introduce a race window in which a post-EOF buffered write can race with an mmap write after the old EOF block has been zeroed. As a result, the data written to this block by the buffered write and the data written by the mmap write may be mixed. However, this is safe because users should not rely on the result of a race condition.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20260327102939.1095257-14-yi.zhang@huaweicloud.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
1 parent 1ad0f42 commit 3f60efd

2 files changed

Lines changed: 24 additions & 14 deletions

File tree

fs/ext4/file.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,8 @@ static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
271271

272272
static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
273273
{
274+
struct inode *inode = file_inode(iocb->ki_filp);
275+
loff_t old_size = i_size_read(inode);
274276
ssize_t ret, count;
275277

276278
count = ext4_generic_write_checks(iocb, from);
@@ -280,6 +282,21 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
280282
ret = file_modified(iocb->ki_filp);
281283
if (ret)
282284
return ret;
285+
286+
/*
287+
* If the position is beyond the EOF, it is necessary to zero out the
288+
* partial block that lies beyond the existing EOF, as it may contain
289+
* stale data written through mmap.
290+
*/
291+
if (iocb->ki_pos > old_size && !ext4_verity_in_progress(inode)) {
292+
if (iocb->ki_flags & IOCB_NOWAIT)
293+
return -EAGAIN;
294+
295+
ret = ext4_block_zero_eof(inode, old_size, iocb->ki_pos);
296+
if (ret)
297+
return ret;
298+
}
299+
283300
return count;
284301
}
285302

fs/ext4/inode.c

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1466,10 +1466,9 @@ static int ext4_write_end(const struct kiocb *iocb,
14661466
folio_unlock(folio);
14671467
folio_put(folio);
14681468

1469-
if (old_size < pos && !verity) {
1469+
if (old_size < pos && !verity)
14701470
pagecache_isize_extended(inode, old_size, pos);
1471-
ext4_block_zero_eof(inode, old_size, pos);
1472-
}
1471+
14731472
/*
14741473
* Don't mark the inode dirty under folio lock. First, it unnecessarily
14751474
* makes the holding time of folio lock longer. Second, it forces lock
@@ -1584,10 +1583,8 @@ static int ext4_journalled_write_end(const struct kiocb *iocb,
15841583
folio_unlock(folio);
15851584
folio_put(folio);
15861585

1587-
if (old_size < pos && !verity) {
1586+
if (old_size < pos && !verity)
15881587
pagecache_isize_extended(inode, old_size, pos);
1589-
ext4_block_zero_eof(inode, old_size, pos);
1590-
}
15911588

15921589
if (size_changed) {
15931590
ret2 = ext4_mark_inode_dirty(handle, inode);
@@ -3226,7 +3223,7 @@ static int ext4_da_do_write_end(struct address_space *mapping,
32263223
struct inode *inode = mapping->host;
32273224
loff_t old_size = inode->i_size;
32283225
bool disksize_changed = false;
3229-
loff_t new_i_size, zero_len = 0;
3226+
loff_t new_i_size;
32303227
handle_t *handle;
32313228

32323229
if (unlikely(!folio_buffers(folio))) {
@@ -3270,19 +3267,15 @@ static int ext4_da_do_write_end(struct address_space *mapping,
32703267
folio_unlock(folio);
32713268
folio_put(folio);
32723269

3273-
if (pos > old_size) {
3270+
if (pos > old_size)
32743271
pagecache_isize_extended(inode, old_size, pos);
3275-
zero_len = pos - old_size;
3276-
}
32773272

3278-
if (!disksize_changed && !zero_len)
3273+
if (!disksize_changed)
32793274
return copied;
32803275

3281-
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
3276+
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
32823277
if (IS_ERR(handle))
32833278
return PTR_ERR(handle);
3284-
if (zero_len)
3285-
ext4_block_zero_eof(inode, old_size, pos);
32863279
ext4_mark_inode_dirty(handle, inode);
32873280
ext4_journal_stop(handle);
32883281

0 commit comments

Comments
 (0)