Skip to content

Commit 1aec300

Browse files
FirstLoveLifetytso
authored andcommitted
ext4: publish jinode after initialization
ext4_inode_attach_jinode() publishes ei->jinode to concurrent users. It used to set ei->jinode before jbd2_journal_init_jbd_inode(), allowing a reader to observe a non-NULL jinode with i_vfs_inode still unset. The fast commit flush path can then pass this jinode to jbd2_wait_inode_data(), which dereferences i_vfs_inode->i_mapping and may crash. Below is the crash I observe: ``` BUG: unable to handle page fault for address: 000000010beb47f4 PGD 110e51067 P4D 110e51067 PUD 0 Oops: Oops: 0000 [#1] SMP NOPTI CPU: 1 UID: 0 PID: 4850 Comm: fc_fsync_bench_ Not tainted 6.18.0-00764-g795a690c06a5 #1 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.17.0-2-2 04/01/2014 RIP: 0010:xas_find_marked+0x3d/0x2e0 Code: e0 03 48 83 f8 02 0f 84 f0 01 00 00 48 8b 47 08 48 89 c3 48 39 c6 0f 82 fd 01 00 00 48 85 c9 74 3d 48 83 f9 03 77 63 4c 8b 0f <49> 8b 71 08 48 c7 47 18 00 00 00 00 48 89 f1 83 e1 03 48 83 f9 02 RSP: 0018:ffffbbee806e7bf0 EFLAGS: 00010246 RAX: 000000000010beb4 RBX: 000000000010beb4 RCX: 0000000000000003 RDX: 0000000000000001 RSI: 0000002000300000 RDI: ffffbbee806e7c10 RBP: 0000000000000001 R08: 0000002000300000 R09: 000000010beb47ec R10: ffff9ea494590090 R11: 0000000000000000 R12: 0000002000300000 R13: ffffbbee806e7c90 R14: ffff9ea494513788 R15: ffffbbee806e7c88 FS: 00007fc2f9e3e6c0(0000) GS:ffff9ea6b1444000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000010beb47f4 CR3: 0000000119ac5000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: <TASK> filemap_get_folios_tag+0x87/0x2a0 __filemap_fdatawait_range+0x5f/0xd0 ? srso_alias_return_thunk+0x5/0xfbef5 ? __schedule+0x3e7/0x10c0 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? preempt_count_sub+0x5f/0x80 ? srso_alias_return_thunk+0x5/0xfbef5 ? cap_safe_nice+0x37/0x70 ? srso_alias_return_thunk+0x5/0xfbef5 ? preempt_count_sub+0x5f/0x80 ? srso_alias_return_thunk+0x5/0xfbef5 filemap_fdatawait_range_keep_errors+0x12/0x40 ext4_fc_commit+0x697/0x8b0 ? ext4_file_write_iter+0x64b/0x950 ? srso_alias_return_thunk+0x5/0xfbef5 ? preempt_count_sub+0x5f/0x80 ? srso_alias_return_thunk+0x5/0xfbef5 ? vfs_write+0x356/0x480 ? srso_alias_return_thunk+0x5/0xfbef5 ? preempt_count_sub+0x5f/0x80 ext4_sync_file+0xf7/0x370 do_fsync+0x3b/0x80 ? syscall_trace_enter+0x108/0x1d0 __x64_sys_fdatasync+0x16/0x20 do_syscall_64+0x62/0x2c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e ... ``` Fix this by initializing the jbd2_inode first. Use smp_wmb() and WRITE_ONCE() to publish ei->jinode after initialization. Readers use READ_ONCE() to fetch the pointer. Fixes: a361293 ("jbd2: Fix oops in jbd2_journal_file_inode()") Cc: stable@vger.kernel.org Signed-off-by: Li Chen <me@linux.beauty> Reviewed-by: Jan Kara <jack@suse.cz> Link: https://patch.msgid.link/20260225082617.147957-1-me@linux.beauty Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org
1 parent 3562270 commit 1aec300

2 files changed

Lines changed: 13 additions & 6 deletions

File tree

fs/ext4/fast_commit.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -975,13 +975,13 @@ static int ext4_fc_flush_data(journal_t *journal)
975975
int ret = 0;
976976

977977
list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
978-
ret = jbd2_submit_inode_data(journal, ei->jinode);
978+
ret = jbd2_submit_inode_data(journal, READ_ONCE(ei->jinode));
979979
if (ret)
980980
return ret;
981981
}
982982

983983
list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
984-
ret = jbd2_wait_inode_data(journal, ei->jinode);
984+
ret = jbd2_wait_inode_data(journal, READ_ONCE(ei->jinode));
985985
if (ret)
986986
return ret;
987987
}

fs/ext4/inode.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -128,17 +128,19 @@ void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
128128
static inline int ext4_begin_ordered_truncate(struct inode *inode,
129129
loff_t new_size)
130130
{
131+
struct jbd2_inode *jinode = READ_ONCE(EXT4_I(inode)->jinode);
132+
131133
trace_ext4_begin_ordered_truncate(inode, new_size);
132134
/*
133135
* If jinode is zero, then we never opened the file for
134136
* writing, so there's no need to call
135137
* jbd2_journal_begin_ordered_truncate() since there's no
136138
* outstanding writes we need to flush.
137139
*/
138-
if (!EXT4_I(inode)->jinode)
140+
if (!jinode)
139141
return 0;
140142
return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode),
141-
EXT4_I(inode)->jinode,
143+
jinode,
142144
new_size);
143145
}
144146

@@ -4451,8 +4453,13 @@ int ext4_inode_attach_jinode(struct inode *inode)
44514453
spin_unlock(&inode->i_lock);
44524454
return -ENOMEM;
44534455
}
4454-
ei->jinode = jinode;
4455-
jbd2_journal_init_jbd_inode(ei->jinode, inode);
4456+
jbd2_journal_init_jbd_inode(jinode, inode);
4457+
/*
4458+
* Publish ->jinode only after it is fully initialized so that
4459+
* readers never observe a partially initialized jbd2_inode.
4460+
*/
4461+
smp_wmb();
4462+
WRITE_ONCE(ei->jinode, jinode);
44564463
jinode = NULL;
44574464
}
44584465
spin_unlock(&inode->i_lock);

0 commit comments

Comments
 (0)