Skip to content

Commit 5686459

Browse files
Eric-Terminalakpm00
authored andcommitted
ocfs2/heartbeat: fix slot mapping rollback leaks on error paths
o2hb_map_slot_data() allocates hr_tmp_block, hr_slots, hr_slot_data, and pages in stages. If a later allocation fails, the current code returns without unwinding the earlier allocations. o2hb_region_dev_store() also leaves slot mapping resources behind when setup aborts, and it keeps hr_aborted_start/hr_node_deleted set across retries. That leaves stale state behind after a failed start. Factor the slot cleanup into o2hb_unmap_slot_data(), use it from both o2hb_map_slot_data() and o2hb_region_release(), and call it from the dev_store() rollback after stopping a started heartbeat thread. While freeing pages, clear each hr_slot_data entry as it is released, and reset the start state before each new setup attempt. This closes the slot mapping leak on allocation/setup failure paths and keeps failed setup attempts retryable. Link: https://lkml.kernel.org/r/20260330153428.19586-1-yufan.chen@linux.dev Signed-off-by: Yufan Chen <ericterminal@gmail.com> Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com> Cc: Mark Fasheh <mark@fasheh.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Junxiao Bi <junxiao.bi@oracle.com> Cc: Changwei Ge <gechangwei@live.cn> Cc: Jun Piao <piaojun@huawei.com> Cc: Heming Zhao <heming.zhao@suse.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent e3a84be commit 5686459

1 file changed

Lines changed: 56 additions & 27 deletions

File tree

fs/ocfs2/cluster/heartbeat.c

Lines changed: 56 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,33 +1488,45 @@ static struct o2hb_region *to_o2hb_region(struct config_item *item)
14881488
return item ? container_of(item, struct o2hb_region, hr_item) : NULL;
14891489
}
14901490

1491-
/* drop_item only drops its ref after killing the thread, nothing should
1492-
* be using the region anymore. this has to clean up any state that
1493-
* attributes might have built up. */
1494-
static void o2hb_region_release(struct config_item *item)
1491+
static void o2hb_unmap_slot_data(struct o2hb_region *reg)
14951492
{
14961493
int i;
14971494
struct page *page;
1498-
struct o2hb_region *reg = to_o2hb_region(item);
1499-
1500-
mlog(ML_HEARTBEAT, "hb region release (%pg)\n", reg_bdev(reg));
1501-
1502-
kfree(reg->hr_tmp_block);
15031495

15041496
if (reg->hr_slot_data) {
15051497
for (i = 0; i < reg->hr_num_pages; i++) {
15061498
page = reg->hr_slot_data[i];
1507-
if (page)
1499+
if (page) {
15081500
__free_page(page);
1501+
reg->hr_slot_data[i] = NULL;
1502+
}
15091503
}
15101504
kfree(reg->hr_slot_data);
1505+
reg->hr_slot_data = NULL;
15111506
}
15121507

1508+
kfree(reg->hr_slots);
1509+
reg->hr_slots = NULL;
1510+
1511+
kfree(reg->hr_tmp_block);
1512+
reg->hr_tmp_block = NULL;
1513+
}
1514+
1515+
/* drop_item only drops its ref after killing the thread, nothing should
1516+
* be using the region anymore. this has to clean up any state that
1517+
* attributes might have built up.
1518+
*/
1519+
static void o2hb_region_release(struct config_item *item)
1520+
{
1521+
struct o2hb_region *reg = to_o2hb_region(item);
1522+
1523+
mlog(ML_HEARTBEAT, "hb region release (%pg)\n", reg_bdev(reg));
1524+
1525+
o2hb_unmap_slot_data(reg);
1526+
15131527
if (reg->hr_bdev_file)
15141528
fput(reg->hr_bdev_file);
15151529

1516-
kfree(reg->hr_slots);
1517-
15181530
debugfs_remove_recursive(reg->hr_debug_dir);
15191531
kfree(reg->hr_db_livenodes);
15201532
kfree(reg->hr_db_regnum);
@@ -1667,21 +1679,22 @@ static void o2hb_init_region_params(struct o2hb_region *reg)
16671679
static int o2hb_map_slot_data(struct o2hb_region *reg)
16681680
{
16691681
int i, j;
1682+
int ret = -ENOMEM;
16701683
unsigned int last_slot;
16711684
unsigned int spp = reg->hr_slots_per_page;
16721685
struct page *page;
16731686
char *raw;
16741687
struct o2hb_disk_slot *slot;
16751688

16761689
reg->hr_tmp_block = kmalloc(reg->hr_block_bytes, GFP_KERNEL);
1677-
if (reg->hr_tmp_block == NULL)
1678-
return -ENOMEM;
1690+
if (!reg->hr_tmp_block)
1691+
goto out;
16791692

16801693
reg->hr_slots = kzalloc_objs(struct o2hb_disk_slot, reg->hr_blocks);
1681-
if (reg->hr_slots == NULL)
1682-
return -ENOMEM;
1694+
if (!reg->hr_slots)
1695+
goto out;
16831696

1684-
for(i = 0; i < reg->hr_blocks; i++) {
1697+
for (i = 0; i < reg->hr_blocks; i++) {
16851698
slot = &reg->hr_slots[i];
16861699
slot->ds_node_num = i;
16871700
INIT_LIST_HEAD(&slot->ds_live_item);
@@ -1695,12 +1708,12 @@ static int o2hb_map_slot_data(struct o2hb_region *reg)
16951708

16961709
reg->hr_slot_data = kzalloc_objs(struct page *, reg->hr_num_pages);
16971710
if (!reg->hr_slot_data)
1698-
return -ENOMEM;
1711+
goto out;
16991712

1700-
for(i = 0; i < reg->hr_num_pages; i++) {
1713+
for (i = 0; i < reg->hr_num_pages; i++) {
17011714
page = alloc_page(GFP_KERNEL);
17021715
if (!page)
1703-
return -ENOMEM;
1716+
goto out;
17041717

17051718
reg->hr_slot_data[i] = page;
17061719

@@ -1720,6 +1733,10 @@ static int o2hb_map_slot_data(struct o2hb_region *reg)
17201733
}
17211734

17221735
return 0;
1736+
1737+
out:
1738+
o2hb_unmap_slot_data(reg);
1739+
return ret;
17231740
}
17241741

17251742
/* Read in all the slots available and populate the tracking
@@ -1809,9 +1826,11 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
18091826
"blocksize %u incorrect for device, expected %d",
18101827
reg->hr_block_bytes, sectsize);
18111828
ret = -EINVAL;
1812-
goto out3;
1829+
goto out;
18131830
}
18141831

1832+
reg->hr_aborted_start = 0;
1833+
reg->hr_node_deleted = 0;
18151834
o2hb_init_region_params(reg);
18161835

18171836
/* Generation of zero is invalid */
@@ -1823,13 +1842,13 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
18231842
ret = o2hb_map_slot_data(reg);
18241843
if (ret) {
18251844
mlog_errno(ret);
1826-
goto out3;
1845+
goto out;
18271846
}
18281847

18291848
ret = o2hb_populate_slot_data(reg);
18301849
if (ret) {
18311850
mlog_errno(ret);
1832-
goto out3;
1851+
goto out;
18331852
}
18341853

18351854
INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout);
@@ -1860,7 +1879,7 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
18601879
if (IS_ERR(hb_task)) {
18611880
ret = PTR_ERR(hb_task);
18621881
mlog_errno(ret);
1863-
goto out3;
1882+
goto out;
18641883
}
18651884

18661885
spin_lock(&o2hb_live_lock);
@@ -1877,12 +1896,12 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
18771896

18781897
if (reg->hr_aborted_start) {
18791898
ret = -EIO;
1880-
goto out3;
1899+
goto out;
18811900
}
18821901

18831902
if (reg->hr_node_deleted) {
18841903
ret = -EINVAL;
1885-
goto out3;
1904+
goto out;
18861905
}
18871906

18881907
/* Ok, we were woken. Make sure it wasn't by drop_item() */
@@ -1901,8 +1920,18 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
19011920
printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%pg)\n",
19021921
config_item_name(&reg->hr_item), reg_bdev(reg));
19031922

1904-
out3:
1923+
out:
19051924
if (ret < 0) {
1925+
spin_lock(&o2hb_live_lock);
1926+
hb_task = reg->hr_task;
1927+
reg->hr_task = NULL;
1928+
spin_unlock(&o2hb_live_lock);
1929+
1930+
if (hb_task)
1931+
kthread_stop(hb_task);
1932+
1933+
o2hb_unmap_slot_data(reg);
1934+
19061935
fput(reg->hr_bdev_file);
19071936
reg->hr_bdev_file = NULL;
19081937
}

0 commit comments

Comments
 (0)