Skip to content

Commit 485f07e

Browse files
committed
Merge branch 'for-7.1/block' into for-next
* for-7.1/block:
  - md/raid5: fix soft lockup in retry_aligned_read()
  - md: wake raid456 reshape waiters before suspend
  - md/raid1: serialize overlap io for writemostly disk
  - md/md-llbitmap: optimize initial sync with write_zeroes_unmap support
  - md/md-llbitmap: add CleanUnwritten state for RAID-5 proactive parity building
  - md: add fallback to correct bitmap_ops on version mismatch
  - md/raid5: validate payload size before accessing journal metadata
  - md: remove unused static md_wq workqueue
  - md/raid0: use kvzalloc/kvfree for strip_zone and devlist allocations
  - md: fix array_state=clear sysfs deadlock
2 parents cb793ff + d0cc5f5 commit 485f07e

7 files changed

Lines changed: 405 additions & 62 deletions

File tree

drivers/md/md-llbitmap.c

Lines changed: 189 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,20 @@ enum llbitmap_state {
208208
BitNeedSync,
209209
/* data is synchronizing */
210210
BitSyncing,
211+
/*
212+
* Proactive sync requested for unwritten region (raid456 only).
213+
* Triggered via sysfs when user wants to pre-build XOR parity
214+
* for regions that have never been written.
215+
*/
216+
BitNeedSyncUnwritten,
217+
/* Proactive sync in progress for unwritten region */
218+
BitSyncingUnwritten,
219+
/*
220+
* XOR parity has been pre-built for a region that has never had
221+
* user data written. When user writes to this region, it transitions
222+
* to BitDirty.
223+
*/
224+
BitCleanUnwritten,
211225
BitStateCount,
212226
BitNone = 0xff,
213227
};
@@ -232,6 +246,12 @@ enum llbitmap_action {
232246
* BitNeedSync.
233247
*/
234248
BitmapActionStale,
249+
/*
250+
* Proactive sync trigger for raid456 - builds XOR parity for
251+
* Unwritten regions without requiring user data write first.
252+
*/
253+
BitmapActionProactiveSync,
254+
BitmapActionClearUnwritten,
235255
BitmapActionCount,
236256
/* Init state is BitUnwritten */
237257
BitmapActionInit,
@@ -304,6 +324,8 @@ static char state_machine[BitStateCount][BitmapActionCount] = {
304324
[BitmapActionDaemon] = BitNone,
305325
[BitmapActionDiscard] = BitNone,
306326
[BitmapActionStale] = BitNone,
327+
[BitmapActionProactiveSync] = BitNeedSyncUnwritten,
328+
[BitmapActionClearUnwritten] = BitNone,
307329
},
308330
[BitClean] = {
309331
[BitmapActionStartwrite] = BitDirty,
@@ -314,6 +336,8 @@ static char state_machine[BitStateCount][BitmapActionCount] = {
314336
[BitmapActionDaemon] = BitNone,
315337
[BitmapActionDiscard] = BitUnwritten,
316338
[BitmapActionStale] = BitNeedSync,
339+
[BitmapActionProactiveSync] = BitNone,
340+
[BitmapActionClearUnwritten] = BitNone,
317341
},
318342
[BitDirty] = {
319343
[BitmapActionStartwrite] = BitNone,
@@ -324,6 +348,8 @@ static char state_machine[BitStateCount][BitmapActionCount] = {
324348
[BitmapActionDaemon] = BitClean,
325349
[BitmapActionDiscard] = BitUnwritten,
326350
[BitmapActionStale] = BitNeedSync,
351+
[BitmapActionProactiveSync] = BitNone,
352+
[BitmapActionClearUnwritten] = BitNone,
327353
},
328354
[BitNeedSync] = {
329355
[BitmapActionStartwrite] = BitNone,
@@ -334,6 +360,8 @@ static char state_machine[BitStateCount][BitmapActionCount] = {
334360
[BitmapActionDaemon] = BitNone,
335361
[BitmapActionDiscard] = BitUnwritten,
336362
[BitmapActionStale] = BitNone,
363+
[BitmapActionProactiveSync] = BitNone,
364+
[BitmapActionClearUnwritten] = BitNone,
337365
},
338366
[BitSyncing] = {
339367
[BitmapActionStartwrite] = BitNone,
@@ -344,6 +372,44 @@ static char state_machine[BitStateCount][BitmapActionCount] = {
344372
[BitmapActionDaemon] = BitNone,
345373
[BitmapActionDiscard] = BitUnwritten,
346374
[BitmapActionStale] = BitNeedSync,
375+
[BitmapActionProactiveSync] = BitNone,
376+
[BitmapActionClearUnwritten] = BitNone,
377+
},
378+
[BitNeedSyncUnwritten] = {
379+
[BitmapActionStartwrite] = BitNeedSync,
380+
[BitmapActionStartsync] = BitSyncingUnwritten,
381+
[BitmapActionEndsync] = BitNone,
382+
[BitmapActionAbortsync] = BitUnwritten,
383+
[BitmapActionReload] = BitUnwritten,
384+
[BitmapActionDaemon] = BitNone,
385+
[BitmapActionDiscard] = BitUnwritten,
386+
[BitmapActionStale] = BitUnwritten,
387+
[BitmapActionProactiveSync] = BitNone,
388+
[BitmapActionClearUnwritten] = BitUnwritten,
389+
},
390+
[BitSyncingUnwritten] = {
391+
[BitmapActionStartwrite] = BitSyncing,
392+
[BitmapActionStartsync] = BitSyncingUnwritten,
393+
[BitmapActionEndsync] = BitCleanUnwritten,
394+
[BitmapActionAbortsync] = BitUnwritten,
395+
[BitmapActionReload] = BitUnwritten,
396+
[BitmapActionDaemon] = BitNone,
397+
[BitmapActionDiscard] = BitUnwritten,
398+
[BitmapActionStale] = BitUnwritten,
399+
[BitmapActionProactiveSync] = BitNone,
400+
[BitmapActionClearUnwritten] = BitUnwritten,
401+
},
402+
[BitCleanUnwritten] = {
403+
[BitmapActionStartwrite] = BitDirty,
404+
[BitmapActionStartsync] = BitNone,
405+
[BitmapActionEndsync] = BitNone,
406+
[BitmapActionAbortsync] = BitNone,
407+
[BitmapActionReload] = BitNone,
408+
[BitmapActionDaemon] = BitNone,
409+
[BitmapActionDiscard] = BitUnwritten,
410+
[BitmapActionStale] = BitUnwritten,
411+
[BitmapActionProactiveSync] = BitNone,
412+
[BitmapActionClearUnwritten] = BitUnwritten,
347413
},
348414
};
349415

@@ -376,14 +442,15 @@ static void llbitmap_infect_dirty_bits(struct llbitmap *llbitmap,
376442
pctl->state[pos] = level_456 ? BitNeedSync : BitDirty;
377443
break;
378444
case BitClean:
445+
case BitCleanUnwritten:
379446
pctl->state[pos] = BitDirty;
380447
break;
381448
}
382449
}
383450
}
384451

385452
static void llbitmap_set_page_dirty(struct llbitmap *llbitmap, int idx,
386-
int offset)
453+
int offset, bool infect)
387454
{
388455
struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx];
389456
unsigned int io_size = llbitmap->io_size;
@@ -398,7 +465,7 @@ static void llbitmap_set_page_dirty(struct llbitmap *llbitmap, int idx,
398465
* resync all the dirty bits, hence skip infecting new dirty bits to
399466
* avoid resyncing unnecessary data.
400467
*/
401-
if (llbitmap->mddev->degraded) {
468+
if (llbitmap->mddev->degraded || !infect) {
402469
set_bit(block, pctl->dirty);
403470
return;
404471
}
@@ -438,7 +505,9 @@ static void llbitmap_write(struct llbitmap *llbitmap, enum llbitmap_state state,
438505

439506
llbitmap->pctl[idx]->state[bit] = state;
440507
if (state == BitDirty || state == BitNeedSync)
441-
llbitmap_set_page_dirty(llbitmap, idx, bit);
508+
llbitmap_set_page_dirty(llbitmap, idx, bit, true);
509+
else if (state == BitNeedSyncUnwritten)
510+
llbitmap_set_page_dirty(llbitmap, idx, bit, false);
442511
}
443512

444513
static struct page *llbitmap_read_page(struct llbitmap *llbitmap, int idx)
@@ -585,13 +654,73 @@ static int llbitmap_cache_pages(struct llbitmap *llbitmap)
585654
return 0;
586655
}
587656

657+
/*
658+
* Check if all underlying disks support write_zeroes with unmap.
*
* Members with rdev->raid_disk < 0 or with the Faulty flag set are ignored.
* Returns false as soon as bdev_write_zeroes_unmap_sectors() reports 0 for
* one of the remaining members, true otherwise. Note that the result is
* also true when every member was ignored (nothing was actually checked).
659+
*/
660+
static bool llbitmap_all_disks_support_wzeroes_unmap(struct llbitmap *llbitmap)
661+
{
662+
struct mddev *mddev = llbitmap->mddev;
663+
struct md_rdev *rdev;
664+
665+
rdev_for_each(rdev, mddev) {
666+
/* Only members that hold a raid slot and are not Faulty count. */
if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
667+
continue;
668+
669+
/* A limit of 0 sectors means this member cannot zero via unmap. */
if (bdev_write_zeroes_unmap_sectors(rdev->bdev) == 0)
670+
return false;
671+
}
672+
673+
return true;
674+
}
675+
676+
/*
677+
* Issue write_zeroes to all underlying disks to zero their data regions.
678+
* This ensures parity consistency for RAID-456 (0 XOR 0 = 0).
679+
* Returns true if all disks were successfully zeroed.
*
* For each member that is not skipped (raid_disk < 0 or Faulty), the range
* [rdev->data_offset, rdev->data_offset + mddev->dev_sectors) is passed to
* blkdev_issue_zeroout() with GFP_KERNEL and flags 0. The loop stops at the
* first error, logs a warning and returns false; members zeroed before the
* failure are left as they are (no rollback is attempted here).
680+
*/
681+
static bool llbitmap_zero_all_disks(struct llbitmap *llbitmap)
682+
{
683+
struct mddev *mddev = llbitmap->mddev;
684+
struct md_rdev *rdev;
685+
sector_t dev_sectors = mddev->dev_sectors;
686+
int ret;
687+
688+
rdev_for_each(rdev, mddev) {
689+
/* Same member filter as llbitmap_all_disks_support_wzeroes_unmap(). */
if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
690+
continue;
691+
692+
ret = blkdev_issue_zeroout(rdev->bdev,
693+
rdev->data_offset,
694+
dev_sectors,
695+
GFP_KERNEL, 0);
696+
if (ret) {
697+
pr_warn("md/llbitmap: failed to zero disk %pg: %d\n",
698+
rdev->bdev, ret);
699+
/* Give up on the first failure; the caller sees "not zeroed". */
return false;
700+
}
701+
}
702+
703+
return true;
704+
}
705+
588706
static void llbitmap_init_state(struct llbitmap *llbitmap)
589707
{
708+
struct mddev *mddev = llbitmap->mddev;
590709
enum llbitmap_state state = BitUnwritten;
591710
unsigned long i;
592711

593-
if (test_and_clear_bit(BITMAP_CLEAN, &llbitmap->flags))
712+
if (test_and_clear_bit(BITMAP_CLEAN, &llbitmap->flags)) {
594713
state = BitClean;
714+
} else if (raid_is_456(mddev) &&
715+
llbitmap_all_disks_support_wzeroes_unmap(llbitmap)) {
716+
/*
717+
* All disks support write_zeroes with unmap. Zero all disks
718+
* to ensure parity consistency, then set BitCleanUnwritten
719+
* to skip initial sync.
720+
*/
721+
if (llbitmap_zero_all_disks(llbitmap))
722+
state = BitCleanUnwritten;
723+
}
595724

596725
for (i = 0; i < llbitmap->chunks; i++)
597726
llbitmap_write(llbitmap, state, i);
@@ -627,11 +756,10 @@ static enum llbitmap_state llbitmap_state_machine(struct llbitmap *llbitmap,
627756
goto write_bitmap;
628757
}
629758

630-
if (c == BitNeedSync)
759+
if (c == BitNeedSync || c == BitNeedSyncUnwritten)
631760
need_resync = !mddev->degraded;
632761

633762
state = state_machine[c][action];
634-
635763
write_bitmap:
636764
if (unlikely(mddev->degraded)) {
637765
/* For degraded array, mark new data as need sync. */
@@ -658,8 +786,7 @@ static enum llbitmap_state llbitmap_state_machine(struct llbitmap *llbitmap,
658786
}
659787

660788
llbitmap_write(llbitmap, state, start);
661-
662-
if (state == BitNeedSync)
789+
if (state == BitNeedSync || state == BitNeedSyncUnwritten)
663790
need_resync = !mddev->degraded;
664791
else if (state == BitDirty &&
665792
!timer_pending(&llbitmap->pending_timer))
@@ -1229,7 +1356,7 @@ static bool llbitmap_blocks_synced(struct mddev *mddev, sector_t offset)
12291356
unsigned long p = offset >> llbitmap->chunkshift;
12301357
enum llbitmap_state c = llbitmap_read(llbitmap, p);
12311358

1232-
return c == BitClean || c == BitDirty;
1359+
return c == BitClean || c == BitDirty || c == BitCleanUnwritten;
12331360
}
12341361

12351362
static sector_t llbitmap_skip_sync_blocks(struct mddev *mddev, sector_t offset)
@@ -1243,6 +1370,10 @@ static sector_t llbitmap_skip_sync_blocks(struct mddev *mddev, sector_t offset)
12431370
if (c == BitUnwritten)
12441371
return blocks;
12451372

1373+
/* Skip CleanUnwritten - no user data, will be reset after recovery */
1374+
if (c == BitCleanUnwritten)
1375+
return blocks;
1376+
12461377
/* For degraded array, don't skip */
12471378
if (mddev->degraded)
12481379
return 0;
@@ -1261,14 +1392,25 @@ static bool llbitmap_start_sync(struct mddev *mddev, sector_t offset,
12611392
{
12621393
struct llbitmap *llbitmap = mddev->bitmap;
12631394
unsigned long p = offset >> llbitmap->chunkshift;
1395+
enum llbitmap_state state;
1396+
1397+
/*
1398+
* Before recovery starts, convert CleanUnwritten to Unwritten.
1399+
* This ensures the new disk won't have stale parity data.
1400+
*/
1401+
if (offset == 0 && test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
1402+
!test_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery))
1403+
llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1,
1404+
BitmapActionClearUnwritten);
1405+
12641406

12651407
/*
12661408
* Handle one bit at a time, this is much simpler. And it doesn't matter
12671409
* if md_do_sync() loops more times.
12681410
*/
12691411
*blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1));
1270-
return llbitmap_state_machine(llbitmap, p, p,
1271-
BitmapActionStartsync) == BitSyncing;
1412+
state = llbitmap_state_machine(llbitmap, p, p, BitmapActionStartsync);
1413+
return state == BitSyncing || state == BitSyncingUnwritten;
12721414
}
12731415

12741416
/* Something is wrong, sync_thread stop at @offset */
@@ -1474,9 +1616,15 @@ static ssize_t bits_show(struct mddev *mddev, char *page)
14741616
}
14751617

14761618
mutex_unlock(&mddev->bitmap_info.mutex);
1477-
return sprintf(page, "unwritten %d\nclean %d\ndirty %d\nneed sync %d\nsyncing %d\n",
1619+
return sprintf(page,
1620+
"unwritten %d\nclean %d\ndirty %d\n"
1621+
"need sync %d\nsyncing %d\n"
1622+
"need sync unwritten %d\nsyncing unwritten %d\n"
1623+
"clean unwritten %d\n",
14781624
bits[BitUnwritten], bits[BitClean], bits[BitDirty],
1479-
bits[BitNeedSync], bits[BitSyncing]);
1625+
bits[BitNeedSync], bits[BitSyncing],
1626+
bits[BitNeedSyncUnwritten], bits[BitSyncingUnwritten],
1627+
bits[BitCleanUnwritten]);
14801628
}
14811629

14821630
static struct md_sysfs_entry llbitmap_bits = __ATTR_RO(bits);
@@ -1549,11 +1697,39 @@ barrier_idle_store(struct mddev *mddev, const char *buf, size_t len)
15491697

15501698
static struct md_sysfs_entry llbitmap_barrier_idle = __ATTR_RW(barrier_idle);
15511699

1700+
/*
 * sysfs store handler for "proactive_sync".
 *
 * The content of @buf is never examined: any write applies
 * BitmapActionProactiveSync to every chunk (0 .. chunks - 1) while holding
 * mddev->bitmap_info.mutex.
 *
 * Returns @len on success, -EINVAL if the array is not raid456, or -ENODEV
 * if there is no llbitmap or its page control array (pctl) is not set up.
 * NOTE(review): whether a sync is actually kicked off afterwards is decided
 * inside llbitmap_state_machine(), which is not fully visible here.
 */
static ssize_t
1701+
proactive_sync_store(struct mddev *mddev, const char *buf, size_t len)
1702+
{
1703+
struct llbitmap *llbitmap;
1704+
1705+
/* Only for RAID-456 */
1706+
if (!raid_is_456(mddev))
1707+
return -EINVAL;
1708+
1709+
mutex_lock(&mddev->bitmap_info.mutex);
1710+
llbitmap = mddev->bitmap;
1711+
/* Bail out (dropping the mutex) when the bitmap is absent or has no pctl. */
if (!llbitmap || !llbitmap->pctl) {
1712+
mutex_unlock(&mddev->bitmap_info.mutex);
1713+
return -ENODEV;
1714+
}
1715+
1716+
/* Trigger proactive sync on all Unwritten regions */
1717+
llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1,
1718+
BitmapActionProactiveSync);
1719+
1720+
mutex_unlock(&mddev->bitmap_info.mutex);
1721+
return len;
1722+
}
1723+
1724+
/*
 * Write-only "proactive_sync" attribute: mode 0200, no show handler (NULL),
 * writes are handled by proactive_sync_store().
 */
static struct md_sysfs_entry llbitmap_proactive_sync =
1725+
__ATTR(proactive_sync, 0200, NULL, proactive_sync_store);
1726+
15521727
/*
 * NULL-terminated list of the llbitmap sysfs attributes.
 * NOTE(review): the attribute group that consumes this array is outside the
 * visible hunk - confirm where it is registered.
 */
static struct attribute *md_llbitmap_attrs[] = {
15531728
&llbitmap_bits.attr,
15541729
&llbitmap_metadata.attr,
15551730
&llbitmap_daemon_sleep.attr,
15561731
&llbitmap_barrier_idle.attr,
1732+
/* New in this change: trigger for proactive parity building. */
&llbitmap_proactive_sync.attr,
15571733
NULL
15581734
};
15591735

0 commit comments

Comments
 (0)