@@ -1590,6 +1590,7 @@ xfs_zoned_buffered_write_iomap_begin(
15901590{
15911591 struct iomap_iter * iter =
15921592 container_of (iomap , struct iomap_iter , iomap );
1593+ struct address_space * mapping = inode -> i_mapping ;
15931594 struct xfs_zone_alloc_ctx * ac = iter -> private ;
15941595 struct xfs_inode * ip = XFS_I (inode );
15951596 struct xfs_mount * mp = ip -> i_mount ;
@@ -1614,6 +1615,7 @@ xfs_zoned_buffered_write_iomap_begin(
16141615 if (error )
16151616 return error ;
16161617
1618+ restart :
16171619 error = xfs_ilock_for_iomap (ip , flags , & lockmode );
16181620 if (error )
16191621 return error ;
@@ -1651,14 +1653,6 @@ xfs_zoned_buffered_write_iomap_begin(
16511653 & smap ))
16521654 smap .br_startoff = end_fsb ; /* fake hole until EOF */
16531655 if (smap .br_startoff > offset_fsb ) {
1654- /*
1655- * We never need to allocate blocks for zeroing a hole.
1656- */
1657- if (flags & IOMAP_ZERO ) {
1658- xfs_hole_to_iomap (ip , iomap , offset_fsb ,
1659- smap .br_startoff );
1660- goto out_unlock ;
1661- }
16621656 end_fsb = min (end_fsb , smap .br_startoff );
16631657 } else {
16641658 end_fsb = min (end_fsb ,
@@ -1690,6 +1684,33 @@ xfs_zoned_buffered_write_iomap_begin(
16901684 count_fsb = min3 (end_fsb - offset_fsb , XFS_MAX_BMBT_EXTLEN ,
16911685 XFS_B_TO_FSB (mp , 1024 * PAGE_SIZE ));
16921686
1687+ /*
1688+ * When zeroing, don't allocate blocks for holes as they are already
1689+ * zeroes, but we need to ensure that no extents exist in both the data
1690+ * and COW fork to ensure this really is a hole.
1691+ *
1692+ * A window exists where we might observe a hole in both forks with
1693+ * valid data in cache. Writeback removes the COW fork blocks on
1694+ * submission but doesn't remap into the data fork until completion. If
1695+ * the data fork was previously a hole, we'll fail to zero. Until we
1696+ * find a way to avoid this transient state, check for dirty pagecache
1697+ * and flush to wait on blocks to land in the data fork.
1698+ */
1699+ if ((flags & IOMAP_ZERO ) && srcmap -> type == IOMAP_HOLE ) {
1700+ if (filemap_range_needs_writeback (mapping , offset ,
1701+ offset + count - 1 )) {
1702+ xfs_iunlock (ip , lockmode );
1703+ error = filemap_write_and_wait_range (mapping , offset ,
1704+ offset + count - 1 );
1705+ if (error )
1706+ return error ;
1707+ goto restart ;
1708+ }
1709+
1710+ xfs_hole_to_iomap (ip , iomap , offset_fsb , end_fsb );
1711+ goto out_unlock ;
1712+ }
1713+
16931714 /*
16941715 * The block reservation is supposed to cover all blocks that the
16951716 * operation could possibly write, but there is a nasty corner case
@@ -1764,6 +1785,8 @@ xfs_buffered_write_iomap_begin(
17641785 struct xfs_mount * mp = ip -> i_mount ;
17651786 xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT (mp , offset );
17661787 xfs_fileoff_t end_fsb = xfs_iomap_end_fsb (mp , offset , count );
1788+ xfs_fileoff_t cow_fsb = NULLFILEOFF ;
1789+ xfs_fileoff_t eof_fsb = XFS_B_TO_FSB (mp , XFS_ISIZE (ip ));
17671790 struct xfs_bmbt_irec imap , cmap ;
17681791 struct xfs_iext_cursor icur , ccur ;
17691792 xfs_fsblock_t prealloc_blocks = 0 ;
@@ -1808,30 +1831,96 @@ xfs_buffered_write_iomap_begin(
18081831 goto out_unlock ;
18091832
18101833 /*
1811- * Search the data fork first to look up our source mapping. We
1812- * always need the data fork map, as we have to return it to the
1813- * iomap code so that the higher level write code can read data in to
1814- * perform read-modify-write cycles for unaligned writes.
1834+ * Search the data fork first to look up our source mapping. We always
1835+ * need the data fork map, as we have to return it to the iomap code so
1836+ * that the higher level write code can read data in to perform
1837+ * read-modify-write cycles for unaligned writes.
1838+ *
1839+ * Then search the COW fork extent list even if we did not find a data
1840+ * fork extent. This serves two purposes: first, this implements the
1841+ * speculative preallocation using cowextsize, so that we also unshare
1842+ * blocks adjacent to shared blocks instead of just the shared blocks
1843+ * themselves. Second the lookup in the extent list is generally faster
1844+ * than going out to the shared extent tree.
18151845 */
18161846 eof = !xfs_iext_lookup_extent (ip , & ip -> i_df , offset_fsb , & icur , & imap );
18171847 if (eof )
18181848 imap .br_startoff = end_fsb ; /* fake hole until the end */
1849+ if (xfs_is_cow_inode (ip )) {
1850+ if (!ip -> i_cowfp ) {
1851+ ASSERT (!xfs_is_reflink_inode (ip ));
1852+ xfs_ifork_init_cow (ip );
1853+ }
1854+ cow_eof = !xfs_iext_lookup_extent (ip , ip -> i_cowfp , offset_fsb ,
1855+ & ccur , & cmap );
1856+ if (!cow_eof )
1857+ cow_fsb = cmap .br_startoff ;
1858+ }
18191859
1820- /* We never need to allocate blocks for zeroing or unsharing a hole. */
1821- if ((flags & (IOMAP_UNSHARE | IOMAP_ZERO )) &&
1822- imap .br_startoff > offset_fsb ) {
1860+ /* We never need to allocate blocks for unsharing a hole. */
1861+ if ((flags & IOMAP_UNSHARE ) && imap .br_startoff > offset_fsb ) {
18231862 xfs_hole_to_iomap (ip , iomap , offset_fsb , imap .br_startoff );
18241863 goto out_unlock ;
18251864 }
18261865
1866+ /*
1867+ * We may need to zero over a hole in the data fork if it's fronted by
1868+ * COW blocks and dirty pagecache. Scan such file ranges for dirty
1869+ * cache and fill the iomap batch with folios that need zeroing.
1870+ */
1871+ if ((flags & IOMAP_ZERO ) && imap .br_startoff > offset_fsb ) {
1872+ loff_t start , end ;
1873+ unsigned int fbatch_count ;
1874+
1875+ imap .br_blockcount = imap .br_startoff - offset_fsb ;
1876+ imap .br_startoff = offset_fsb ;
1877+ imap .br_startblock = HOLESTARTBLOCK ;
1878+ imap .br_state = XFS_EXT_NORM ;
1879+
1880+ if (cow_fsb == NULLFILEOFF )
1881+ goto found_imap ;
1882+ if (cow_fsb > offset_fsb ) {
1883+ xfs_trim_extent (& imap , offset_fsb ,
1884+ cow_fsb - offset_fsb );
1885+ goto found_imap ;
1886+ }
1887+
1888+ /* no zeroing beyond eof, so split at the boundary */
1889+ if (offset_fsb >= eof_fsb )
1890+ goto found_imap ;
1891+ if (offset_fsb < eof_fsb && end_fsb > eof_fsb )
1892+ xfs_trim_extent (& imap , offset_fsb ,
1893+ eof_fsb - offset_fsb );
1894+
1895+ /* COW fork blocks overlap the hole */
1896+ xfs_trim_extent (& imap , offset_fsb ,
1897+ cmap .br_startoff + cmap .br_blockcount - offset_fsb );
1898+ start = XFS_FSB_TO_B (mp , imap .br_startoff );
1899+ end = XFS_FSB_TO_B (mp , imap .br_startoff + imap .br_blockcount );
1900+ fbatch_count = iomap_fill_dirty_folios (iter , & start , end ,
1901+ & iomap_flags );
1902+ xfs_trim_extent (& imap , offset_fsb ,
1903+ XFS_B_TO_FSB (mp , start ) - offset_fsb );
1904+
1905+ /*
1906+ * Report the COW mapping if we have folios to zero. Otherwise
1907+ * ignore the COW blocks as preallocation and report a hole.
1908+ */
1909+ if (fbatch_count ) {
1910+ xfs_trim_extent (& cmap , imap .br_startoff ,
1911+ imap .br_blockcount );
1912+ imap .br_startoff = end_fsb ; /* fake hole */
1913+ goto found_cow ;
1914+ }
1915+ goto found_imap ;
1916+ }
1917+
18271918 /*
18281919 * For zeroing, trim extents that extend beyond the EOF block. If a
18291920 * delalloc extent starts beyond the EOF block, convert it to an
18301921 * unwritten extent.
18311922 */
18321923 if (flags & IOMAP_ZERO ) {
1833- xfs_fileoff_t eof_fsb = XFS_B_TO_FSB (mp , XFS_ISIZE (ip ));
1834-
18351924 if (isnullstartblock (imap .br_startblock ) &&
18361925 offset_fsb >= eof_fsb )
18371926 goto convert_delay ;
@@ -1864,24 +1953,13 @@ xfs_buffered_write_iomap_begin(
18641953 }
18651954
18661955 /*
1867- * Search the COW fork extent list even if we did not find a data fork
1868- * extent. This serves two purposes: first this implements the
1869- * speculative preallocation using cowextsize, so that we also unshare
1870- * block adjacent to shared blocks instead of just the shared blocks
1871- * themselves. Second the lookup in the extent list is generally faster
1872- * than going out to the shared extent tree.
1956+ * Now that we've handled any operation specific special cases, at this
1957+ * point we can report a COW mapping if found.
18731958 */
1874- if (xfs_is_cow_inode (ip )) {
1875- if (!ip -> i_cowfp ) {
1876- ASSERT (!xfs_is_reflink_inode (ip ));
1877- xfs_ifork_init_cow (ip );
1878- }
1879- cow_eof = !xfs_iext_lookup_extent (ip , ip -> i_cowfp , offset_fsb ,
1880- & ccur , & cmap );
1881- if (!cow_eof && cmap .br_startoff <= offset_fsb ) {
1882- trace_xfs_reflink_cow_found (ip , & cmap );
1883- goto found_cow ;
1884- }
1959+ if (xfs_is_cow_inode (ip ) &&
1960+ !cow_eof && cmap .br_startoff <= offset_fsb ) {
1961+ trace_xfs_reflink_cow_found (ip , & cmap );
1962+ goto found_cow ;
18851963 }
18861964
18871965 if (imap .br_startoff <= offset_fsb ) {
0 commit comments