Skip to content

Commit df236c9

Browse files
committed
Merge branch 'xfs-7.1-merge' into for-next
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2 parents e9b7a02 + 388bb26 commit df236c9

5 files changed

Lines changed: 169 additions & 78 deletions

File tree

fs/iomap/buffered-io.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1632,16 +1632,12 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
16321632
while ((ret = iomap_iter(&iter, ops)) > 0) {
16331633
const struct iomap *srcmap = iomap_iter_srcmap(&iter);
16341634

1635-
if (WARN_ON_ONCE((iter.iomap.flags & IOMAP_F_FOLIO_BATCH) &&
1636-
srcmap->type != IOMAP_UNWRITTEN))
1637-
return -EIO;
1638-
16391635
if (!(iter.iomap.flags & IOMAP_F_FOLIO_BATCH) &&
16401636
(srcmap->type == IOMAP_HOLE ||
16411637
srcmap->type == IOMAP_UNWRITTEN)) {
16421638
s64 status;
16431639

1644-
if (range_dirty) {
1640+
if (range_dirty && srcmap->type == IOMAP_UNWRITTEN) {
16451641
range_dirty = false;
16461642
status = iomap_zero_iter_flush_and_stale(&iter);
16471643
} else {

fs/xfs/xfs_file.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1306,6 +1306,23 @@ xfs_falloc_insert_range(
13061306
if (offset >= isize)
13071307
return -EINVAL;
13081308

1309+
/*
1310+
* Let writeback clean up EOF folio state before we bump i_size. The
1311+
* insert flushes before it starts shifting and under certain
1312+
* circumstances we can write back blocks that should technically be
1313+
* considered post-eof (and thus should not be submitted for writeback).
1314+
*
1315+
* For example, a large, dirty folio that spans EOF and is backed by
1316+
* post-eof COW fork preallocation can cause block remap into the data
1317+
* fork. This shifts back out beyond EOF, but creates an unexpectedly
1318+
* written post-eof block. The insert is going to flush, unmap and
1319+
* cancel prealloc across this whole range, so flush EOF now before we
1320+
* bump i_size to provide consistent behavior.
1321+
*/
1322+
error = filemap_write_and_wait_range(inode->i_mapping, isize, isize);
1323+
if (error)
1324+
return error;
1325+
13091326
error = xfs_falloc_setsize(file, isize + len);
13101327
if (error)
13111328
return error;

fs/xfs/xfs_iomap.c

Lines changed: 112 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1590,6 +1590,7 @@ xfs_zoned_buffered_write_iomap_begin(
15901590
{
15911591
struct iomap_iter *iter =
15921592
container_of(iomap, struct iomap_iter, iomap);
1593+
struct address_space *mapping = inode->i_mapping;
15931594
struct xfs_zone_alloc_ctx *ac = iter->private;
15941595
struct xfs_inode *ip = XFS_I(inode);
15951596
struct xfs_mount *mp = ip->i_mount;
@@ -1614,6 +1615,7 @@ xfs_zoned_buffered_write_iomap_begin(
16141615
if (error)
16151616
return error;
16161617

1618+
restart:
16171619
error = xfs_ilock_for_iomap(ip, flags, &lockmode);
16181620
if (error)
16191621
return error;
@@ -1651,14 +1653,6 @@ xfs_zoned_buffered_write_iomap_begin(
16511653
&smap))
16521654
smap.br_startoff = end_fsb; /* fake hole until EOF */
16531655
if (smap.br_startoff > offset_fsb) {
1654-
/*
1655-
* We never need to allocate blocks for zeroing a hole.
1656-
*/
1657-
if (flags & IOMAP_ZERO) {
1658-
xfs_hole_to_iomap(ip, iomap, offset_fsb,
1659-
smap.br_startoff);
1660-
goto out_unlock;
1661-
}
16621656
end_fsb = min(end_fsb, smap.br_startoff);
16631657
} else {
16641658
end_fsb = min(end_fsb,
@@ -1690,6 +1684,33 @@ xfs_zoned_buffered_write_iomap_begin(
16901684
count_fsb = min3(end_fsb - offset_fsb, XFS_MAX_BMBT_EXTLEN,
16911685
XFS_B_TO_FSB(mp, 1024 * PAGE_SIZE));
16921686

1687+
/*
1688+
* When zeroing, don't allocate blocks for holes as they are already
1689+
* zeroes, but we need to ensure that no extents exist in both the data
1690+
* and COW fork to ensure this really is a hole.
1691+
*
1692+
* A window exists where we might observe a hole in both forks with
1693+
* valid data in cache. Writeback removes the COW fork blocks on
1694+
* submission but doesn't remap into the data fork until completion. If
1695+
* the data fork was previously a hole, we'll fail to zero. Until we
1696+
* find a way to avoid this transient state, check for dirty pagecache
1697+
* and flush to wait on blocks to land in the data fork.
1698+
*/
1699+
if ((flags & IOMAP_ZERO) && srcmap->type == IOMAP_HOLE) {
1700+
if (filemap_range_needs_writeback(mapping, offset,
1701+
offset + count - 1)) {
1702+
xfs_iunlock(ip, lockmode);
1703+
error = filemap_write_and_wait_range(mapping, offset,
1704+
offset + count - 1);
1705+
if (error)
1706+
return error;
1707+
goto restart;
1708+
}
1709+
1710+
xfs_hole_to_iomap(ip, iomap, offset_fsb, end_fsb);
1711+
goto out_unlock;
1712+
}
1713+
16931714
/*
16941715
* The block reservation is supposed to cover all blocks that the
16951716
* operation could possible write, but there is a nasty corner case
@@ -1764,6 +1785,8 @@ xfs_buffered_write_iomap_begin(
17641785
struct xfs_mount *mp = ip->i_mount;
17651786
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
17661787
xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, count);
1788+
xfs_fileoff_t cow_fsb = NULLFILEOFF;
1789+
xfs_fileoff_t eof_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
17671790
struct xfs_bmbt_irec imap, cmap;
17681791
struct xfs_iext_cursor icur, ccur;
17691792
xfs_fsblock_t prealloc_blocks = 0;
@@ -1808,30 +1831,96 @@ xfs_buffered_write_iomap_begin(
18081831
goto out_unlock;
18091832

18101833
/*
1811-
* Search the data fork first to look up our source mapping. We
1812-
* always need the data fork map, as we have to return it to the
1813-
* iomap code so that the higher level write code can read data in to
1814-
* perform read-modify-write cycles for unaligned writes.
1834+
* Search the data fork first to look up our source mapping. We always
1835+
* need the data fork map, as we have to return it to the iomap code so
1836+
* that the higher level write code can read data in to perform
1837+
* read-modify-write cycles for unaligned writes.
1838+
*
1839+
* Then search the COW fork extent list even if we did not find a data
1840+
* fork extent. This serves two purposes: first this implements the
1841+
* speculative preallocation using cowextsize, so that we also unshare
1842+
* blocks adjacent to shared blocks instead of just the shared blocks
1843+
* themselves. Second the lookup in the extent list is generally faster
1844+
* than going out to the shared extent tree.
18151845
*/
18161846
eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap);
18171847
if (eof)
18181848
imap.br_startoff = end_fsb; /* fake hole until the end */
1849+
if (xfs_is_cow_inode(ip)) {
1850+
if (!ip->i_cowfp) {
1851+
ASSERT(!xfs_is_reflink_inode(ip));
1852+
xfs_ifork_init_cow(ip);
1853+
}
1854+
cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
1855+
&ccur, &cmap);
1856+
if (!cow_eof)
1857+
cow_fsb = cmap.br_startoff;
1858+
}
18191859

1820-
/* We never need to allocate blocks for zeroing or unsharing a hole. */
1821-
if ((flags & (IOMAP_UNSHARE | IOMAP_ZERO)) &&
1822-
imap.br_startoff > offset_fsb) {
1860+
/* We never need to allocate blocks for unsharing a hole. */
1861+
if ((flags & IOMAP_UNSHARE) && imap.br_startoff > offset_fsb) {
18231862
xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
18241863
goto out_unlock;
18251864
}
18261865

1866+
/*
1867+
* We may need to zero over a hole in the data fork if it's fronted by
1868+
* COW blocks and dirty pagecache. Scan such file ranges for dirty
1869+
* cache and fill the iomap batch with folios that need zeroing.
1870+
*/
1871+
if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) {
1872+
loff_t start, end;
1873+
unsigned int fbatch_count;
1874+
1875+
imap.br_blockcount = imap.br_startoff - offset_fsb;
1876+
imap.br_startoff = offset_fsb;
1877+
imap.br_startblock = HOLESTARTBLOCK;
1878+
imap.br_state = XFS_EXT_NORM;
1879+
1880+
if (cow_fsb == NULLFILEOFF)
1881+
goto found_imap;
1882+
if (cow_fsb > offset_fsb) {
1883+
xfs_trim_extent(&imap, offset_fsb,
1884+
cow_fsb - offset_fsb);
1885+
goto found_imap;
1886+
}
1887+
1888+
/* no zeroing beyond eof, so split at the boundary */
1889+
if (offset_fsb >= eof_fsb)
1890+
goto found_imap;
1891+
if (offset_fsb < eof_fsb && end_fsb > eof_fsb)
1892+
xfs_trim_extent(&imap, offset_fsb,
1893+
eof_fsb - offset_fsb);
1894+
1895+
/* COW fork blocks overlap the hole */
1896+
xfs_trim_extent(&imap, offset_fsb,
1897+
cmap.br_startoff + cmap.br_blockcount - offset_fsb);
1898+
start = XFS_FSB_TO_B(mp, imap.br_startoff);
1899+
end = XFS_FSB_TO_B(mp, imap.br_startoff + imap.br_blockcount);
1900+
fbatch_count = iomap_fill_dirty_folios(iter, &start, end,
1901+
&iomap_flags);
1902+
xfs_trim_extent(&imap, offset_fsb,
1903+
XFS_B_TO_FSB(mp, start) - offset_fsb);
1904+
1905+
/*
1906+
* Report the COW mapping if we have folios to zero. Otherwise
1907+
* ignore the COW blocks as preallocation and report a hole.
1908+
*/
1909+
if (fbatch_count) {
1910+
xfs_trim_extent(&cmap, imap.br_startoff,
1911+
imap.br_blockcount);
1912+
imap.br_startoff = end_fsb; /* fake hole */
1913+
goto found_cow;
1914+
}
1915+
goto found_imap;
1916+
}
1917+
18271918
/*
18281919
* For zeroing, trim extents that extend beyond the EOF block. If a
18291920
* delalloc extent starts beyond the EOF block, convert it to an
18301921
* unwritten extent.
18311922
*/
18321923
if (flags & IOMAP_ZERO) {
1833-
xfs_fileoff_t eof_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
1834-
18351924
if (isnullstartblock(imap.br_startblock) &&
18361925
offset_fsb >= eof_fsb)
18371926
goto convert_delay;
@@ -1864,24 +1953,13 @@ xfs_buffered_write_iomap_begin(
18641953
}
18651954

18661955
/*
1867-
* Search the COW fork extent list even if we did not find a data fork
1868-
* extent. This serves two purposes: first this implements the
1869-
* speculative preallocation using cowextsize, so that we also unshare
1870-
* block adjacent to shared blocks instead of just the shared blocks
1871-
* themselves. Second the lookup in the extent list is generally faster
1872-
* than going out to the shared extent tree.
1956+
* Now that we've handled any operation specific special cases, at this
1957+
* point we can report a COW mapping if found.
18731958
*/
1874-
if (xfs_is_cow_inode(ip)) {
1875-
if (!ip->i_cowfp) {
1876-
ASSERT(!xfs_is_reflink_inode(ip));
1877-
xfs_ifork_init_cow(ip);
1878-
}
1879-
cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
1880-
&ccur, &cmap);
1881-
if (!cow_eof && cmap.br_startoff <= offset_fsb) {
1882-
trace_xfs_reflink_cow_found(ip, &cmap);
1883-
goto found_cow;
1884-
}
1959+
if (xfs_is_cow_inode(ip) &&
1960+
!cow_eof && cmap.br_startoff <= offset_fsb) {
1961+
trace_xfs_reflink_cow_found(ip, &cmap);
1962+
goto found_cow;
18851963
}
18861964

18871965
if (imap.br_startoff <= offset_fsb) {

fs/xfs/xfs_mount.c

Lines changed: 36 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -44,17 +44,36 @@
4444
#include "xfs_healthmon.h"
4545

4646
static DEFINE_MUTEX(xfs_uuid_table_mutex);
47-
static int xfs_uuid_table_size;
48-
static uuid_t *xfs_uuid_table;
47+
static DEFINE_XARRAY_ALLOC(xfs_uuid_table);
48+
49+
static uuid_t *
50+
xfs_uuid_search(
51+
uuid_t *new_uuid)
52+
{
53+
unsigned long index = 0;
54+
uuid_t *uuid;
55+
56+
xa_for_each(&xfs_uuid_table, index, uuid) {
57+
if (uuid_equal(uuid, new_uuid))
58+
return uuid;
59+
}
60+
return NULL;
61+
}
62+
63+
static void
64+
xfs_uuid_delete(
65+
uuid_t *uuid,
66+
unsigned int index)
67+
{
68+
ASSERT(uuid_equal(xa_load(&xfs_uuid_table, index), uuid));
69+
xa_erase(&xfs_uuid_table, index);
70+
}
4971

5072
void
5173
xfs_uuid_table_free(void)
5274
{
53-
if (xfs_uuid_table_size == 0)
54-
return;
55-
kfree(xfs_uuid_table);
56-
xfs_uuid_table = NULL;
57-
xfs_uuid_table_size = 0;
75+
ASSERT(xa_empty(&xfs_uuid_table));
76+
xa_destroy(&xfs_uuid_table);
5877
}
5978

6079
/*
@@ -66,7 +85,7 @@ xfs_uuid_mount(
6685
struct xfs_mount *mp)
6786
{
6887
uuid_t *uuid = &mp->m_sb.sb_uuid;
69-
int hole, i;
88+
int ret;
7089

7190
/* Publish UUID in struct super_block */
7291
super_set_uuid(mp->m_super, uuid->b, sizeof(*uuid));
@@ -80,52 +99,30 @@ xfs_uuid_mount(
8099
}
81100

82101
mutex_lock(&xfs_uuid_table_mutex);
83-
for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
84-
if (uuid_is_null(&xfs_uuid_table[i])) {
85-
hole = i;
86-
continue;
87-
}
88-
if (uuid_equal(uuid, &xfs_uuid_table[i]))
89-
goto out_duplicate;
90-
}
91-
92-
if (hole < 0) {
93-
xfs_uuid_table = krealloc(xfs_uuid_table,
94-
(xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
95-
GFP_KERNEL | __GFP_NOFAIL);
96-
hole = xfs_uuid_table_size++;
102+
if (unlikely(xfs_uuid_search(uuid))) {
103+
xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount",
104+
uuid);
105+
mutex_unlock(&xfs_uuid_table_mutex);
106+
return -EINVAL;
97107
}
98-
xfs_uuid_table[hole] = *uuid;
99-
mutex_unlock(&xfs_uuid_table_mutex);
100108

101-
return 0;
102-
103-
out_duplicate:
109+
ret = xa_alloc(&xfs_uuid_table, &mp->m_uuid_table_index, uuid,
110+
xa_limit_32b, GFP_KERNEL);
104111
mutex_unlock(&xfs_uuid_table_mutex);
105-
xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
106-
return -EINVAL;
112+
return ret;
107113
}
108114

109115
STATIC void
110116
xfs_uuid_unmount(
111117
struct xfs_mount *mp)
112118
{
113119
uuid_t *uuid = &mp->m_sb.sb_uuid;
114-
int i;
115120

116121
if (xfs_has_nouuid(mp))
117122
return;
118123

119124
mutex_lock(&xfs_uuid_table_mutex);
120-
for (i = 0; i < xfs_uuid_table_size; i++) {
121-
if (uuid_is_null(&xfs_uuid_table[i]))
122-
continue;
123-
if (!uuid_equal(uuid, &xfs_uuid_table[i]))
124-
continue;
125-
memset(&xfs_uuid_table[i], 0, sizeof(uuid_t));
126-
break;
127-
}
128-
ASSERT(i < xfs_uuid_table_size);
125+
xfs_uuid_delete(uuid, mp->m_uuid_table_index);
129126
mutex_unlock(&xfs_uuid_table_mutex);
130127
}
131128

fs/xfs/xfs_mount.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,9 @@ typedef struct xfs_mount {
346346

347347
/* Private data referring to a health monitor object. */
348348
struct xfs_healthmon __rcu *m_healthmon;
349+
350+
/* Index of uuid record in the uuid xarray. */
351+
unsigned int m_uuid_table_index;
349352
} xfs_mount_t;
350353

351354
#define M_IGEO(mp) (&(mp)->m_ino_geo)

0 commit comments

Comments
 (0)