Skip to content

Commit 2c0ff61

Browse files
committed
Merge branch 'xfs-7.1-merge' into for-next
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2 parents e596609 + c1f9554 commit 2c0ff61

6 files changed

Lines changed: 137 additions & 57 deletions

File tree

fs/xfs/libxfs/xfs_fs.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -995,14 +995,17 @@ struct xfs_rtgroup_geometry {
995995
__u32 rg_sick; /* o: sick things in ag */
996996
__u32 rg_checked; /* o: checked metadata in ag */
997997
__u32 rg_flags; /* i/o: flags for this ag */
998-
__u32 rg_reserved[27]; /* o: zero */
998+
__u32 rg_writepointer; /* o: write pointer block offset for zoned */
999+
__u32 rg_reserved[26]; /* o: zero */
9991000
};
10001001
#define XFS_RTGROUP_GEOM_SICK_SUPER (1U << 0) /* superblock */
10011002
#define XFS_RTGROUP_GEOM_SICK_BITMAP (1U << 1) /* rtbitmap */
10021003
#define XFS_RTGROUP_GEOM_SICK_SUMMARY (1U << 2) /* rtsummary */
10031004
#define XFS_RTGROUP_GEOM_SICK_RMAPBT (1U << 3) /* reverse mappings */
10041005
#define XFS_RTGROUP_GEOM_SICK_REFCNTBT (1U << 4) /* reference counts */
10051006

1007+
#define XFS_RTGROUP_GEOM_WRITEPOINTER (1U << 0) /* write pointer */
1008+
10061009
/* Health monitor event domains */
10071010

10081011
/* affects the whole fs */

fs/xfs/xfs_file.c

Lines changed: 69 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,72 @@ xfs_zoned_write_space_reserve(
560560
flags, ac);
561561
}
562562

563+
/*
564+
* We need to lock the test/set EOF update as we can be racing with
565+
* other IO completions here to update the EOF. Failing to serialise
566+
* here can result in EOF moving backwards and Bad Things Happen when
567+
* that occurs.
568+
*
569+
* As IO completion only ever extends EOF, we can do an unlocked check
570+
* here to avoid taking the spinlock. If we land within the current EOF,
571+
* then we do not need to do an extending update at all, and we don't
572+
* need to take the lock to check this. If we race with an update moving
573+
* EOF, then we'll either still be beyond EOF and need to take the lock,
574+
* or we'll be within EOF and we don't need to take it at all.
575+
*/
576+
static int
577+
xfs_dio_endio_set_isize(
578+
struct inode *inode,
579+
loff_t offset,
580+
ssize_t size)
581+
{
582+
struct xfs_inode *ip = XFS_I(inode);
583+
584+
if (offset + size <= i_size_read(inode))
585+
return 0;
586+
587+
spin_lock(&ip->i_flags_lock);
588+
if (offset + size <= i_size_read(inode)) {
589+
spin_unlock(&ip->i_flags_lock);
590+
return 0;
591+
}
592+
593+
i_size_write(inode, offset + size);
594+
spin_unlock(&ip->i_flags_lock);
595+
596+
return xfs_setfilesize(ip, offset, size);
597+
}
598+
599+
static int
600+
xfs_zoned_dio_write_end_io(
601+
struct kiocb *iocb,
602+
ssize_t size,
603+
int error,
604+
unsigned flags)
605+
{
606+
struct inode *inode = file_inode(iocb->ki_filp);
607+
struct xfs_inode *ip = XFS_I(inode);
608+
unsigned int nofs_flag;
609+
610+
ASSERT(!(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW)));
611+
612+
trace_xfs_end_io_direct_write(ip, iocb->ki_pos, size);
613+
614+
if (xfs_is_shutdown(ip->i_mount))
615+
return -EIO;
616+
617+
if (error || !size)
618+
return error;
619+
620+
XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
621+
622+
nofs_flag = memalloc_nofs_save();
623+
error = xfs_dio_endio_set_isize(inode, iocb->ki_pos, size);
624+
memalloc_nofs_restore(nofs_flag);
625+
626+
return error;
627+
}
628+
563629
static int
564630
xfs_dio_write_end_io(
565631
struct kiocb *iocb,
@@ -572,8 +638,7 @@ xfs_dio_write_end_io(
572638
loff_t offset = iocb->ki_pos;
573639
unsigned int nofs_flag;
574640

575-
ASSERT(!xfs_is_zoned_inode(ip) ||
576-
!(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW)));
641+
ASSERT(!xfs_is_zoned_inode(ip));
577642

578643
trace_xfs_end_io_direct_write(ip, offset, size);
579644

@@ -623,30 +688,8 @@ xfs_dio_write_end_io(
623688
* with the on-disk inode size being outside the in-core inode size. We
624689
* have no other method of updating EOF for AIO, so always do it here
625690
* if necessary.
626-
*
627-
* We need to lock the test/set EOF update as we can be racing with
628-
* other IO completions here to update the EOF. Failing to serialise
629-
* here can result in EOF moving backwards and Bad Things Happen when
630-
* that occurs.
631-
*
632-
* As IO completion only ever extends EOF, we can do an unlocked check
633-
* here to avoid taking the spinlock. If we land within the current EOF,
634-
* then we do not need to do an extending update at all, and we don't
635-
* need to take the lock to check this. If we race with an update moving
636-
* EOF, then we'll either still be beyond EOF and need to take the lock,
637-
* or we'll be within EOF and we don't need to take it at all.
638691
*/
639-
if (offset + size <= i_size_read(inode))
640-
goto out;
641-
642-
spin_lock(&ip->i_flags_lock);
643-
if (offset + size > i_size_read(inode)) {
644-
i_size_write(inode, offset + size);
645-
spin_unlock(&ip->i_flags_lock);
646-
error = xfs_setfilesize(ip, offset, size);
647-
} else {
648-
spin_unlock(&ip->i_flags_lock);
649-
}
692+
error = xfs_dio_endio_set_isize(inode, offset, size);
650693

651694
out:
652695
memalloc_nofs_restore(nofs_flag);
@@ -688,7 +731,7 @@ xfs_dio_zoned_submit_io(
688731
static const struct iomap_dio_ops xfs_dio_zoned_write_ops = {
689732
.bio_set = &iomap_ioend_bioset,
690733
.submit_io = xfs_dio_zoned_submit_io,
691-
.end_io = xfs_dio_write_end_io,
734+
.end_io = xfs_zoned_dio_write_end_io,
692735
};
693736

694737
/*

fs/xfs/xfs_ioctl.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,15 @@
3737
#include "xfs_ioctl.h"
3838
#include "xfs_xattr.h"
3939
#include "xfs_rtbitmap.h"
40+
#include "xfs_rtrmap_btree.h"
4041
#include "xfs_file.h"
4142
#include "xfs_exchrange.h"
4243
#include "xfs_handle.h"
4344
#include "xfs_rtgroup.h"
4445
#include "xfs_healthmon.h"
4546
#include "xfs_verify_media.h"
47+
#include "xfs_zone_priv.h"
48+
#include "xfs_zone_alloc.h"
4649

4750
#include <linux/mount.h>
4851
#include <linux/fileattr.h>
@@ -413,6 +416,7 @@ xfs_ioc_rtgroup_geometry(
413416
{
414417
struct xfs_rtgroup *rtg;
415418
struct xfs_rtgroup_geometry rgeo;
419+
xfs_rgblock_t highest_rgbno;
416420
int error;
417421

418422
if (copy_from_user(&rgeo, arg, sizeof(rgeo)))
@@ -433,6 +437,21 @@ xfs_ioc_rtgroup_geometry(
433437
if (error)
434438
return error;
435439

440+
if (xfs_has_zoned(mp)) {
441+
xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
442+
if (rtg->rtg_open_zone) {
443+
rgeo.rg_writepointer = rtg->rtg_open_zone->oz_allocated;
444+
} else {
445+
highest_rgbno = xfs_rtrmap_highest_rgbno(rtg);
446+
if (highest_rgbno == NULLRGBLOCK)
447+
rgeo.rg_writepointer = 0;
448+
else
449+
rgeo.rg_writepointer = highest_rgbno + 1;
450+
}
451+
xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
452+
rgeo.rg_flags |= XFS_RTGROUP_GEOM_WRITEPOINTER;
453+
}
454+
436455
if (copy_to_user(arg, &rgeo, sizeof(rgeo)))
437456
return -EFAULT;
438457
return 0;

fs/xfs/xfs_zone_alloc.c

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -189,27 +189,16 @@ xfs_open_zone_mark_full(
189189
xfs_zone_account_reclaimable(rtg, rtg_blocks(rtg) - used);
190190
}
191191

192-
static void
193-
xfs_zone_record_blocks(
194-
struct xfs_trans *tp,
192+
static inline void
193+
xfs_zone_inc_written(
195194
struct xfs_open_zone *oz,
196-
xfs_fsblock_t fsbno,
197195
xfs_filblks_t len)
198196
{
199-
struct xfs_mount *mp = tp->t_mountp;
200-
struct xfs_rtgroup *rtg = oz->oz_rtg;
201-
struct xfs_inode *rmapip = rtg_rmap(rtg);
202-
203-
trace_xfs_zone_record_blocks(oz, xfs_rtb_to_rgbno(mp, fsbno), len);
197+
xfs_assert_ilocked(rtg_rmap(oz->oz_rtg), XFS_ILOCK_EXCL);
204198

205-
xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
206-
xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP);
207-
rmapip->i_used_blocks += len;
208-
ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg));
209199
oz->oz_written += len;
210-
if (oz->oz_written == rtg_blocks(rtg))
200+
if (oz->oz_written == rtg_blocks(oz->oz_rtg))
211201
xfs_open_zone_mark_full(oz);
212-
xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE);
213202
}
214203

215204
/*
@@ -227,9 +216,7 @@ xfs_zone_skip_blocks(
227216
trace_xfs_zone_skip_blocks(oz, 0, len);
228217

229218
xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
230-
oz->oz_written += len;
231-
if (oz->oz_written == rtg_blocks(rtg))
232-
xfs_open_zone_mark_full(oz);
219+
xfs_zone_inc_written(oz, len);
233220
xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
234221

235222
xfs_add_frextents(rtg_mount(rtg), len);
@@ -244,6 +231,8 @@ xfs_zoned_map_extent(
244231
xfs_fsblock_t old_startblock)
245232
{
246233
struct xfs_bmbt_irec data;
234+
struct xfs_rtgroup *rtg = oz->oz_rtg;
235+
struct xfs_inode *rmapip = rtg_rmap(rtg);
247236
int nmaps = 1;
248237
int error;
249238

@@ -302,7 +291,15 @@ xfs_zoned_map_extent(
302291
}
303292
}
304293

305-
xfs_zone_record_blocks(tp, oz, new->br_startblock, new->br_blockcount);
294+
trace_xfs_zone_record_blocks(oz,
295+
xfs_rtb_to_rgbno(tp->t_mountp, new->br_startblock),
296+
new->br_blockcount);
297+
xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
298+
xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP);
299+
rmapip->i_used_blocks += new->br_blockcount;
300+
ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg));
301+
xfs_zone_inc_written(oz, new->br_blockcount);
302+
xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE);
306303

307304
/* Map the new blocks into the data fork. */
308305
xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, new);
@@ -681,10 +678,11 @@ xfs_select_zone_nowait(
681678
if (oz)
682679
goto out_unlock;
683680

684-
if (pack_tight)
681+
if (pack_tight) {
685682
oz = xfs_select_open_zone_mru(zi, write_hint);
686-
if (oz)
687-
goto out_unlock;
683+
if (oz)
684+
goto out_unlock;
685+
}
688686

689687
/*
690688
* See if we can open a new zone and use that so that data for different
@@ -695,7 +693,7 @@ xfs_select_zone_nowait(
695693
goto out_unlock;
696694

697695
/*
698-
* Try to find an zone that is an ok match to colocate data with.
696+
* Try to find a zone that is an ok match to colocate data with.
699697
*/
700698
oz = xfs_select_open_zone_lru(zi, write_hint, XFS_ZONE_ALLOC_OK);
701699
if (oz)

fs/xfs/xfs_zone_gc.c

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -170,25 +170,37 @@ xfs_zoned_need_gc(
170170
s64 available, free, threshold;
171171
s32 remainder;
172172

173+
/* If we have no reclaimable blocks, running GC is useless. */
173174
if (!xfs_zoned_have_reclaimable(mp->m_zone_info))
174175
return false;
175176

177+
/*
178+
* In order to avoid file fragmentation as much as possible, we should
179+
* make sure that we can open enough zones. So trigger GC if the number
180+
* of blocks immediately available for writes is lower than the total
181+
* number of blocks from all possible open zones.
182+
*/
176183
available = xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE);
177-
178184
if (available <
179185
xfs_rtgs_to_rfsbs(mp, mp->m_max_open_zones - XFS_OPEN_GC_ZONES))
180186
return true;
181187

182-
free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS);
188+
/*
189+
* For cases where the user wants to be more aggressive with GC,
190+
* the sysfs attribute zonegc_low_space may be set to a non zero value,
191+
* to indicate that GC should try to maintain at least zonegc_low_space
192+
* percent of the free space to be directly available for writing. Check
193+
* this here.
194+
*/
195+
if (!mp->m_zonegc_low_space)
196+
return false;
183197

198+
free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS);
184199
threshold = div_s64_rem(free, 100, &remainder);
185200
threshold = threshold * mp->m_zonegc_low_space +
186201
remainder * div_s64(mp->m_zonegc_low_space, 100);
187202

188-
if (available < threshold)
189-
return true;
190-
191-
return false;
203+
return available < threshold;
192204
}
193205

194206
static struct xfs_zone_gc_data *

fs/xfs/xfs_zone_info.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,14 @@ xfs_zoned_show_stats(
9090
seq_printf(m, "\tRT GC required: %d\n",
9191
xfs_zoned_need_gc(mp));
9292

93+
seq_printf(m, "\ttotal number of zones: %u\n",
94+
mp->m_sb.sb_rgcount);
9395
seq_printf(m, "\tfree zones: %d\n", atomic_read(&zi->zi_nr_free_zones));
94-
seq_puts(m, "\topen zones:\n");
96+
9597
spin_lock(&zi->zi_open_zones_lock);
98+
seq_printf(m, "\tnumber of open zones: %u / %u\n",
99+
zi->zi_nr_open_zones, mp->m_max_open_zones);
100+
seq_puts(m, "\topen zones:\n");
96101
list_for_each_entry(oz, &zi->zi_open_zones, oz_entry)
97102
xfs_show_open_zone(m, oz);
98103
if (zi->zi_open_gc_zone) {

0 commit comments

Comments
 (0)