Skip to content

Commit 67fe430

Browse files
Christoph Hellwig authored and cmaiolino committed
xfs: don't keep a reference for buffers on the LRU
Currently the buffer cache adds a reference to b_hold for buffers that are on the LRU. This seems to go all the way back and allows releasing buffers from the LRU using xfs_buf_rele. But it makes xfs_buf_rele really complicated and differs from how other LRUs are implemented in Linux. Switch to not having a reference for buffers in the LRU, and use a separate negative hold value to mark buffers as dead. This simplifies xfs_buf_rele, which now just deals with the last "real" reference, and prepares for using the lockref primitive. This also removes the b_lock protection for removing buffers from the buffer hash. This is the desired outcome because the rhashtable is fully internally synchronized, and previously the lock was mostly held out of ordering constraints in xfs_buf_rele_cached. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Carlos Maiolino <cem@kernel.org>
1 parent 025b245 commit 67fe430

2 files changed

Lines changed: 50 additions & 92 deletions

File tree

fs/xfs/xfs_buf.c

Lines changed: 49 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,8 @@ xfs_buf_stale(
8080

8181
spin_lock(&bp->b_lock);
8282
atomic_set(&bp->b_lru_ref, 0);
83-
if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
84-
(list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru)))
85-
bp->b_hold--;
86-
87-
ASSERT(bp->b_hold >= 1);
83+
if (bp->b_hold >= 0)
84+
list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
8885
spin_unlock(&bp->b_lock);
8986
}
9087

@@ -442,7 +439,7 @@ xfs_buf_try_hold(
442439
struct xfs_buf *bp)
443440
{
444441
spin_lock(&bp->b_lock);
445-
if (bp->b_hold == 0) {
442+
if (bp->b_hold == -1) {
446443
spin_unlock(&bp->b_lock);
447444
return false;
448445
}
@@ -862,76 +859,24 @@ xfs_buf_hold(
862859
}
863860

864861
static void
865-
xfs_buf_rele_uncached(
866-
struct xfs_buf *bp)
867-
{
868-
ASSERT(list_empty(&bp->b_lru));
869-
870-
spin_lock(&bp->b_lock);
871-
if (--bp->b_hold) {
872-
spin_unlock(&bp->b_lock);
873-
return;
874-
}
875-
spin_unlock(&bp->b_lock);
876-
xfs_buf_free(bp);
877-
}
878-
879-
static void
880-
xfs_buf_rele_cached(
862+
xfs_buf_destroy(
881863
struct xfs_buf *bp)
882864
{
883-
struct xfs_buftarg *btp = bp->b_target;
884-
struct xfs_perag *pag = bp->b_pag;
885-
struct xfs_buf_cache *bch = xfs_buftarg_buf_cache(btp, pag);
886-
bool freebuf = false;
887-
888-
trace_xfs_buf_rele(bp, _RET_IP_);
889-
890-
spin_lock(&bp->b_lock);
891-
ASSERT(bp->b_hold >= 1);
892-
if (bp->b_hold > 1) {
893-
bp->b_hold--;
894-
goto out_unlock;
895-
}
865+
ASSERT(bp->b_hold < 0);
866+
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
896867

897-
/* we are asked to drop the last reference */
898-
if (atomic_read(&bp->b_lru_ref)) {
899-
/*
900-
* If the buffer is added to the LRU, keep the reference to the
901-
* buffer for the LRU and clear the (now stale) dispose list
902-
* state flag, else drop the reference.
903-
*/
904-
if (list_lru_add_obj(&btp->bt_lru, &bp->b_lru))
905-
bp->b_state &= ~XFS_BSTATE_DISPOSE;
906-
else
907-
bp->b_hold--;
908-
} else {
909-
bp->b_hold--;
910-
/*
911-
* most of the time buffers will already be removed from the
912-
* LRU, so optimise that case by checking for the
913-
* XFS_BSTATE_DISPOSE flag indicating the last list the buffer
914-
* was on was the disposal list
915-
*/
916-
if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
917-
list_lru_del_obj(&btp->bt_lru, &bp->b_lru);
918-
} else {
919-
ASSERT(list_empty(&bp->b_lru));
920-
}
868+
if (!xfs_buf_is_uncached(bp)) {
869+
struct xfs_buf_cache *bch =
870+
xfs_buftarg_buf_cache(bp->b_target, bp->b_pag);
921871

922-
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
923872
rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
924873
xfs_buf_hash_params);
925-
if (pag)
926-
xfs_perag_put(pag);
927-
freebuf = true;
928-
}
929874

930-
out_unlock:
931-
spin_unlock(&bp->b_lock);
875+
if (bp->b_pag)
876+
xfs_perag_put(bp->b_pag);
877+
}
932878

933-
if (freebuf)
934-
xfs_buf_free(bp);
879+
xfs_buf_free(bp);
935880
}
936881

937882
/*
@@ -942,10 +887,22 @@ xfs_buf_rele(
942887
struct xfs_buf *bp)
943888
{
944889
trace_xfs_buf_rele(bp, _RET_IP_);
945-
if (xfs_buf_is_uncached(bp))
946-
xfs_buf_rele_uncached(bp);
947-
else
948-
xfs_buf_rele_cached(bp);
890+
891+
spin_lock(&bp->b_lock);
892+
if (!--bp->b_hold) {
893+
if (xfs_buf_is_uncached(bp) || !atomic_read(&bp->b_lru_ref))
894+
goto kill;
895+
list_lru_add_obj(&bp->b_target->bt_lru, &bp->b_lru);
896+
}
897+
spin_unlock(&bp->b_lock);
898+
return;
899+
900+
kill:
901+
bp->b_hold = -1;
902+
list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
903+
spin_unlock(&bp->b_lock);
904+
905+
xfs_buf_destroy(bp);
949906
}
950907

951908
/*
@@ -1254,9 +1211,11 @@ xfs_buf_ioerror_alert(
12541211

12551212
/*
12561213
* To simulate an I/O failure, the buffer must be locked and held with at least
1257-
* three references. The LRU reference is dropped by the stale call. The buf
1258-
* item reference is dropped via ioend processing. The third reference is owned
1259-
* by the caller and is dropped on I/O completion if the buffer is XBF_ASYNC.
1214+
* two references.
1215+
*
1216+
* The buf item reference is dropped via ioend processing. The second reference
1217+
* is owned by the caller and is dropped on I/O completion if the buffer is
1218+
* XBF_ASYNC.
12601219
*/
12611220
void
12621221
xfs_buf_ioend_fail(
@@ -1514,19 +1473,14 @@ xfs_buftarg_drain_rele(
15141473

15151474
if (!spin_trylock(&bp->b_lock))
15161475
return LRU_SKIP;
1517-
if (bp->b_hold > 1) {
1476+
if (bp->b_hold > 0) {
15181477
/* need to wait, so skip it this pass */
15191478
spin_unlock(&bp->b_lock);
15201479
trace_xfs_buf_drain_buftarg(bp, _RET_IP_);
15211480
return LRU_SKIP;
15221481
}
15231482

1524-
/*
1525-
* clear the LRU reference count so the buffer doesn't get
1526-
* ignored in xfs_buf_rele().
1527-
*/
1528-
atomic_set(&bp->b_lru_ref, 0);
1529-
bp->b_state |= XFS_BSTATE_DISPOSE;
1483+
bp->b_hold = -1;
15301484
list_lru_isolate_move(lru, item, dispose);
15311485
spin_unlock(&bp->b_lock);
15321486
return LRU_REMOVED;
@@ -1581,7 +1535,7 @@ xfs_buftarg_drain(
15811535
"Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
15821536
(long long)xfs_buf_daddr(bp));
15831537
}
1584-
xfs_buf_rele(bp);
1538+
xfs_buf_destroy(bp);
15851539
}
15861540
if (loop++ != 0)
15871541
delay(100);
@@ -1625,7 +1579,17 @@ xfs_buftarg_isolate(
16251579
return LRU_ROTATE;
16261580
}
16271581

1628-
bp->b_state |= XFS_BSTATE_DISPOSE;
1582+
/*
1583+
* If the buffer is in use, remove it from the LRU for now as we can't
1584+
* free it. It will be freed when the last reference drops.
1585+
*/
1586+
if (bp->b_hold > 0) {
1587+
list_lru_isolate(lru, &bp->b_lru);
1588+
spin_unlock(&bp->b_lock);
1589+
return LRU_REMOVED;
1590+
}
1591+
1592+
bp->b_hold = -1;
16291593
list_lru_isolate_move(lru, item, dispose);
16301594
spin_unlock(&bp->b_lock);
16311595
return LRU_REMOVED;
@@ -1647,7 +1611,7 @@ xfs_buftarg_shrink_scan(
16471611
struct xfs_buf *bp;
16481612
bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
16491613
list_del_init(&bp->b_lru);
1650-
xfs_buf_rele(bp);
1614+
xfs_buf_destroy(bp);
16511615
}
16521616

16531617
return freed;

fs/xfs/xfs_buf.h

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,6 @@ typedef unsigned int xfs_buf_flags_t;
6868
{ XBF_INCORE, "INCORE" }, \
6969
{ XBF_TRYLOCK, "TRYLOCK" }
7070

71-
/*
72-
* Internal state flags.
73-
*/
74-
#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */
75-
7671
struct xfs_buf_cache {
7772
struct rhashtable bc_hash;
7873
};
@@ -159,7 +154,7 @@ struct xfs_buf {
159154

160155
xfs_daddr_t b_rhash_key; /* buffer cache index */
161156
int b_length; /* size of buffer in BBs */
162-
unsigned int b_hold; /* reference count */
157+
int b_hold; /* reference count */
163158
atomic_t b_lru_ref; /* lru reclaim ref count */
164159
xfs_buf_flags_t b_flags; /* status flags */
165160
struct semaphore b_sema; /* semaphore for lockables */
@@ -170,7 +165,6 @@ struct xfs_buf {
170165
*/
171166
struct list_head b_lru; /* lru list */
172167
spinlock_t b_lock; /* internal state lock */
173-
unsigned int b_state; /* internal state flags */
174168
wait_queue_head_t b_waiters; /* unpin waiters */
175169
struct list_head b_list;
176170
struct xfs_perag *b_pag;

0 commit comments

Comments
 (0)