Skip to content

Commit 14a5104

Browse files
author
Al Viro
committed
get rid of busy-waiting in shrink_dcache_tree()
If shrink_dcache_tree() runs into a potential victim that is already dying, it must wait for that dentry to go away. To avoid busy-waiting we need some object to wait on and a way for dentry_unlist() to see that we need to be notified. The obvious place for the object to wait on would be on our stack frame. We will store a pointer to that object (struct completion_list) in victim dentry; if there's more than one thread wanting to wait for the same dentry to finish dying, we'll have their instances linked into a list, with reference in dentry pointing to the head of that list. * new object - struct completion_list. A pair of struct completion and pointer to the next instance. That's what shrink_dcache_tree() will wait on if needed. * add a new member (->waiters, opaque pointer to struct completion_list) to struct dentry. It is defined for negative live dentries that are not in-lookup ones and it will remain NULL for almost all of them. It does not conflict with ->d_rcu (defined for killed dentries), ->d_alias (defined for positive dentries, all live) or ->d_in_lookup_hash (defined for in-lookup dentries, all live negative). That allows to colocate all four members. * make sure that all places where dentry enters the state where ->waiters is defined (live, negative, not-in-lookup) initialize ->waiters to NULL. * if select_collect2() runs into a dentry that is already dying, have its caller insert a local instance of struct completion_list into the head of the list hanging off dentry->waiters and wait for completion. * if dentry_unlist() sees non-NULL ->waiters, have it carefully walk through the completion_list instances in that list, calling complete() for each. For now struct completion_list is local to fs/dcache.c; it's obviously dentry-agnostic, and it can be trivially lifted into linux/completion.h if somebody finds a reason to do so... Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
1 parent 5408c22 commit 14a5104

2 files changed

Lines changed: 88 additions & 9 deletions

File tree

fs/dcache.c

Lines changed: 73 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,15 @@ static void dentry_unlink_inode(struct dentry * dentry)
456456
raw_write_seqcount_begin(&dentry->d_seq);
457457
__d_clear_type_and_inode(dentry);
458458
hlist_del_init(&dentry->d_alias);
459+
/*
460+
* dentry becomes negative, so the space occupied by ->d_alias
461+
* belongs to ->waiters now; we could use __hlist_del() instead
462+
* of hlist_del_init(), if not for the stunt pulled by nfs
463+
* dummy root dentries - positive dentry *not* included into
464+
* the alias list of its inode. Open-coding hlist_del_init()
465+
* and removing zeroing would be too clumsy...
466+
*/
467+
dentry->waiters = NULL;
459468
raw_write_seqcount_end(&dentry->d_seq);
460469
spin_unlock(&dentry->d_lock);
461470
spin_unlock(&inode->i_lock);
@@ -605,6 +614,44 @@ void d_drop(struct dentry *dentry)
605614
}
606615
EXPORT_SYMBOL(d_drop);
607616

617+
struct completion_list {
618+
struct completion_list *next;
619+
struct completion completion;
620+
};
621+
622+
/*
623+
* shrink_dcache_tree() needs to be notified when dentry in process of
624+
* being evicted finally gets unlisted. Such dentries are
625+
* already with negative ->d_count
626+
* already negative
627+
* already not in in-lookup hash
628+
* reachable only via ->d_sib.
629+
*
630+
* Use ->waiters for a single-linked list of struct completion_list of
631+
* waiters.
632+
*/
633+
static inline void d_add_waiter(struct dentry *dentry, struct completion_list *p)
634+
{
635+
struct completion_list *v = dentry->waiters;
636+
init_completion(&p->completion);
637+
p->next = v;
638+
dentry->waiters = p;
639+
}
640+
641+
static inline void d_complete_waiters(struct dentry *dentry)
642+
{
643+
struct completion_list *v = dentry->waiters;
644+
if (unlikely(v)) {
645+
/* some shrink_dcache_tree() instances are waiting */
646+
dentry->waiters = NULL;
647+
while (v) {
648+
struct completion *r = &v->completion;
649+
v = v->next;
650+
complete(r);
651+
}
652+
}
653+
}
654+
608655
static inline void dentry_unlist(struct dentry *dentry)
609656
{
610657
struct dentry *next;
@@ -613,6 +660,7 @@ static inline void dentry_unlist(struct dentry *dentry)
613660
* attached to the dentry tree
614661
*/
615662
dentry->d_flags |= DCACHE_DENTRY_KILLED;
663+
d_complete_waiters(dentry);
616664
if (unlikely(hlist_unhashed(&dentry->d_sib)))
617665
return;
618666
__hlist_del(&dentry->d_sib);
@@ -1569,6 +1617,10 @@ static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
15691617
return D_WALK_QUIT;
15701618
}
15711619
to_shrink_list(dentry, &data->dispose);
1620+
} else if (dentry->d_lockref.count < 0) {
1621+
rcu_read_lock();
1622+
data->victim = dentry;
1623+
return D_WALK_QUIT;
15721624
}
15731625
/*
15741626
* We can return to the caller if we have found some (this
@@ -1608,12 +1660,27 @@ static void shrink_dcache_tree(struct dentry *parent, bool for_umount)
16081660
data.victim = NULL;
16091661
d_walk(parent, &data, select_collect2);
16101662
if (data.victim) {
1611-
spin_lock(&data.victim->d_lock);
1612-
if (!lock_for_kill(data.victim)) {
1613-
spin_unlock(&data.victim->d_lock);
1663+
struct dentry *v = data.victim;
1664+
1665+
spin_lock(&v->d_lock);
1666+
if (v->d_lockref.count < 0 &&
1667+
!(v->d_flags & DCACHE_DENTRY_KILLED)) {
1668+
struct completion_list wait;
1669+
// It's busy dying; have it notify us once
1670+
// it becomes invisible to d_walk().
1671+
d_add_waiter(v, &wait);
1672+
spin_unlock(&v->d_lock);
1673+
rcu_read_unlock();
1674+
if (!list_empty(&data.dispose))
1675+
shrink_dentry_list(&data.dispose);
1676+
wait_for_completion(&wait.completion);
1677+
continue;
1678+
}
1679+
if (!lock_for_kill(v)) {
1680+
spin_unlock(&v->d_lock);
16141681
rcu_read_unlock();
16151682
} else {
1616-
shrink_kill(data.victim);
1683+
shrink_kill(v);
16171684
}
16181685
}
16191686
if (!list_empty(&data.dispose))
@@ -1787,7 +1854,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
17871854
INIT_HLIST_BL_NODE(&dentry->d_hash);
17881855
INIT_LIST_HEAD(&dentry->d_lru);
17891856
INIT_HLIST_HEAD(&dentry->d_children);
1790-
INIT_HLIST_NODE(&dentry->d_alias);
1857+
dentry->waiters = NULL;
17911858
INIT_HLIST_NODE(&dentry->d_sib);
17921859

17931860
if (dentry->d_op && dentry->d_op->d_init) {
@@ -2729,7 +2796,7 @@ static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry)
27292796
d_wait = dentry->d_wait;
27302797
dentry->d_wait = NULL;
27312798
hlist_bl_unlock(b);
2732-
INIT_HLIST_NODE(&dentry->d_alias);
2799+
dentry->waiters = NULL;
27332800
INIT_LIST_HEAD(&dentry->d_lru);
27342801
return d_wait;
27352802
}

include/linux/dcache.h

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ union shortname_store {
8888

8989
#define d_lock d_lockref.lock
9090
#define d_iname d_shortname.string
91+
struct completion_list;
9192

9293
struct dentry {
9394
/* RCU lookup touched fields */
@@ -122,12 +123,23 @@ struct dentry {
122123
struct hlist_node d_sib; /* child of parent list */
123124
struct hlist_head d_children; /* our children */
124125
/*
125-
* d_alias and d_rcu can share memory
126+
* the following members can share memory - their uses are
127+
* mutually exclusive.
126128
*/
127129
union {
128-
struct hlist_node d_alias; /* inode alias list */
129-
struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */
130+
/* positives: inode alias list */
131+
struct hlist_node d_alias;
132+
/* in-lookup ones (all negative, live): hash chain */
133+
struct hlist_bl_node d_in_lookup_hash;
134+
/* killed ones: (already negative) used to schedule freeing */
130135
struct rcu_head d_rcu;
136+
/*
137+
* live non-in-lookup negatives: used if shrink_dcache_tree()
138+
* races with eviction by another thread and needs to wait for
139+
* this dentry to get killed . Remains NULL for almost all
140+
* negative dentries.
141+
*/
142+
struct completion_list *waiters;
131143
};
132144
};
133145

0 commit comments

Comments
 (0)