Skip to content

Commit 292a2bc

Browse files
committed
Merge tag 'pull-dcache-busy-wait' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull dcache busy loop updates from Al Viro: "Fix livelocks in shrink_dcache_tree() If shrink_dcache_tree() finds a dentry in the middle of being killed by another thread, it has to wait until the victim finishes dying, gets detached from the tree and ceases to pin its parent. The way we used to deal with that amounted to busy-wait; unfortunately, it's not just inefficient but can lead to reliably reproducible hard livelocks. Solved by having shrink_dentry_tree() attach a completion to such dentry, with dentry_unlist() calling complete() on all objects attached to it. With a bit of care it can be done without growing struct dentry or adding overhead in normal case" * tag 'pull-dcache-busy-wait' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: get rid of busy-waiting in shrink_dcache_tree() dcache.c: more idiomatic "positives are not allowed" sanity checks struct dentry: make ->d_u anonymous for_each_alias(): helper macro for iterating through dentries of given inode
2 parents b4e0758 + 14a5104 commit 292a2bc

13 files changed

Lines changed: 140 additions & 47 deletions

File tree

Documentation/filesystems/porting.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1368,10 +1368,20 @@ lifetime, consider using inode_set_cached_link() instead.
13681368

13691369
lookup_one_qstr_excl() is no longer exported - use start_creating() or
13701370
similar.
1371+
13711372
---
13721373

13731374
** mandatory**
13741375

13751376
lock_rename(), lock_rename_child(), unlock_rename() are no
13761377
longer available. Use start_renaming() or similar.
13771378

1379+
---
1380+
1381+
**recommended**
1382+
1383+
If you really need to iterate through dentries for given inode, use
1384+
for_each_alias(dentry, inode) instead of hlist_for_each_entry; better
1385+
yet, see if any of the exported primitives could be used instead of
1386+
the entire loop. You still need to hold ->i_lock of the inode over
1387+
either form of manual loop.

fs/affs/amigaffs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ affs_fix_dcache(struct inode *inode, u32 entry_ino)
126126
{
127127
struct dentry *dentry;
128128
spin_lock(&inode->i_lock);
129-
hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
129+
for_each_alias(dentry, inode) {
130130
if (entry_ino == (u32)(long)dentry->d_fsdata) {
131131
dentry->d_fsdata = (void *)(unsigned long)inode->i_ino;
132132
break;

fs/ceph/mds_client.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4608,13 +4608,13 @@ static struct dentry* d_find_primary(struct inode *inode)
46084608
goto out_unlock;
46094609

46104610
if (S_ISDIR(inode->i_mode)) {
4611-
alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
4611+
alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
46124612
if (!IS_ROOT(alias))
46134613
dn = dget(alias);
46144614
goto out_unlock;
46154615
}
46164616

4617-
hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
4617+
for_each_alias(alias, inode) {
46184618
spin_lock(&alias->d_lock);
46194619
if (!d_unhashed(alias) &&
46204620
(ceph_dentry(alias)->flags & CEPH_DENTRY_PRIMARY_LINK)) {

fs/dcache.c

Lines changed: 98 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
/*
4141
* Usage:
4242
* dcache->d_inode->i_lock protects:
43-
* - i_dentry, d_u.d_alias, d_inode of aliases
43+
* - i_dentry, d_alias, d_inode of aliases
4444
* dcache_hash_bucket lock protects:
4545
* - the dcache hash table
4646
* s_roots bl list spinlock protects:
@@ -55,7 +55,7 @@
5555
* - d_unhashed()
5656
* - d_parent and d_chilren
5757
* - childrens' d_sib and d_parent
58-
* - d_u.d_alias, d_inode
58+
* - d_alias, d_inode
5959
*
6060
* Ordering:
6161
* dentry->d_inode->i_lock
@@ -341,14 +341,14 @@ static inline struct external_name *external_name(struct dentry *dentry)
341341

342342
static void __d_free(struct rcu_head *head)
343343
{
344-
struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
344+
struct dentry *dentry = container_of(head, struct dentry, d_rcu);
345345

346346
kmem_cache_free(dentry_cache, dentry);
347347
}
348348

349349
static void __d_free_external(struct rcu_head *head)
350350
{
351-
struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
351+
struct dentry *dentry = container_of(head, struct dentry, d_rcu);
352352
kfree(external_name(dentry));
353353
kmem_cache_free(dentry_cache, dentry);
354354
}
@@ -428,19 +428,19 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
428428

429429
static void dentry_free(struct dentry *dentry)
430430
{
431-
WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
431+
WARN_ON(d_really_is_positive(dentry));
432432
if (unlikely(dname_external(dentry))) {
433433
struct external_name *p = external_name(dentry);
434434
if (likely(atomic_dec_and_test(&p->count))) {
435-
call_rcu(&dentry->d_u.d_rcu, __d_free_external);
435+
call_rcu(&dentry->d_rcu, __d_free_external);
436436
return;
437437
}
438438
}
439439
/* if dentry was never visible to RCU, immediate free is OK */
440440
if (dentry->d_flags & DCACHE_NORCU)
441-
__d_free(&dentry->d_u.d_rcu);
441+
__d_free(&dentry->d_rcu);
442442
else
443-
call_rcu(&dentry->d_u.d_rcu, __d_free);
443+
call_rcu(&dentry->d_rcu, __d_free);
444444
}
445445

446446
/*
@@ -455,7 +455,16 @@ static void dentry_unlink_inode(struct dentry * dentry)
455455

456456
raw_write_seqcount_begin(&dentry->d_seq);
457457
__d_clear_type_and_inode(dentry);
458-
hlist_del_init(&dentry->d_u.d_alias);
458+
hlist_del_init(&dentry->d_alias);
459+
/*
460+
* dentry becomes negative, so the space occupied by ->d_alias
461+
* belongs to ->waiters now; we could use __hlist_del() instead
462+
* of hlist_del_init(), if not for the stunt pulled by nfs
463+
* dummy root dentries - positive dentry *not* included into
464+
* the alias list of its inode. Open-coding hlist_del_init()
465+
* and removing zeroing would be too clumsy...
466+
*/
467+
dentry->waiters = NULL;
459468
raw_write_seqcount_end(&dentry->d_seq);
460469
spin_unlock(&dentry->d_lock);
461470
spin_unlock(&inode->i_lock);
@@ -605,6 +614,44 @@ void d_drop(struct dentry *dentry)
605614
}
606615
EXPORT_SYMBOL(d_drop);
607616

617+
struct completion_list {
618+
struct completion_list *next;
619+
struct completion completion;
620+
};
621+
622+
/*
623+
* shrink_dcache_tree() needs to be notified when dentry in process of
624+
* being evicted finally gets unlisted. Such dentries are
625+
* already with negative ->d_count
626+
* already negative
627+
* already not in in-lookup hash
628+
* reachable only via ->d_sib.
629+
*
630+
* Use ->waiters for a single-linked list of struct completion_list of
631+
* waiters.
632+
*/
633+
static inline void d_add_waiter(struct dentry *dentry, struct completion_list *p)
634+
{
635+
struct completion_list *v = dentry->waiters;
636+
init_completion(&p->completion);
637+
p->next = v;
638+
dentry->waiters = p;
639+
}
640+
641+
static inline void d_complete_waiters(struct dentry *dentry)
642+
{
643+
struct completion_list *v = dentry->waiters;
644+
if (unlikely(v)) {
645+
/* some shrink_dcache_tree() instances are waiting */
646+
dentry->waiters = NULL;
647+
while (v) {
648+
struct completion *r = &v->completion;
649+
v = v->next;
650+
complete(r);
651+
}
652+
}
653+
}
654+
608655
static inline void dentry_unlist(struct dentry *dentry)
609656
{
610657
struct dentry *next;
@@ -613,6 +660,7 @@ static inline void dentry_unlist(struct dentry *dentry)
613660
* attached to the dentry tree
614661
*/
615662
dentry->d_flags |= DCACHE_DENTRY_KILLED;
663+
d_complete_waiters(dentry);
616664
if (unlikely(hlist_unhashed(&dentry->d_sib)))
617665
return;
618666
__hlist_del(&dentry->d_sib);
@@ -790,7 +838,7 @@ void d_mark_dontcache(struct inode *inode)
790838
struct dentry *de;
791839

792840
spin_lock(&inode->i_lock);
793-
hlist_for_each_entry(de, &inode->i_dentry, d_u.d_alias) {
841+
for_each_alias(de, inode) {
794842
spin_lock(&de->d_lock);
795843
de->d_flags |= DCACHE_DONTCACHE;
796844
spin_unlock(&de->d_lock);
@@ -1010,7 +1058,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode)
10101058

10111059
if (hlist_empty(&inode->i_dentry))
10121060
return NULL;
1013-
alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
1061+
alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
10141062
lockref_get(&alias->d_lockref);
10151063
return alias;
10161064
}
@@ -1040,7 +1088,7 @@ static struct dentry *__d_find_alias(struct inode *inode)
10401088
if (S_ISDIR(inode->i_mode))
10411089
return __d_find_any_alias(inode);
10421090

1043-
hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
1091+
for_each_alias(alias, inode) {
10441092
spin_lock(&alias->d_lock);
10451093
if (!d_unhashed(alias)) {
10461094
dget_dlock(alias);
@@ -1093,9 +1141,9 @@ struct dentry *d_find_alias_rcu(struct inode *inode)
10931141
// used without having I_FREEING set, which means no aliases left
10941142
if (likely(!(inode_state_read(inode) & I_FREEING) && !hlist_empty(l))) {
10951143
if (S_ISDIR(inode->i_mode)) {
1096-
de = hlist_entry(l->first, struct dentry, d_u.d_alias);
1144+
de = hlist_entry(l->first, struct dentry, d_alias);
10971145
} else {
1098-
hlist_for_each_entry(de, l, d_u.d_alias)
1146+
hlist_for_each_entry(de, l, d_alias)
10991147
if (!d_unhashed(de))
11001148
break;
11011149
}
@@ -1133,7 +1181,7 @@ void d_prune_aliases(struct inode *inode)
11331181
struct dentry *dentry;
11341182

11351183
spin_lock(&inode->i_lock);
1136-
hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias)
1184+
for_each_alias(dentry, inode)
11371185
d_dispose_if_unused(dentry, &dispose);
11381186
spin_unlock(&inode->i_lock);
11391187
shrink_dentry_list(&dispose);
@@ -1569,6 +1617,10 @@ static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
15691617
return D_WALK_QUIT;
15701618
}
15711619
to_shrink_list(dentry, &data->dispose);
1620+
} else if (dentry->d_lockref.count < 0) {
1621+
rcu_read_lock();
1622+
data->victim = dentry;
1623+
return D_WALK_QUIT;
15721624
}
15731625
/*
15741626
* We can return to the caller if we have found some (this
@@ -1608,12 +1660,27 @@ static void shrink_dcache_tree(struct dentry *parent, bool for_umount)
16081660
data.victim = NULL;
16091661
d_walk(parent, &data, select_collect2);
16101662
if (data.victim) {
1611-
spin_lock(&data.victim->d_lock);
1612-
if (!lock_for_kill(data.victim)) {
1613-
spin_unlock(&data.victim->d_lock);
1663+
struct dentry *v = data.victim;
1664+
1665+
spin_lock(&v->d_lock);
1666+
if (v->d_lockref.count < 0 &&
1667+
!(v->d_flags & DCACHE_DENTRY_KILLED)) {
1668+
struct completion_list wait;
1669+
// It's busy dying; have it notify us once
1670+
// it becomes invisible to d_walk().
1671+
d_add_waiter(v, &wait);
1672+
spin_unlock(&v->d_lock);
1673+
rcu_read_unlock();
1674+
if (!list_empty(&data.dispose))
1675+
shrink_dentry_list(&data.dispose);
1676+
wait_for_completion(&wait.completion);
1677+
continue;
1678+
}
1679+
if (!lock_for_kill(v)) {
1680+
spin_unlock(&v->d_lock);
16141681
rcu_read_unlock();
16151682
} else {
1616-
shrink_kill(data.victim);
1683+
shrink_kill(v);
16171684
}
16181685
}
16191686
if (!list_empty(&data.dispose))
@@ -1787,7 +1854,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
17871854
INIT_HLIST_BL_NODE(&dentry->d_hash);
17881855
INIT_LIST_HEAD(&dentry->d_lru);
17891856
INIT_HLIST_HEAD(&dentry->d_children);
1790-
INIT_HLIST_NODE(&dentry->d_u.d_alias);
1857+
dentry->waiters = NULL;
17911858
INIT_HLIST_NODE(&dentry->d_sib);
17921859

17931860
if (dentry->d_op && dentry->d_op->d_init) {
@@ -1980,7 +2047,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
19802047
if ((dentry->d_flags &
19812048
(DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST)
19822049
this_cpu_dec(nr_dentry_negative);
1983-
hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
2050+
hlist_add_head(&dentry->d_alias, &inode->i_dentry);
19842051
raw_write_seqcount_begin(&dentry->d_seq);
19852052
__d_set_inode_and_type(dentry, inode, add_flags);
19862053
raw_write_seqcount_end(&dentry->d_seq);
@@ -2004,7 +2071,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
20042071

20052072
void d_instantiate(struct dentry *entry, struct inode * inode)
20062073
{
2007-
BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
2074+
BUG_ON(d_really_is_positive(entry));
20082075
if (inode) {
20092076
security_d_instantiate(entry, inode);
20102077
spin_lock(&inode->i_lock);
@@ -2024,7 +2091,7 @@ EXPORT_SYMBOL(d_instantiate);
20242091
*/
20252092
void d_instantiate_new(struct dentry *entry, struct inode *inode)
20262093
{
2027-
BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
2094+
BUG_ON(d_really_is_positive(entry));
20282095
BUG_ON(!inode);
20292096
lockdep_annotate_inode_mutex_key(inode);
20302097
security_d_instantiate(entry, inode);
@@ -2087,7 +2154,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected)
20872154

20882155
spin_lock(&new->d_lock);
20892156
__d_set_inode_and_type(new, inode, add_flags);
2090-
hlist_add_head(&new->d_u.d_alias, &inode->i_dentry);
2157+
hlist_add_head(&new->d_alias, &inode->i_dentry);
20912158
if (!disconnected) {
20922159
hlist_bl_lock(&sb->s_roots);
20932160
hlist_bl_add_head(&new->d_hash, &sb->s_roots);
@@ -2658,7 +2725,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
26582725
* we unlock the chain. All fields are stable in everything
26592726
* we encounter.
26602727
*/
2661-
hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) {
2728+
hlist_bl_for_each_entry(dentry, node, b, d_in_lookup_hash) {
26622729
if (dentry->d_name.hash != hash)
26632730
continue;
26642731
if (dentry->d_parent != parent)
@@ -2700,7 +2767,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
27002767
}
27012768
rcu_read_unlock();
27022769
new->d_wait = wq;
2703-
hlist_bl_add_head(&new->d_u.d_in_lookup_hash, b);
2770+
hlist_bl_add_head(&new->d_in_lookup_hash, b);
27042771
hlist_bl_unlock(b);
27052772
return new;
27062773
mismatch:
@@ -2725,11 +2792,11 @@ static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry)
27252792
b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash);
27262793
hlist_bl_lock(b);
27272794
dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
2728-
__hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
2795+
__hlist_bl_del(&dentry->d_in_lookup_hash);
27292796
d_wait = dentry->d_wait;
27302797
dentry->d_wait = NULL;
27312798
hlist_bl_unlock(b);
2732-
INIT_HLIST_NODE(&dentry->d_u.d_alias);
2799+
dentry->waiters = NULL;
27332800
INIT_LIST_HEAD(&dentry->d_lru);
27342801
return d_wait;
27352802
}
@@ -2760,7 +2827,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode,
27602827
d_set_d_op(dentry, ops);
27612828
if (inode) {
27622829
unsigned add_flags = d_flags_for_inode(inode);
2763-
hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
2830+
hlist_add_head(&dentry->d_alias, &inode->i_dentry);
27642831
raw_write_seqcount_begin(&dentry->d_seq);
27652832
__d_set_inode_and_type(dentry, inode, add_flags);
27662833
raw_write_seqcount_end(&dentry->d_seq);
@@ -2795,7 +2862,7 @@ EXPORT_SYMBOL(d_add);
27952862

27962863
struct dentry *d_make_persistent(struct dentry *dentry, struct inode *inode)
27972864
{
2798-
WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
2865+
WARN_ON(d_really_is_positive(dentry));
27992866
WARN_ON(!inode);
28002867
security_d_instantiate(dentry, inode);
28012868
spin_lock(&inode->i_lock);
@@ -3185,7 +3252,7 @@ void d_mark_tmpfile(struct file *file, struct inode *inode)
31853252
struct dentry *dentry = file->f_path.dentry;
31863253

31873254
BUG_ON(dname_external(dentry) ||
3188-
!hlist_unhashed(&dentry->d_u.d_alias) ||
3255+
d_really_is_positive(dentry) ||
31893256
!d_unlinked(dentry));
31903257
spin_lock(&dentry->d_parent->d_lock);
31913258
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);

fs/exportfs/expfs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ find_acceptable_alias(struct dentry *result,
5252

5353
inode = result->d_inode;
5454
spin_lock(&inode->i_lock);
55-
hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
55+
for_each_alias(dentry, inode) {
5656
dget(dentry);
5757
spin_unlock(&inode->i_lock);
5858
if (toput)

fs/inode.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -750,7 +750,7 @@ void dump_mapping(const struct address_space *mapping)
750750
return;
751751
}
752752

753-
dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
753+
dentry_ptr = container_of(dentry_first, struct dentry, d_alias);
754754
if (get_kernel_nofault(dentry, dentry_ptr) ||
755755
!dentry.d_parent || !dentry.d_name.name) {
756756
pr_warn("aops:%ps ino:%llx invalid dentry:%px\n",

0 commit comments

Comments
 (0)