Skip to content

Commit 7c405fb

Browse files
author
Boqun Feng
committed
rcu: Use an intermediate irq_work to start process_srcu()
Since commit c27cea4 ("rcu: Re-implement RCU Tasks Trace in terms of SRCU-fast"), BPF uses SRCU. However, because BPF instrumentation can happen basically anywhere (including where a scheduler lock is held), call_srcu() must now avoid acquiring scheduler locks; otherwise it could cause a deadlock [1]. Fix this by following what the previous RCU Tasks Trace implementation did: use an irq_work to defer queuing the work that starts process_srcu(). [boqun: Apply Joel's feedback] [boqun: Apply Andrea's test feedback] Reported-by: Andrea Righi <arighi@nvidia.com> Closes: https://lore.kernel.org/all/abjzvz_tL_siV17s@gpd4/ Fixes: commit c27cea4 ("rcu: Re-implement RCU Tasks Trace in terms of SRCU-fast") Link: https://lore.kernel.org/rcu/3c4c5a29-24ea-492d-aeee-e0d9605b4183@nvidia.com/ [1] Suggested-by: Zqiang <qiang.zhang@linux.dev> Tested-by: Andrea Righi <arighi@nvidia.com> Tested-by: Paul E. McKenney <paulmck@kernel.org> Tested-by: Joel Fernandes <joelagnelf@nvidia.com> Signed-off-by: Boqun Feng <boqun@kernel.org>
1 parent 61bbcfb commit 7c405fb

2 files changed

Lines changed: 29 additions & 2 deletions

File tree

include/linux/srcutree.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ struct srcu_usage {
9595
unsigned long reschedule_jiffies;
9696
unsigned long reschedule_count;
9797
struct delayed_work work;
98+
struct irq_work irq_work;
9899
struct srcu_struct *srcu_ssp;
99100
};
100101

kernel/rcu/srcutree.c

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <linux/mutex.h>
2020
#include <linux/percpu.h>
2121
#include <linux/preempt.h>
22+
#include <linux/irq_work.h>
2223
#include <linux/rcupdate_wait.h>
2324
#include <linux/sched.h>
2425
#include <linux/smp.h>
@@ -75,6 +76,7 @@ static bool __read_mostly srcu_init_done;
7576
static void srcu_invoke_callbacks(struct work_struct *work);
7677
static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
7778
static void process_srcu(struct work_struct *work);
79+
static void srcu_irq_work(struct irq_work *work);
7880
static void srcu_delay_timer(struct timer_list *t);
7981

8082
/*
@@ -216,6 +218,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
216218
mutex_init(&ssp->srcu_sup->srcu_barrier_mutex);
217219
atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
218220
INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
221+
init_irq_work(&ssp->srcu_sup->irq_work, srcu_irq_work);
219222
ssp->srcu_sup->sda_is_static = is_static;
220223
if (!is_static) {
221224
ssp->sda = alloc_percpu(struct srcu_data);
@@ -716,6 +719,8 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
716719
return; /* Just leak it! */
717720
if (WARN_ON(srcu_readers_active(ssp)))
718721
return; /* Just leak it! */
722+
/* Wait for the irq_work to finish first, as it may queue new work. */
723+
irq_work_sync(&sup->irq_work);
719724
flush_delayed_work(&sup->work);
720725
for_each_possible_cpu(cpu) {
721726
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
@@ -1121,9 +1126,13 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
11211126
// it isn't. And it does not have to be. After all, it
11221127
// can only be executed during early boot when there is only
11231128
// the one boot CPU running with interrupts still disabled.
1129+
//
1130+
// Use an irq_work here to avoid acquiring runqueue lock with
1131+
// srcu rcu_node::lock held. BPF instrumentation could introduce the
1132+
// opposite dependency, hence we need to break the possible
1133+
// locking dependency here.
11241134
if (likely(srcu_init_done))
1125-
queue_delayed_work(rcu_gp_wq, &sup->work,
1126-
!!srcu_get_delay(ssp));
1135+
irq_work_queue(&sup->irq_work);
11271136
else if (list_empty(&sup->work.work.entry))
11281137
list_add(&sup->work.work.entry, &srcu_boot_list);
11291138
}
@@ -1982,6 +1991,23 @@ static void process_srcu(struct work_struct *work)
19821991
srcu_reschedule(ssp, curdelay);
19831992
}
19841993

1994+
/*
 * irq_work handler that queues the srcu_usage structure's delayed work
 * (whose handler is process_srcu()) on rcu_gp_wq.
 *
 * srcu_funnel_gp_start() queues this irq_work instead of calling
 * queue_delayed_work() directly, so that the workqueue code (which may
 * acquire scheduler locks) is not entered while the srcu rcu_node lock
 * is held.
 *
 * @work: the irq_work embedded in a struct srcu_usage.
 */
static void srcu_irq_work(struct irq_work *work)
1995+
{
1996+
struct srcu_struct *ssp;
1997+
struct srcu_usage *sup;
1998+
unsigned long delay;
1999+
unsigned long flags;
2000+
2001+
// Recover the enclosing srcu_usage from its embedded irq_work member.
sup = container_of(work, struct srcu_usage, irq_work);
2002+
ssp = sup->srcu_ssp;
2003+
2004+
// Sample the delay with the srcu_usage lock held and interrupts disabled.
// NOTE(review): presumably srcu_get_delay() requires this lock and
// ssp->srcu_sup is the same object as sup -- confirm.
raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
2005+
delay = srcu_get_delay(ssp);
2006+
raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
2007+
2008+
// Queue only after the lock has been dropped.  !!delay collapses any
// nonzero delay to 1, so the work is queued with a delay of 0 or 1.
queue_delayed_work(rcu_gp_wq, &sup->work, !!delay);
2009+
}
2010+
19852011
void srcutorture_get_gp_data(struct srcu_struct *ssp, int *flags,
19862012
unsigned long *gp_seq)
19872013
{

0 commit comments

Comments
 (0)