Skip to content

Commit 9abff57

Browse files
committed
Merge tag 'wq-for-7.0-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue fixes from Tejun Heo:

 - Improve workqueue stall diagnostics: dump all busy workers (not just
   running ones), show wall-clock duration of in-flight work items, and
   add a sample module for reproducing stalls

 - Fix POOL_BH vs WQ_BH flag namespace mismatch in pr_cont_worker_id()

 - Rename pool->watchdog_ts to pool->last_progress_ts and related
   functions for clarity

* tag 'wq-for-7.0-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: Rename show_cpu_pool{s,}_hog{s,}() to reflect broadened scope
  workqueue: Add stall detector sample module
  workqueue: Show all busy workers in stall diagnostics
  workqueue: Show in-flight work item duration in stall diagnostics
  workqueue: Rename pool->watchdog_ts to pool->last_progress_ts
  workqueue: Use POOL_BH instead of WQ_BH when checking pool flags
2 parents b073bcb + 98c790b commit 9abff57

4 files changed

Lines changed: 128 additions & 27 deletions

File tree

kernel/workqueue.c

Lines changed: 28 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ struct worker_pool {
190190
int id; /* I: pool ID */
191191
unsigned int flags; /* L: flags */
192192

193-
unsigned long watchdog_ts; /* L: watchdog timestamp */
193+
unsigned long last_progress_ts; /* L: last forward progress timestamp */
194194
bool cpu_stall; /* WD: stalled cpu bound pool */
195195

196196
/*
@@ -1697,7 +1697,7 @@ static void __pwq_activate_work(struct pool_workqueue *pwq,
16971697
WARN_ON_ONCE(!(*wdb & WORK_STRUCT_INACTIVE));
16981698
trace_workqueue_activate_work(work);
16991699
if (list_empty(&pwq->pool->worklist))
1700-
pwq->pool->watchdog_ts = jiffies;
1700+
pwq->pool->last_progress_ts = jiffies;
17011701
move_linked_works(work, &pwq->pool->worklist, NULL);
17021702
__clear_bit(WORK_STRUCT_INACTIVE_BIT, wdb);
17031703
}
@@ -2348,7 +2348,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
23482348
*/
23492349
if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq, false)) {
23502350
if (list_empty(&pool->worklist))
2351-
pool->watchdog_ts = jiffies;
2351+
pool->last_progress_ts = jiffies;
23522352

23532353
trace_workqueue_activate_work(work);
23542354
insert_work(pwq, work, &pool->worklist, work_flags);
@@ -3204,6 +3204,7 @@ __acquires(&pool->lock)
32043204
worker->current_pwq = pwq;
32053205
if (worker->task)
32063206
worker->current_at = worker->task->se.sum_exec_runtime;
3207+
worker->current_start = jiffies;
32073208
work_data = *work_data_bits(work);
32083209
worker->current_color = get_work_color(work_data);
32093210

@@ -3352,7 +3353,7 @@ static void process_scheduled_works(struct worker *worker)
33523353
while ((work = list_first_entry_or_null(&worker->scheduled,
33533354
struct work_struct, entry))) {
33543355
if (first) {
3355-
worker->pool->watchdog_ts = jiffies;
3356+
worker->pool->last_progress_ts = jiffies;
33563357
first = false;
33573358
}
33583359
process_one_work(worker, work);
@@ -4850,7 +4851,7 @@ static int init_worker_pool(struct worker_pool *pool)
48504851
pool->cpu = -1;
48514852
pool->node = NUMA_NO_NODE;
48524853
pool->flags |= POOL_DISASSOCIATED;
4853-
pool->watchdog_ts = jiffies;
4854+
pool->last_progress_ts = jiffies;
48544855
INIT_LIST_HEAD(&pool->worklist);
48554856
INIT_LIST_HEAD(&pool->idle_list);
48564857
hash_init(pool->busy_hash);
@@ -6274,7 +6275,7 @@ static void pr_cont_worker_id(struct worker *worker)
62746275
{
62756276
struct worker_pool *pool = worker->pool;
62766277

6277-
if (pool->flags & WQ_BH)
6278+
if (pool->flags & POOL_BH)
62786279
pr_cont("bh%s",
62796280
pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : "");
62806281
else
@@ -6359,6 +6360,8 @@ static void show_pwq(struct pool_workqueue *pwq)
63596360
pr_cont(" %s", comma ? "," : "");
63606361
pr_cont_worker_id(worker);
63616362
pr_cont(":%ps", worker->current_func);
6363+
pr_cont(" for %us",
6364+
jiffies_to_msecs(jiffies - worker->current_start) / 1000);
63626365
list_for_each_entry(work, &worker->scheduled, entry)
63636366
pr_cont_work(false, work, &pcws);
63646367
pr_cont_work_flush(comma, (work_func_t)-1L, &pcws);
@@ -6462,7 +6465,7 @@ static void show_one_worker_pool(struct worker_pool *pool)
64626465

64636466
/* How long the first pending work is waiting for a worker. */
64646467
if (!list_empty(&pool->worklist))
6465-
hung = jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000;
6468+
hung = jiffies_to_msecs(jiffies - pool->last_progress_ts) / 1000;
64666469

64676470
/*
64686471
* Defer printing to avoid deadlocks in console drivers that
@@ -7580,11 +7583,11 @@ MODULE_PARM_DESC(panic_on_stall_time, "Panic if stall exceeds this many seconds
75807583

75817584
/*
75827585
* Show workers that might prevent the processing of pending work items.
7583-
* The only candidates are CPU-bound workers in the running state.
7584-
* Pending work items should be handled by another idle worker
7585-
* in all other situations.
7586+
* A busy worker that is not running on the CPU (e.g. sleeping in
7587+
* wait_event_idle() with PF_WQ_WORKER cleared) can stall the pool just as
7588+
* effectively as a CPU-bound one, so dump every in-flight worker.
75867589
*/
7587-
static void show_cpu_pool_hog(struct worker_pool *pool)
7590+
static void show_cpu_pool_busy_workers(struct worker_pool *pool)
75887591
{
75897592
struct worker *worker;
75907593
unsigned long irq_flags;
@@ -7593,36 +7596,34 @@ static void show_cpu_pool_hog(struct worker_pool *pool)
75937596
raw_spin_lock_irqsave(&pool->lock, irq_flags);
75947597

75957598
hash_for_each(pool->busy_hash, bkt, worker, hentry) {
7596-
if (task_is_running(worker->task)) {
7597-
/*
7598-
* Defer printing to avoid deadlocks in console
7599-
* drivers that queue work while holding locks
7600-
* also taken in their write paths.
7601-
*/
7602-
printk_deferred_enter();
7599+
/*
7600+
* Defer printing to avoid deadlocks in console
7601+
* drivers that queue work while holding locks
7602+
* also taken in their write paths.
7603+
*/
7604+
printk_deferred_enter();
76037605

7604-
pr_info("pool %d:\n", pool->id);
7605-
sched_show_task(worker->task);
7606+
pr_info("pool %d:\n", pool->id);
7607+
sched_show_task(worker->task);
76067608

7607-
printk_deferred_exit();
7608-
}
7609+
printk_deferred_exit();
76097610
}
76107611

76117612
raw_spin_unlock_irqrestore(&pool->lock, irq_flags);
76127613
}
76137614

7614-
static void show_cpu_pools_hogs(void)
7615+
static void show_cpu_pools_busy_workers(void)
76157616
{
76167617
struct worker_pool *pool;
76177618
int pi;
76187619

7619-
pr_info("Showing backtraces of running workers in stalled CPU-bound worker pools:\n");
7620+
pr_info("Showing backtraces of busy workers in stalled worker pools:\n");
76207621

76217622
rcu_read_lock();
76227623

76237624
for_each_pool(pool, pi) {
76247625
if (pool->cpu_stall)
7625-
show_cpu_pool_hog(pool);
7626+
show_cpu_pool_busy_workers(pool);
76267627

76277628
}
76287629

@@ -7691,7 +7692,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
76917692
touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu));
76927693
else
76937694
touched = READ_ONCE(wq_watchdog_touched);
7694-
pool_ts = READ_ONCE(pool->watchdog_ts);
7695+
pool_ts = READ_ONCE(pool->last_progress_ts);
76957696

76967697
if (time_after(pool_ts, touched))
76977698
ts = pool_ts;
@@ -7719,7 +7720,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
77197720
show_all_workqueues();
77207721

77217722
if (cpu_pool_stall)
7722-
show_cpu_pools_hogs();
7723+
show_cpu_pools_busy_workers();
77237724

77247725
if (lockup_detected)
77257726
panic_on_wq_watchdog(max_stall_time);

kernel/workqueue_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ struct worker {
3232
work_func_t current_func; /* K: function */
3333
struct pool_workqueue *current_pwq; /* K: pwq */
3434
u64 current_at; /* K: runtime at start or last wakeup */
35+
unsigned long current_start; /* K: start time of current work item */
3536
unsigned int current_color; /* K: color */
3637

3738
int sleeping; /* S: is worker sleeping? */
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Build the workqueue stall-detector sample (wq_stall.c) as an
# out-of-tree kernel module:
#   make -C <kernel tree> M=$(pwd) modules
obj-m += wq_stall.o
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* wq_stall - Test module for the workqueue stall detector.
4+
*
5+
* Deliberately creates a workqueue stall so the watchdog fires and
6+
* prints diagnostic output. Useful for verifying that the stall
7+
* detector correctly identifies stuck workers and produces useful
8+
* backtraces.
9+
*
10+
* The stall is triggered by clearing PF_WQ_WORKER before sleeping,
11+
* which hides the worker from the concurrency manager. A second
12+
* work item queued on the same pool then sits in the worklist with
13+
* no worker available to process it.
14+
*
15+
* After ~30s the workqueue watchdog fires:
16+
* BUG: workqueue lockup - pool cpus=N ...
17+
*
18+
* Build:
19+
* make -C <kernel tree> M=samples/workqueue/stall_detector modules
20+
*
21+
* Copyright (c) 2026 Meta Platforms, Inc. and affiliates.
22+
* Copyright (c) 2026 Breno Leitao <leitao@debian.org>
23+
*/
24+
25+
#include <linux/module.h>
26+
#include <linux/workqueue.h>
27+
#include <linux/wait.h>
28+
#include <linux/atomic.h>
29+
#include <linux/sched.h>
30+
31+
static DECLARE_WAIT_QUEUE_HEAD(stall_wq_head);
32+
static atomic_t wake_condition = ATOMIC_INIT(0);
33+
static struct work_struct stall_work1;
34+
static struct work_struct stall_work2;
35+
36+
/*
 * Victim work item.  While the stall is in effect this sits unprocessed
 * in pool->worklist; the message below only appears once the first
 * worker is woken on module unload and the pool makes progress again.
 */
static void stall_work2_fn(struct work_struct *work)
{
	pr_info("wq_stall: second work item finally ran\n");
}
40+
41+
static void stall_work1_fn(struct work_struct *work)
42+
{
43+
pr_info("wq_stall: first work item running on cpu %d\n",
44+
raw_smp_processor_id());
45+
46+
/*
47+
* Queue second item while we're still counted as running
48+
* (pool->nr_running > 0). Since schedule_work() on a per-CPU
49+
* workqueue targets raw_smp_processor_id(), item 2 lands on the
50+
* same pool. __queue_work -> kick_pool -> need_more_worker()
51+
* sees nr_running > 0 and does NOT wake a new worker.
52+
*/
53+
schedule_work(&stall_work2);
54+
55+
/*
56+
* Hide from the workqueue concurrency manager. Without
57+
* PF_WQ_WORKER, schedule() won't call wq_worker_sleeping(),
58+
* so nr_running is never decremented and no replacement
59+
* worker is created. Item 2 stays stuck in pool->worklist.
60+
*/
61+
current->flags &= ~PF_WQ_WORKER;
62+
63+
pr_info("wq_stall: entering wait_event_idle (PF_WQ_WORKER cleared)\n");
64+
pr_info("wq_stall: expect 'BUG: workqueue lockup' in ~30-60s\n");
65+
wait_event_idle(stall_wq_head, atomic_read(&wake_condition) != 0);
66+
67+
/* Restore so process_one_work() cleanup works correctly */
68+
current->flags |= PF_WQ_WORKER;
69+
pr_info("wq_stall: woke up, PF_WQ_WORKER restored\n");
70+
}
71+
72+
/*
 * Module init: arm both work items and kick off the first one, which
 * triggers the deliberate stall (see stall_work1_fn()).
 */
static int __init wq_stall_init(void)
{
	pr_info("wq_stall: loading\n");

	/* Initialization order of the two items is immaterial. */
	INIT_WORK(&stall_work2, stall_work2_fn);
	INIT_WORK(&stall_work1, stall_work1_fn);

	schedule_work(&stall_work1);

	return 0;
}
82+
83+
/*
 * Module exit: resolve the stall.  The condition must be set before the
 * wake-up so wait_event_idle() in stall_work1_fn() observes it; both
 * items are then flushed so no work references module text after unload.
 */
static void __exit wq_stall_exit(void)
{
	pr_info("wq_stall: unloading\n");

	atomic_set(&wake_condition, 1);
	wake_up(&stall_wq_head);

	flush_work(&stall_work1);
	flush_work(&stall_work2);

	pr_info("wq_stall: all work flushed, module unloaded\n");
}
92+
93+
module_init(wq_stall_init);
94+
module_exit(wq_stall_exit);
95+
96+
MODULE_LICENSE("GPL");
97+
MODULE_DESCRIPTION("Reproduce workqueue stall caused by PF_WQ_WORKER misuse");
98+
MODULE_AUTHOR("Breno Leitao <leitao@debian.org>");

0 commit comments

Comments
 (0)