Skip to content

Commit 13578a0

Browse files
committed
rv: Add sample hybrid monitor stall
Add a sample monitor to showcase hybrid/timed automata. The stall monitor identifies tasks stalled for longer than a threshold and reacts when that happens.

Reviewed-by: Nam Cao <namcao@linutronix.de>
Link: https://lore.kernel.org/r/20260330111010.153663-7-gmonaco@redhat.com
Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
1 parent 708340c commit 13578a0

12 files changed

Lines changed: 377 additions & 0 deletions

File tree

Documentation/tools/rv/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ Runtime verification (rv) tool
1616
rv-mon-wip
1717
rv-mon-wwnr
1818
rv-mon-sched
19+
rv-mon-stall
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
.. SPDX-License-Identifier: GPL-2.0
2+
3+
============
4+
rv-mon-stall
5+
============
6+
--------------------
7+
Stalled task monitor
8+
--------------------
9+
10+
:Manual section: 1
11+
12+
SYNOPSIS
13+
========
14+
15+
**rv mon stall** [*OPTIONS*]
16+
17+
DESCRIPTION
18+
===========
19+
20+
The stalled task (**stall**) monitor is a sample per-task timed monitor that
21+
checks if tasks are scheduled within a defined threshold after they are ready.
22+
23+
See kernel documentation for further information about this monitor:
24+
<https://docs.kernel.org/trace/rv/monitor_stall.html>
25+
26+
OPTIONS
27+
=======
28+
29+
.. include:: common_ikm.rst
30+
31+
SEE ALSO
32+
========
33+
34+
**rv**\(1), **rv-mon**\(1)
35+
36+
Linux kernel *RV* documentation:
37+
<https://www.kernel.org/doc/html/latest/trace/rv/index.html>
38+
39+
AUTHOR
40+
======
41+
42+
Written by Gabriele Monaco <gmonaco@redhat.com>
43+
44+
.. include:: common_appendix.rst

Documentation/trace/rv/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ Runtime Verification
1616
monitor_wwnr.rst
1717
monitor_sched.rst
1818
monitor_rtapp.rst
19+
monitor_stall.rst
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
Monitor stall
2+
=============
3+
4+
- Name: stall - stalled task monitor
5+
- Type: per-task hybrid automaton
6+
- Author: Gabriele Monaco <gmonaco@redhat.com>
7+
8+
Description
9+
-----------
10+
11+
The stalled task (stall) monitor is a sample per-task timed monitor that checks
12+
if tasks are scheduled within a defined threshold after they are ready::
13+
14+
|
15+
|
16+
v
17+
#==========================#
18+
+-----------------> H dequeued H
19+
| #==========================#
20+
| |
21+
sched_switch_wait | sched_wakeup;reset(clk)
22+
| v
23+
| +--------------------------+ <+
24+
| | enqueued | | sched_wakeup
25+
| | clk < threshold_jiffies | -+
26+
| +--------------------------+
27+
| | ^
28+
| sched_switch_in sched_switch_preempt;reset(clk)
29+
| v |
30+
| +--------------------------+
31+
+------------------ | running |
32+
+--------------------------+
33+
^ sched_switch_in |
34+
| sched_wakeup |
35+
+----------------------+
36+
37+
The threshold can be configured as a parameter by either booting with the
38+
``stall.threshold_jiffies=<new value>`` argument or writing a new value to
39+
``/sys/module/stall/parameters/threshold_jiffies``.
40+
41+
Specification
42+
-------------
43+
Graphviz Dot file in tools/verification/models/stall.dot

kernel/trace/rv/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ source "kernel/trace/rv/monitors/pagefault/Kconfig"
7878
source "kernel/trace/rv/monitors/sleep/Kconfig"
7979
# Add new rtapp monitors here
8080

81+
source "kernel/trace/rv/monitors/stall/Kconfig"
8182
# Add new monitors here
8283

8384
config RV_REACTORS

kernel/trace/rv/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ obj-$(CONFIG_RV_MON_STS) += monitors/sts/sts.o
1717
obj-$(CONFIG_RV_MON_NRP) += monitors/nrp/nrp.o
1818
obj-$(CONFIG_RV_MON_SSSW) += monitors/sssw/sssw.o
1919
obj-$(CONFIG_RV_MON_OPID) += monitors/opid/opid.o
20+
obj-$(CONFIG_RV_MON_STALL) += monitors/stall/stall.o
2021
# Add new monitors here
2122
obj-$(CONFIG_RV_REACTORS) += rv_reactors.o
2223
obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# SPDX-License-Identifier: GPL-2.0-only
2+
#
3+
# Sample hybrid-automaton monitor: flags tasks stalled beyond a threshold.
config RV_MON_STALL
	bool "stall monitor"
	depends on RV
	select HA_MON_EVENTS_ID
	help
	  Enable the stall sample monitor that illustrates the usage of hybrid
	  automata monitors. It can be used to identify tasks stalled for
	  longer than a threshold.

	  For further information, see:
	    Documentation/trace/rv/monitor_stall.rst
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
#include <linux/ftrace.h>
3+
#include <linux/tracepoint.h>
4+
#include <linux/kernel.h>
5+
#include <linux/module.h>
6+
#include <linux/init.h>
7+
#include <linux/rv.h>
8+
#include <rv/instrumentation.h>
9+
10+
#define MODULE_NAME "stall"
11+
12+
#include <trace/events/sched.h>
13+
#include <rv_trace.h>
14+
15+
#define RV_MON_TYPE RV_MON_PER_TASK
16+
#define HA_TIMER_TYPE HA_TIMER_WHEEL
17+
#include "stall.h"
18+
#include <rv/ha_monitor.h>
19+
20+
/*
 * Stall threshold, in jiffies, passed to ha_start_timer_jiffy() whenever a
 * task enters the enqueued state. Defaults to 1000.
 */
static u64 threshold_jiffies = 1000;
21+
/* Expose the threshold as a module parameter with mode 0644. */
module_param(threshold_jiffies, ullong, 0644);
22+
23+
/*
 * ha_get_env - read the current value of an environment variable
 *
 * The model only has the clk environment variable, which is read through
 * ha_get_clk_jiffy(). Any other @env yields ENV_INVALID_VALUE.
 * @time_ns is not used by this monitor.
 */
static u64 ha_get_env(struct ha_monitor *ha_mon, enum envs_stall env, u64 time_ns)
{
	if (env != clk_stall)
		return ENV_INVALID_VALUE;

	return ha_get_clk_jiffy(ha_mon, env);
}
29+
30+
/*
 * ha_reset_env - reset an environment variable
 *
 * Only the clk environment variable can be reset, through
 * ha_reset_clk_jiffy(); requests for any other @env are ignored.
 * @time_ns is not used by this monitor.
 */
static void ha_reset_env(struct ha_monitor *ha_mon, enum envs_stall env, u64 time_ns)
{
	if (env != clk_stall)
		return;

	ha_reset_clk_jiffy(ha_mon, env);
}
35+
36+
/*
 * ha_verify_invariants - check the invariant of the state being left
 *
 * Only the enqueued state carries an invariant on clk; every other state
 * trivially passes. Returns the result of ha_check_invariant_jiffy() for
 * the enqueued state, true otherwise.
 */
static inline bool ha_verify_invariants(struct ha_monitor *ha_mon,
					enum states curr_state, enum events event,
					enum states next_state, u64 time_ns)
{
	if (curr_state != enqueued_stall)
		return true;

	return ha_check_invariant_jiffy(ha_mon, clk_stall, time_ns);
}
44+
45+
/*
 * ha_verify_guards - apply the clock resets attached to transitions
 *
 * No transition in this model has a guard condition, so this always returns
 * true. It resets clk on the two transitions that lead into the enqueued
 * state from another state: a wakeup while dequeued and a preemption while
 * running.
 */
static inline bool ha_verify_guards(struct ha_monitor *ha_mon,
				    enum states curr_state, enum events event,
				    enum states next_state, u64 time_ns)
{
	const bool woken_up = curr_state == dequeued_stall &&
			      event == sched_wakeup_stall;
	const bool preempted = curr_state == running_stall &&
			       event == sched_switch_preempt_stall;

	if (woken_up || preempted)
		ha_reset_env(ha_mon, clk_stall, time_ns);

	return true;
}
57+
58+
/*
 * ha_setup_invariants - set up or tear down the timer for the next state
 *
 * Entering the enqueued state from a different state calls
 * ha_start_timer_jiffy() with threshold_jiffies; leaving it for a different
 * state calls ha_cancel_timer(). Self-loops (next_state == curr_state) and
 * transitions not involving the enqueued state do nothing.
 */
static inline void ha_setup_invariants(struct ha_monitor *ha_mon,
				       enum states curr_state, enum events event,
				       enum states next_state, u64 time_ns)
{
	const bool entering = next_state == enqueued_stall;
	const bool leaving = curr_state == enqueued_stall;

	/* Both set means an enqueued self-loop: the timer is left untouched. */
	if (entering && !leaving)
		ha_start_timer_jiffy(ha_mon, clk_stall, threshold_jiffies, time_ns);
	else if (leaving && !entering)
		ha_cancel_timer(ha_mon);
}
69+
70+
/*
 * ha_verify_constraint - validate the timing constraints of a transition
 *
 * Runs, in order, the invariant check and the guard check; the guard check
 * is skipped when the invariant check fails. Only when both succeed is the
 * invariant of the next state set up. Returns true if the transition
 * satisfies all constraints, false otherwise.
 */
static bool ha_verify_constraint(struct ha_monitor *ha_mon,
				 enum states curr_state, enum events event,
				 enum states next_state, u64 time_ns)
{
	bool allowed;

	allowed = ha_verify_invariants(ha_mon, curr_state, event, next_state, time_ns) &&
		  ha_verify_guards(ha_mon, curr_state, event, next_state, time_ns);
	if (allowed)
		ha_setup_invariants(ha_mon, curr_state, event, next_state, time_ns);

	return allowed;
}
84+
85+
/*
 * handle_sched_switch - sched_switch tracepoint probe
 *
 * @prev gets sched_switch_preempt if it was preempted or is still
 * TASK_RUNNING; otherwise it gets sched_switch_wait, delivered through
 * da_handle_start_event(). @next always gets sched_switch_in.
 */
static void handle_sched_switch(void *data, bool preempt,
				struct task_struct *prev,
				struct task_struct *next,
				unsigned int prev_state)
{
	if (preempt || prev_state == TASK_RUNNING)
		da_handle_event(prev, sched_switch_preempt_stall);
	else
		da_handle_start_event(prev, sched_switch_wait_stall);

	da_handle_event(next, sched_switch_in_stall);
}
96+
97+
static void handle_sched_wakeup(void *data, struct task_struct *p)
98+
{
99+
da_handle_event(p, sched_wakeup_stall);
100+
}
101+
102+
static int enable_stall(void)
103+
{
104+
int retval;
105+
106+
retval = da_monitor_init();
107+
if (retval)
108+
return retval;
109+
110+
rv_attach_trace_probe("stall", sched_switch, handle_sched_switch);
111+
rv_attach_trace_probe("stall", sched_wakeup, handle_sched_wakeup);
112+
113+
return 0;
114+
}
115+
116+
static void disable_stall(void)
117+
{
118+
rv_this.enabled = 0;
119+
120+
rv_detach_trace_probe("stall", sched_switch, handle_sched_switch);
121+
rv_detach_trace_probe("stall", sched_wakeup, handle_sched_wakeup);
122+
123+
da_monitor_destroy();
124+
}
125+
126+
static struct rv_monitor rv_this = {
127+
.name = "stall",
128+
.description = "identify tasks stalled for longer than a threshold.",
129+
.enable = enable_stall,
130+
.disable = disable_stall,
131+
.reset = da_monitor_reset_all,
132+
.enabled = 0,
133+
};
134+
135+
/*
 * register_stall - module init: register the stall monitor with rv
 *
 * Returns whatever rv_register_monitor() returns.
 * NOTE(review): the NULL second argument presumably means "no parent
 * monitor" - confirm against rv_register_monitor().
 */
static int __init register_stall(void)
{
	return rv_register_monitor(&rv_this, NULL);
}
139+
140+
/* unregister_stall - module exit: remove the stall monitor from rv. */
static void __exit unregister_stall(void)
{
	rv_unregister_monitor(&rv_this);
}
144+
145+
module_init(register_stall);
146+
module_exit(unregister_stall);
147+
148+
MODULE_LICENSE("GPL");
149+
MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>");
150+
MODULE_DESCRIPTION("stall: identify tasks stalled for longer than a threshold.");
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Automatically generated C representation of stall automaton
4+
* For further information about this format, see kernel documentation:
5+
* Documentation/trace/rv/deterministic_automata.rst
6+
*/
7+
8+
/*
 * Name of this monitor.
 * NOTE(review): presumably consumed by the generic rv monitor headers to
 * build per-monitor identifiers - confirm.
 */
#define MONITOR_NAME stall
9+
10+
/*
 * States of the stall automaton. The order must match the rows of the
 * function table and the entries of state_names/final_states.
 */
enum states_stall {
	dequeued_stall,		/* initial state */
	enqueued_stall,		/* the only state with a clk invariant */
	running_stall,
	state_max_stall,	/* number of states, not a state itself */
};
16+
17+
/* Marks state/event pairs with no transition in the function table. */
#define INVALID_STATE state_max_stall
18+
19+
/*
 * Events of the stall automaton. The order must match the columns of the
 * function table and the entries of event_names.
 */
enum events_stall {
	sched_switch_in_stall,
	sched_switch_preempt_stall,
	sched_switch_wait_stall,
	sched_wakeup_stall,
	event_max_stall,	/* number of events, not an event itself */
};
26+
27+
/*
 * Environment variables of the stall automaton. The order must match the
 * entries of env_names.
 */
enum envs_stall {
	clk_stall,
	env_max_stall,				/* number of envs */
	env_max_stored_stall = env_max_stall,	/* same count as env_max_stall */
};
32+
33+
/* Fail the build if the stored environment variables exceed MAX_HA_ENV_LEN. */
_Static_assert(env_max_stored_stall <= MAX_HA_ENV_LEN, "Not enough slots");
34+
35+
struct automaton_stall {
36+
char *state_names[state_max_stall];
37+
char *event_names[event_max_stall];
38+
char *env_names[env_max_stall];
39+
unsigned char function[state_max_stall][event_max_stall];
40+
unsigned char initial_state;
41+
bool final_states[state_max_stall];
42+
};
43+
44+
static const struct automaton_stall automaton_stall = {
45+
.state_names = {
46+
"dequeued",
47+
"enqueued",
48+
"running",
49+
},
50+
.event_names = {
51+
"sched_switch_in",
52+
"sched_switch_preempt",
53+
"sched_switch_wait",
54+
"sched_wakeup",
55+
},
56+
.env_names = {
57+
"clk",
58+
},
59+
.function = {
60+
{
61+
INVALID_STATE,
62+
INVALID_STATE,
63+
INVALID_STATE,
64+
enqueued_stall,
65+
},
66+
{
67+
running_stall,
68+
INVALID_STATE,
69+
INVALID_STATE,
70+
enqueued_stall,
71+
},
72+
{
73+
running_stall,
74+
enqueued_stall,
75+
dequeued_stall,
76+
running_stall,
77+
},
78+
},
79+
.initial_state = dequeued_stall,
80+
.final_states = { 1, 0, 0 },
81+
};
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
3+
/*
4+
* Snippet to be included in rv_trace.h
5+
*/
6+
7+
#ifdef CONFIG_RV_MON_STALL
/* Transition record: id, state, event, next state and final-state flag. */
DEFINE_EVENT(event_da_monitor_id, event_stall,
	     TP_PROTO(int id, char *state, char *event, char *next_state, bool final_state),
	     TP_ARGS(id, state, event, next_state, final_state));

/* Error record: id, state and event. */
DEFINE_EVENT(error_da_monitor_id, error_stall,
	     TP_PROTO(int id, char *state, char *event),
	     TP_ARGS(id, state, event));

/* Error record that additionally names an environment variable. */
DEFINE_EVENT(error_env_da_monitor_id, error_env_stall,
	     TP_PROTO(int id, char *state, char *event, char *env),
	     TP_ARGS(id, state, event, env));
#endif /* CONFIG_RV_MON_STALL */

0 commit comments

Comments
 (0)