Skip to content

Commit 2b406fd

Browse files
committed
rv: Convert the opid monitor to a hybrid automaton
The opid monitor validates that wakeup and need_resched events only occur with interrupts and preemption disabled by following the preemptirq tracepoints. As reported in [1], those tracepoints might be inaccurate in some situations (e.g. NMIs). Since the monitor doesn't validate other ordering properties, remove the dependency on preemptirq tracepoints and convert the monitor to a hybrid automaton to validate the constraint during event handling. This makes the monitor more robust by also removing the workaround for interrupts missing the preemption tracepoints, which was working on PREEMPT_RT only and allows the monitor to be built on kernels without the preemptirqs tracepoints. [1] - https://lore.kernel.org/lkml/20250625120823.60600-1-gmonaco@redhat.com Reviewed-by: Nam Cao <namcao@linutronix.de> Link: https://lore.kernel.org/r/20260330111010.153663-8-gmonaco@redhat.com Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
1 parent 13578a0 commit 2b406fd

7 files changed

Lines changed: 82 additions & 230 deletions

File tree

Documentation/trace/rv/monitor_sched.rst

Lines changed: 14 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -346,55 +346,21 @@ Monitor opid
346346

347347
The operations with preemption and irq disabled (opid) monitor ensures
348348
operations like ``wakeup`` and ``need_resched`` occur with interrupts and
349-
preemption disabled or during interrupt context, in such case preemption may
350-
not be disabled explicitly.
349+
preemption disabled.
351350
``need_resched`` can be set by some RCU internals functions, in which case it
352-
doesn't match a task wakeup and might occur with only interrupts disabled::
353-
354-
| sched_need_resched
355-
| sched_waking
356-
| irq_entry
357-
| +--------------------+
358-
v v |
359-
+------------------------------------------------------+
360-
+----------- | disabled | <+
361-
| +------------------------------------------------------+ |
362-
| | ^ |
363-
| | preempt_disable sched_need_resched |
364-
| preempt_enable | +--------------------+ |
365-
| v | v | |
366-
| +------------------------------------------------------+ |
367-
| | irq_disabled | |
368-
| +------------------------------------------------------+ |
369-
| | | ^ |
370-
| irq_entry irq_entry | | |
371-
| sched_need_resched v | irq_disable |
372-
| sched_waking +--------------+ | | |
373-
| +----- | | irq_enable | |
374-
| | | in_irq | | | |
375-
| +----> | | | | |
376-
| +--------------+ | | irq_disable
377-
| | | | |
378-
| irq_enable | irq_enable | | |
379-
| v v | |
380-
| #======================================================# |
381-
| H enabled H |
382-
| #======================================================# |
383-
| | ^ ^ preempt_enable | |
384-
| preempt_disable preempt_enable +--------------------+ |
385-
| v | |
386-
| +------------------+ | |
387-
+----------> | preempt_disabled | -+ |
388-
+------------------+ |
389-
| |
390-
+-------------------------------------------------------+
391-
392-
This monitor is designed to work on ``PREEMPT_RT`` kernels, the special case of
393-
events occurring in interrupt context is a shortcut to identify valid scenarios
394-
where the preemption tracepoints might not be visible, during interrupts
395-
preemption is always disabled. On non- ``PREEMPT_RT`` kernels, the interrupts
396-
might invoke a softirq to set ``need_resched`` and wake up a task. This is
397-
another special case that is currently not supported by the monitor.
351+
doesn't match a task wakeup and might occur with only interrupts disabled.
352+
The interrupt and preemption status are validated by the hybrid automaton
353+
constraints when processing the events::
354+
355+
|
356+
|
357+
v
358+
#=========# sched_need_resched;irq_off == 1
359+
H H sched_waking;irq_off == 1 && preempt_off == 1
360+
H any H ------------------------------------------------+
361+
H H |
362+
H H <-----------------------------------------------+
363+
#=========#
398364

399365
References
400366
----------

kernel/trace/rv/monitors/opid/Kconfig

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,13 @@
22
#
33
config RV_MON_OPID
44
depends on RV
5-
depends on TRACE_IRQFLAGS
6-
depends on TRACE_PREEMPT_TOGGLE
75
depends on RV_MON_SCHED
8-
default y if PREEMPT_RT
9-
select DA_MON_EVENTS_IMPLICIT
6+
default y
7+
select HA_MON_EVENTS_IMPLICIT
108
bool "opid monitor"
119
help
1210
Monitor to ensure operations like wakeup and need resched occur with
13-
interrupts and preemption disabled or during IRQs, where preemption
14-
may not be disabled explicitly.
15-
16-
This monitor is unstable on !PREEMPT_RT, say N unless you are testing it.
11+
interrupts and preemption disabled.
1712

1813
For further information, see:
1914
Documentation/trace/rv/monitor_sched.rst

kernel/trace/rv/monitors/opid/opid.c

Lines changed: 34 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -10,94 +10,63 @@
1010
#define MODULE_NAME "opid"
1111

1212
#include <trace/events/sched.h>
13-
#include <trace/events/irq.h>
14-
#include <trace/events/preemptirq.h>
1513
#include <rv_trace.h>
1614
#include <monitors/sched/sched.h>
1715

1816
#define RV_MON_TYPE RV_MON_PER_CPU
1917
#include "opid.h"
20-
#include <rv/da_monitor.h>
18+
#include <rv/ha_monitor.h>
2119

22-
#ifdef CONFIG_X86_LOCAL_APIC
23-
#include <asm/trace/irq_vectors.h>
24-
25-
static void handle_vector_irq_entry(void *data, int vector)
20+
static u64 ha_get_env(struct ha_monitor *ha_mon, enum envs_opid env, u64 time_ns)
2621
{
27-
da_handle_event(irq_entry_opid);
28-
}
29-
30-
static void attach_vector_irq(void)
31-
{
32-
rv_attach_trace_probe("opid", local_timer_entry, handle_vector_irq_entry);
33-
if (IS_ENABLED(CONFIG_IRQ_WORK))
34-
rv_attach_trace_probe("opid", irq_work_entry, handle_vector_irq_entry);
35-
if (IS_ENABLED(CONFIG_SMP)) {
36-
rv_attach_trace_probe("opid", reschedule_entry, handle_vector_irq_entry);
37-
rv_attach_trace_probe("opid", call_function_entry, handle_vector_irq_entry);
38-
rv_attach_trace_probe("opid", call_function_single_entry, handle_vector_irq_entry);
22+
if (env == irq_off_opid)
23+
return irqs_disabled();
24+
else if (env == preempt_off_opid) {
25+
/*
26+
* If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables
27+
* preemption (adding one to the preempt_count). Since we are
28+
* interested in the preempt_count at the time the tracepoint was
29+
* hit, we consider 1 as still enabled.
30+
*/
31+
if (IS_ENABLED(CONFIG_PREEMPTION))
32+
return (preempt_count() & PREEMPT_MASK) > 1;
33+
return true;
3934
}
35+
return ENV_INVALID_VALUE;
4036
}
4137

42-
static void detach_vector_irq(void)
38+
static inline bool ha_verify_guards(struct ha_monitor *ha_mon,
39+
enum states curr_state, enum events event,
40+
enum states next_state, u64 time_ns)
4341
{
44-
rv_detach_trace_probe("opid", local_timer_entry, handle_vector_irq_entry);
45-
if (IS_ENABLED(CONFIG_IRQ_WORK))
46-
rv_detach_trace_probe("opid", irq_work_entry, handle_vector_irq_entry);
47-
if (IS_ENABLED(CONFIG_SMP)) {
48-
rv_detach_trace_probe("opid", reschedule_entry, handle_vector_irq_entry);
49-
rv_detach_trace_probe("opid", call_function_entry, handle_vector_irq_entry);
50-
rv_detach_trace_probe("opid", call_function_single_entry, handle_vector_irq_entry);
51-
}
42+
bool res = true;
43+
44+
if (curr_state == any_opid && event == sched_need_resched_opid)
45+
res = ha_get_env(ha_mon, irq_off_opid, time_ns) == 1ull;
46+
else if (curr_state == any_opid && event == sched_waking_opid)
47+
res = ha_get_env(ha_mon, irq_off_opid, time_ns) == 1ull &&
48+
ha_get_env(ha_mon, preempt_off_opid, time_ns) == 1ull;
49+
return res;
5250
}
5351

54-
#else
55-
/* We assume irq_entry tracepoints are sufficient on other architectures */
56-
static void attach_vector_irq(void) { }
57-
static void detach_vector_irq(void) { }
58-
#endif
59-
60-
static void handle_irq_disable(void *data, unsigned long ip, unsigned long parent_ip)
52+
static bool ha_verify_constraint(struct ha_monitor *ha_mon,
53+
enum states curr_state, enum events event,
54+
enum states next_state, u64 time_ns)
6155
{
62-
da_handle_event(irq_disable_opid);
63-
}
56+
if (!ha_verify_guards(ha_mon, curr_state, event, next_state, time_ns))
57+
return false;
6458

65-
static void handle_irq_enable(void *data, unsigned long ip, unsigned long parent_ip)
66-
{
67-
da_handle_event(irq_enable_opid);
68-
}
69-
70-
static void handle_irq_entry(void *data, int irq, struct irqaction *action)
71-
{
72-
da_handle_event(irq_entry_opid);
73-
}
74-
75-
static void handle_preempt_disable(void *data, unsigned long ip, unsigned long parent_ip)
76-
{
77-
da_handle_event(preempt_disable_opid);
78-
}
79-
80-
static void handle_preempt_enable(void *data, unsigned long ip, unsigned long parent_ip)
81-
{
82-
da_handle_event(preempt_enable_opid);
59+
return true;
8360
}
8461

8562
static void handle_sched_need_resched(void *data, struct task_struct *tsk, int cpu, int tif)
8663
{
87-
/* The monitor's intitial state is not in_irq */
88-
if (this_cpu_read(hardirq_context))
89-
da_handle_event(sched_need_resched_opid);
90-
else
91-
da_handle_start_event(sched_need_resched_opid);
64+
da_handle_start_run_event(sched_need_resched_opid);
9265
}
9366

9467
static void handle_sched_waking(void *data, struct task_struct *p)
9568
{
96-
/* The monitor's intitial state is not in_irq */
97-
if (this_cpu_read(hardirq_context))
98-
da_handle_event(sched_waking_opid);
99-
else
100-
da_handle_start_event(sched_waking_opid);
69+
da_handle_start_run_event(sched_waking_opid);
10170
}
10271

10372
static int enable_opid(void)
@@ -108,14 +77,8 @@ static int enable_opid(void)
10877
if (retval)
10978
return retval;
11079

111-
rv_attach_trace_probe("opid", irq_disable, handle_irq_disable);
112-
rv_attach_trace_probe("opid", irq_enable, handle_irq_enable);
113-
rv_attach_trace_probe("opid", irq_handler_entry, handle_irq_entry);
114-
rv_attach_trace_probe("opid", preempt_disable, handle_preempt_disable);
115-
rv_attach_trace_probe("opid", preempt_enable, handle_preempt_enable);
11680
rv_attach_trace_probe("opid", sched_set_need_resched_tp, handle_sched_need_resched);
11781
rv_attach_trace_probe("opid", sched_waking, handle_sched_waking);
118-
attach_vector_irq();
11982

12083
return 0;
12184
}
@@ -124,14 +87,8 @@ static void disable_opid(void)
12487
{
12588
rv_this.enabled = 0;
12689

127-
rv_detach_trace_probe("opid", irq_disable, handle_irq_disable);
128-
rv_detach_trace_probe("opid", irq_enable, handle_irq_enable);
129-
rv_detach_trace_probe("opid", irq_handler_entry, handle_irq_entry);
130-
rv_detach_trace_probe("opid", preempt_disable, handle_preempt_disable);
131-
rv_detach_trace_probe("opid", preempt_enable, handle_preempt_enable);
13290
rv_detach_trace_probe("opid", sched_set_need_resched_tp, handle_sched_need_resched);
13391
rv_detach_trace_probe("opid", sched_waking, handle_sched_waking);
134-
detach_vector_irq();
13592

13693
da_monitor_destroy();
13794
}

kernel/trace/rv/monitors/opid/opid.h

Lines changed: 19 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -8,99 +8,51 @@
88
#define MONITOR_NAME opid
99

1010
enum states_opid {
11-
disabled_opid,
12-
enabled_opid,
13-
in_irq_opid,
14-
irq_disabled_opid,
15-
preempt_disabled_opid,
11+
any_opid,
1612
state_max_opid,
1713
};
1814

1915
#define INVALID_STATE state_max_opid
2016

2117
enum events_opid {
22-
irq_disable_opid,
23-
irq_enable_opid,
24-
irq_entry_opid,
25-
preempt_disable_opid,
26-
preempt_enable_opid,
2718
sched_need_resched_opid,
2819
sched_waking_opid,
2920
event_max_opid,
3021
};
3122

23+
enum envs_opid {
24+
irq_off_opid,
25+
preempt_off_opid,
26+
env_max_opid,
27+
env_max_stored_opid = irq_off_opid,
28+
};
29+
30+
_Static_assert(env_max_stored_opid <= MAX_HA_ENV_LEN, "Not enough slots");
31+
3232
struct automaton_opid {
3333
char *state_names[state_max_opid];
3434
char *event_names[event_max_opid];
35+
char *env_names[env_max_opid];
3536
unsigned char function[state_max_opid][event_max_opid];
3637
unsigned char initial_state;
3738
bool final_states[state_max_opid];
3839
};
3940

4041
static const struct automaton_opid automaton_opid = {
4142
.state_names = {
42-
"disabled",
43-
"enabled",
44-
"in_irq",
45-
"irq_disabled",
46-
"preempt_disabled",
43+
"any",
4744
},
4845
.event_names = {
49-
"irq_disable",
50-
"irq_enable",
51-
"irq_entry",
52-
"preempt_disable",
53-
"preempt_enable",
5446
"sched_need_resched",
5547
"sched_waking",
5648
},
49+
.env_names = {
50+
"irq_off",
51+
"preempt_off",
52+
},
5753
.function = {
58-
{
59-
INVALID_STATE,
60-
preempt_disabled_opid,
61-
disabled_opid,
62-
INVALID_STATE,
63-
irq_disabled_opid,
64-
disabled_opid,
65-
disabled_opid,
66-
},
67-
{
68-
irq_disabled_opid,
69-
INVALID_STATE,
70-
INVALID_STATE,
71-
preempt_disabled_opid,
72-
enabled_opid,
73-
INVALID_STATE,
74-
INVALID_STATE,
75-
},
76-
{
77-
INVALID_STATE,
78-
enabled_opid,
79-
in_irq_opid,
80-
INVALID_STATE,
81-
INVALID_STATE,
82-
in_irq_opid,
83-
in_irq_opid,
84-
},
85-
{
86-
INVALID_STATE,
87-
enabled_opid,
88-
in_irq_opid,
89-
disabled_opid,
90-
INVALID_STATE,
91-
irq_disabled_opid,
92-
INVALID_STATE,
93-
},
94-
{
95-
disabled_opid,
96-
INVALID_STATE,
97-
INVALID_STATE,
98-
INVALID_STATE,
99-
enabled_opid,
100-
INVALID_STATE,
101-
INVALID_STATE,
102-
},
54+
{ any_opid, any_opid },
10355
},
104-
.initial_state = disabled_opid,
105-
.final_states = { 0, 1, 0, 0, 0 },
56+
.initial_state = any_opid,
57+
.final_states = { 1 },
10658
};

kernel/trace/rv/monitors/opid/opid_trace.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,8 @@ DEFINE_EVENT(event_da_monitor, event_opid,
1212
DEFINE_EVENT(error_da_monitor, error_opid,
1313
TP_PROTO(char *state, char *event),
1414
TP_ARGS(state, event));
15+
16+
DEFINE_EVENT(error_env_da_monitor, error_env_opid,
17+
TP_PROTO(char *state, char *event, char *env),
18+
TP_ARGS(state, event, env));
1519
#endif /* CONFIG_RV_MON_OPID */

kernel/trace/rv/rv_trace.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ DECLARE_EVENT_CLASS(error_da_monitor,
6262
#include <monitors/scpd/scpd_trace.h>
6363
#include <monitors/snep/snep_trace.h>
6464
#include <monitors/sts/sts_trace.h>
65-
#include <monitors/opid/opid_trace.h>
6665
// Add new monitors based on CONFIG_DA_MON_EVENTS_IMPLICIT here
6766

6867
#ifdef CONFIG_HA_MON_EVENTS_IMPLICIT
@@ -91,6 +90,7 @@ DECLARE_EVENT_CLASS(error_env_da_monitor,
9190
__get_str(env))
9291
);
9392

93+
#include <monitors/opid/opid_trace.h>
9494
// Add new monitors based on CONFIG_HA_MON_EVENTS_IMPLICIT here
9595

9696
#endif

0 commit comments

Comments
 (0)