Skip to content

Commit c037910

Browse files
kudureranganathsuperm1
authored andcommitted
cpufreq: Pass the policy to cpufreq_driver->adjust_perf()
cpufreq_cpu_get() can sleep on PREEMPT_RT in presence of concurrent writer(s), however amd-pstate depends on fetching the cpudata via the policy's driver data which necessitates grabbing the reference. Since schedutil governor can call "cpufreq_driver->update_perf()" during sched_tick/enqueue/dequeue with rq_lock held and IRQs disabled, fetching the policy object using the cpufreq_cpu_get() helper in the scheduler fast-path leads to "BUG: scheduling while atomic" on PREEMPT_RT [1]. Pass the cached cpufreq policy object in sg_policy to the update_perf() instead of just the CPU. The CPU can be inferred using "policy->cpu". The lifetime of cpufreq_policy object outlasts that of the governor and the cpufreq driver (allocated when the CPU is onlined and only reclaimed when the CPU is offlined / the CPU device is removed) which makes it safe to be referenced throughout the governor's lifetime. Closes:https://lore.kernel.org/all/20250731092316.3191-1-spasswolf@web.de/ [1] Fixes: 1d215f0 ("cpufreq: amd-pstate: Add fast switch function for AMD P-State") Reported-by: Bert Karwatzki <spasswolf@web.de> Acked-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com> Acked-by: Gary Guo <gary@garyguo.net> # Rust Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com> Reviewed-by: Zhongqiu Han <zhongqiu.han@oss.qualcomm.com> Link: https://lore.kernel.org/r/20260316081849.19368-3-kprateek.nayak@amd.com Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
1 parent 86d71f1 commit c037910

6 files changed

Lines changed: 17 additions & 18 deletions

File tree

drivers/cpufreq/amd-pstate.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -788,13 +788,12 @@ static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
788788
return policy->cur;
789789
}
790790

791-
static void amd_pstate_adjust_perf(unsigned int cpu,
791+
static void amd_pstate_adjust_perf(struct cpufreq_policy *policy,
792792
unsigned long _min_perf,
793793
unsigned long target_perf,
794794
unsigned long capacity)
795795
{
796796
u8 max_perf, min_perf, des_perf, cap_perf;
797-
struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);
798797
struct amd_cpudata *cpudata;
799798
union perf_cached perf;
800799

drivers/cpufreq/cpufreq.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2231,7 +2231,7 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
22312231

22322232
/**
22332233
* cpufreq_driver_adjust_perf - Adjust CPU performance level in one go.
2234-
* @cpu: Target CPU.
2234+
* @policy: cpufreq policy object of the target CPU.
22352235
* @min_perf: Minimum (required) performance level (units of @capacity).
22362236
* @target_perf: Target (desired) performance level (units of @capacity).
22372237
* @capacity: Capacity of the target CPU.
@@ -2250,12 +2250,12 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
22502250
* parallel with either ->target() or ->target_index() or ->fast_switch() for
22512251
* the same CPU.
22522252
*/
2253-
void cpufreq_driver_adjust_perf(unsigned int cpu,
2253+
void cpufreq_driver_adjust_perf(struct cpufreq_policy *policy,
22542254
unsigned long min_perf,
22552255
unsigned long target_perf,
22562256
unsigned long capacity)
22572257
{
2258-
cpufreq_driver->adjust_perf(cpu, min_perf, target_perf, capacity);
2258+
cpufreq_driver->adjust_perf(policy, min_perf, target_perf, capacity);
22592259
}
22602260

22612261
/**

drivers/cpufreq/intel_pstate.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3239,12 +3239,12 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
32393239
return target_pstate * cpu->pstate.scaling;
32403240
}
32413241

3242-
static void intel_cpufreq_adjust_perf(unsigned int cpunum,
3242+
static void intel_cpufreq_adjust_perf(struct cpufreq_policy *policy,
32433243
unsigned long min_perf,
32443244
unsigned long target_perf,
32453245
unsigned long capacity)
32463246
{
3247-
struct cpudata *cpu = all_cpu_data[cpunum];
3247+
struct cpudata *cpu = all_cpu_data[policy->cpu];
32483248
u64 hwp_cap = READ_ONCE(cpu->hwp_cap_cached);
32493249
int old_pstate = cpu->pstate.current_pstate;
32503250
int cap_pstate, min_pstate, max_pstate, target_pstate;

include/linux/cpufreq.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ struct cpufreq_driver {
372372
* conditions) scale invariance can be disabled, which causes the
373373
* schedutil governor to fall back to the latter.
374374
*/
375-
void (*adjust_perf)(unsigned int cpu,
375+
void (*adjust_perf)(struct cpufreq_policy *policy,
376376
unsigned long min_perf,
377377
unsigned long target_perf,
378378
unsigned long capacity);
@@ -617,7 +617,7 @@ struct cpufreq_governor {
617617
/* Pass a target to the cpufreq driver */
618618
unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
619619
unsigned int target_freq);
620-
void cpufreq_driver_adjust_perf(unsigned int cpu,
620+
void cpufreq_driver_adjust_perf(struct cpufreq_policy *policy,
621621
unsigned long min_perf,
622622
unsigned long target_perf,
623623
unsigned long capacity);

kernel/sched/cpufreq_schedutil.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
461461
unsigned int flags)
462462
{
463463
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
464+
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
464465
unsigned long prev_util = sg_cpu->util;
465466
unsigned long max_cap;
466467

@@ -482,10 +483,10 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
482483
if (sugov_hold_freq(sg_cpu) && sg_cpu->util < prev_util)
483484
sg_cpu->util = prev_util;
484485

485-
cpufreq_driver_adjust_perf(sg_cpu->cpu, sg_cpu->bw_min,
486+
cpufreq_driver_adjust_perf(sg_policy->policy, sg_cpu->bw_min,
486487
sg_cpu->util, max_cap);
487488

488-
sg_cpu->sg_policy->last_freq_update_time = time;
489+
sg_policy->last_freq_update_time = time;
489490
}
490491

491492
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)

rust/kernel/cpufreq.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1257,18 +1257,17 @@ impl<T: Driver> Registration<T> {
12571257
/// # Safety
12581258
///
12591259
/// - This function may only be called from the cpufreq C infrastructure.
1260+
/// - The pointer arguments must be valid pointers.
12601261
unsafe extern "C" fn adjust_perf_callback(
1261-
cpu: c_uint,
1262+
ptr: *mut bindings::cpufreq_policy,
12621263
min_perf: c_ulong,
12631264
target_perf: c_ulong,
12641265
capacity: c_ulong,
12651266
) {
1266-
// SAFETY: The C API guarantees that `cpu` refers to a valid CPU number.
1267-
let cpu_id = unsafe { CpuId::from_u32_unchecked(cpu) };
1268-
1269-
if let Ok(mut policy) = PolicyCpu::from_cpu(cpu_id) {
1270-
T::adjust_perf(&mut policy, min_perf, target_perf, capacity);
1271-
}
1267+
// SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the
1268+
// lifetime of `policy`.
1269+
let policy = unsafe { Policy::from_raw_mut(ptr) };
1270+
T::adjust_perf(policy, min_perf, target_perf, capacity);
12721271
}
12731272

12741273
/// Driver's `get_intermediate` callback.

0 commit comments

Comments
 (0)