Skip to content

Commit b2e48c4

Browse files
Thomas Gleixner authored and Peter Zijlstra committed
sched/mmcid: Prevent CID stalls due to concurrent forks
A newly forked task is accounted as MMCID user before the task is visible in the process' thread list and the global task list. This creates the following problem: CPU1 CPU2 fork() sched_mm_cid_fork(tnew1) tnew1->mm.mm_cid_users++; tnew1->mm_cid.cid = getcid() -> preemption fork() sched_mm_cid_fork(tnew2) tnew2->mm.mm_cid_users++; // Reaches the per CPU threshold mm_cid_fixup_tasks_to_cpus() for_each_other(current, p) .... As tnew1 is not visible yet, this fails to fix up the already allocated CID of tnew1. As a consequence a subsequent schedule in might fail to acquire a (transitional) CID and the machine stalls. Move the invocation of sched_mm_cid_fork() after the new task becomes visible in the thread and the task list to prevent this. This also makes it symmetrical vs. exit() where the task is removed as CID user before the task is removed from the thread and task lists. Fixes: fbd0e71 ("sched/mmcid: Provide CID ownership mode fixup functions") Signed-off-by: Thomas Gleixner <tglx@kernel.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Tested-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Link: https://patch.msgid.link/20260310202525.969061974@kernel.org
1 parent 1f318b9 commit b2e48c4

3 files changed

Lines changed: 15 additions & 11 deletions

File tree

include/linux/sched.h

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2354,7 +2354,6 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo
23542354
#ifdef CONFIG_SCHED_MM_CID
23552355
void sched_mm_cid_before_execve(struct task_struct *t);
23562356
void sched_mm_cid_after_execve(struct task_struct *t);
2357-
void sched_mm_cid_fork(struct task_struct *t);
23582357
void sched_mm_cid_exit(struct task_struct *t);
23592358
static __always_inline int task_mm_cid(struct task_struct *t)
23602359
{
@@ -2363,7 +2362,6 @@ static __always_inline int task_mm_cid(struct task_struct *t)
23632362
#else
23642363
static inline void sched_mm_cid_before_execve(struct task_struct *t) { }
23652364
static inline void sched_mm_cid_after_execve(struct task_struct *t) { }
2366-
static inline void sched_mm_cid_fork(struct task_struct *t) { }
23672365
static inline void sched_mm_cid_exit(struct task_struct *t) { }
23682366
static __always_inline int task_mm_cid(struct task_struct *t)
23692367
{

kernel/fork.c

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1586,7 +1586,6 @@ static int copy_mm(u64 clone_flags, struct task_struct *tsk)
15861586

15871587
tsk->mm = mm;
15881588
tsk->active_mm = mm;
1589-
sched_mm_cid_fork(tsk);
15901589
return 0;
15911590
}
15921591

@@ -2498,7 +2497,6 @@ __latent_entropy struct task_struct *copy_process(
24982497
exit_nsproxy_namespaces(p);
24992498
bad_fork_cleanup_mm:
25002499
if (p->mm) {
2501-
sched_mm_cid_exit(p);
25022500
mm_clear_owner(p->mm, p);
25032501
mmput(p->mm);
25042502
}

kernel/sched/core.c

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4729,8 +4729,11 @@ void sched_cancel_fork(struct task_struct *p)
47294729
scx_cancel_fork(p);
47304730
}
47314731

4732+
static void sched_mm_cid_fork(struct task_struct *t);
4733+
47324734
void sched_post_fork(struct task_struct *p)
47334735
{
4736+
sched_mm_cid_fork(p);
47344737
uclamp_post_fork(p);
47354738
scx_post_fork(p);
47364739
}
@@ -10646,12 +10649,13 @@ static void mm_cid_do_fixup_tasks_to_cpus(struct mm_struct *mm)
1064610649
* possible switch back to per task mode happens either in the
1064710650
* deferred handler function or in the next fork()/exit().
1064810651
*
10649-
* The caller has already transferred. The newly incoming task is
10650-
* already accounted for, but not yet visible.
10652+
* The caller has already transferred so remove it from the users
10653+
* count. The incoming task is already visible and has mm_cid.active,
10654+
* but has task::mm_cid::cid == UNSET. Still it needs to be accounted
10655+
* for. Concurrent fork()s might add more threads, but all of them have
10656+
* task::mm_cid::active = 0, so they don't affect the accounting here.
1065110657
*/
10652-
users = mm->mm_cid.users - 2;
10653-
if (!users)
10654-
return;
10658+
users = mm->mm_cid.users - 1;
1065510659

1065610660
guard(rcu)();
1065710661
for_other_threads(current, t) {
@@ -10688,12 +10692,15 @@ static bool sched_mm_cid_add_user(struct task_struct *t, struct mm_struct *mm)
1068810692
return mm_update_max_cids(mm);
1068910693
}
1069010694

10691-
void sched_mm_cid_fork(struct task_struct *t)
10695+
static void sched_mm_cid_fork(struct task_struct *t)
1069210696
{
1069310697
struct mm_struct *mm = t->mm;
1069410698
bool percpu;
1069510699

10696-
WARN_ON_ONCE(!mm || t->mm_cid.cid != MM_CID_UNSET);
10700+
if (!mm)
10701+
return;
10702+
10703+
WARN_ON_ONCE(t->mm_cid.cid != MM_CID_UNSET);
1069710704

1069810705
guard(mutex)(&mm->mm_cid.mutex);
1069910706
scoped_guard(raw_spinlock_irq, &mm->mm_cid.lock) {
@@ -10885,6 +10892,7 @@ void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
1088510892
}
1088610893
#else /* CONFIG_SCHED_MM_CID */
1088710894
static inline void mm_update_cpus_allowed(struct mm_struct *mm, const struct cpumask *affmsk) { }
10895+
static inline void sched_mm_cid_fork(struct task_struct *t) { }
1088810896
#endif /* !CONFIG_SCHED_MM_CID */
1088910897

1089010898
static DEFINE_PER_CPU(struct sched_change_ctx, sched_change_ctx);

0 commit comments

Comments (0)