Skip to content

Commit 12ae2c8

Browse files
committed
clone: add CLONE_AUTOREAP
Add a new clone3() flag CLONE_AUTOREAP that makes a child process auto-reap on exit without ever becoming a zombie. This is a per-process property, in contrast to the existing auto-reap mechanism via SA_NOCLDWAIT or SIG_IGN for SIGCHLD, which applies to all children of a given parent.

Currently the only way to automatically reap children is to set SA_NOCLDWAIT or SIG_IGN on SIGCHLD. This is a parent-scoped property affecting all children, which makes it unsuitable for libraries or applications that need selective auto-reaping of specific children while still being able to wait() on others.

CLONE_AUTOREAP stores an autoreap flag in the child's signal_struct. When the child exits, do_notify_parent() checks this flag and causes exit_notify() to transition the task directly to EXIT_DEAD. Since the flag lives on the child, it survives reparenting: if the original parent exits and the child is reparented to a subreaper or init, the child still auto-reaps when it eventually exits.

CLONE_AUTOREAP can be combined with CLONE_PIDFD to allow the parent to monitor the child's exit via poll() and retrieve the exit status via PIDFD_GET_INFO. Without CLONE_PIDFD it provides a fire-and-forget pattern where the parent simply doesn't care about the child's exit status. No exit signal is delivered, so exit_signal must be zero.

CLONE_AUTOREAP is rejected in combination with CLONE_PARENT. If a CLONE_AUTOREAP child were to clone(CLONE_PARENT), the new grandchild would inherit exit_signal == 0 from the autoreap parent's group leader but without signal->autoreap. This grandchild would become a zombie that never sends a signal and is never autoreaped - confusing and arguably broken behavior.

The flag is not inherited by the autoreap process's own children. Each child that should be autoreaped must be explicitly created with CLONE_AUTOREAP.
Link: uapi-group/kernel-features#45
Link: https://patch.msgid.link/20260226-work-pidfs-autoreap-v5-1-d148b984a989@kernel.org
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent 6de23f8 commit 12ae2c8

5 files changed

Lines changed: 26 additions & 4 deletions

File tree

include/linux/sched/signal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ struct signal_struct {
132132
*/
133133
unsigned int is_child_subreaper:1;
134134
unsigned int has_child_subreaper:1;
135+
unsigned int autoreap:1;
135136

136137
#ifdef CONFIG_POSIX_TIMERS
137138

include/uapi/linux/sched.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,9 @@
3434
#define CLONE_IO 0x80000000 /* Clone io context */
3535

3636
/* Flags for the clone3() syscall. */
37-
#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
38-
#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
37+
#define CLONE_CLEAR_SIGHAND (1ULL << 32) /* Clear any signal handler and reset to SIG_DFL. */
38+
#define CLONE_INTO_CGROUP (1ULL << 33) /* Clone into a specific cgroup given the right permissions. */
39+
#define CLONE_AUTOREAP (1ULL << 34) /* Auto-reap child on exit. */
3940

4041
/*
4142
* cloning flags intersect with CSIGNAL so can be used with unshare and clone3

kernel/fork.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2028,6 +2028,18 @@ __latent_entropy struct task_struct *copy_process(
20282028
return ERR_PTR(-EINVAL);
20292029
}
20302030

2031+
if (clone_flags & CLONE_AUTOREAP) {
2032+
if (clone_flags & CLONE_THREAD)
2033+
return ERR_PTR(-EINVAL);
2034+
if (clone_flags & CLONE_PARENT)
2035+
return ERR_PTR(-EINVAL);
2036+
if (args->exit_signal)
2037+
return ERR_PTR(-EINVAL);
2038+
}
2039+
2040+
if ((clone_flags & CLONE_PARENT) && current->signal->autoreap)
2041+
return ERR_PTR(-EINVAL);
2042+
20312043
/*
20322044
* Force any signals received before this point to be delivered
20332045
* before the fork happens. Collect up signals sent to multiple
@@ -2435,6 +2447,8 @@ __latent_entropy struct task_struct *copy_process(
24352447
*/
24362448
p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper ||
24372449
p->real_parent->signal->is_child_subreaper;
2450+
if (clone_flags & CLONE_AUTOREAP)
2451+
p->signal->autoreap = 1;
24382452
list_add_tail(&p->sibling, &p->real_parent->children);
24392453
list_add_tail_rcu(&p->tasks, &init_task.tasks);
24402454
attach_pid(p, PIDTYPE_TGID);
@@ -2897,7 +2911,8 @@ static bool clone3_args_valid(struct kernel_clone_args *kargs)
28972911
{
28982912
/* Verify that no unknown flags are passed along. */
28992913
if (kargs->flags &
2900-
~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP))
2914+
~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP |
2915+
CLONE_AUTOREAP))
29012916
return false;
29022917

29032918
/*

kernel/ptrace.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,8 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
549549
if (!dead && thread_group_empty(p)) {
550550
if (!same_thread_group(p->real_parent, tracer))
551551
dead = do_notify_parent(p, p->exit_signal);
552-
else if (ignoring_children(tracer->sighand)) {
552+
else if (ignoring_children(tracer->sighand) ||
553+
p->signal->autoreap) {
553554
__wake_up_parent(p, tracer);
554555
dead = true;
555556
}

kernel/signal.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2251,6 +2251,10 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
22512251
if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
22522252
sig = 0;
22532253
}
2254+
if (!tsk->ptrace && tsk->signal->autoreap) {
2255+
autoreap = true;
2256+
sig = 0;
2257+
}
22542258
/*
22552259
* Send with __send_signal as si_pid and si_uid are in the
22562260
* parent's namespaces.

0 commit comments

Comments
 (0)