|
30 | 30 | #define CLONE_NNP (1ULL << 35) |
31 | 31 | #endif |
32 | 32 |
|
/*
 * Provide CLONE_PIDFD_AUTOKILL (bit 36 of the clone3() flags word) when
 * the headers included by this file do not define it.
 */
#if !defined(CLONE_PIDFD_AUTOKILL)
# define CLONE_PIDFD_AUTOKILL (1ULL << 36)
#endif

/*
 * Provide the capability ABI version that drop_all_caps() stores in the
 * capset() header when the included headers do not define it.
 */
#if !defined(_LINUX_CAPABILITY_VERSION_3)
# define _LINUX_CAPABILITY_VERSION_3 0x20080522
#endif
| 40 | + |
/*
 * First argument of the raw capset() call made by drop_all_caps().
 *
 * NOTE(review): the layout presumably mirrors the kernel's user capability
 * header from <linux/capability.h> -- confirm against that header.
 */
struct cap_header {
	__u32 version;	/* capability ABI version, e.g. _LINUX_CAPABILITY_VERSION_3 */
	int pid;	/* target of the call; drop_all_caps() leaves this at 0 */
};
| 45 | + |
/*
 * One 32-bit slice of the three capability sets, passed as the second
 * argument of the raw capset() call; drop_all_caps() supplies an array of
 * two of these, all zero.
 */
struct cap_data {
	__u32 effective;	/* effective set bits */
	__u32 permitted;	/* permitted set bits */
	__u32 inheritable;	/* inheritable set bits */
};
| 51 | + |
| 52 | +static int drop_all_caps(void) |
| 53 | +{ |
| 54 | + struct cap_header hdr = { .version = _LINUX_CAPABILITY_VERSION_3 }; |
| 55 | + struct cap_data data[2] = {}; |
| 56 | + |
| 57 | + return syscall(__NR_capset, &hdr, data); |
| 58 | +} |
| 59 | + |
33 | 60 | static pid_t create_autoreap_child(int *pidfd) |
34 | 61 | { |
35 | 62 | struct __clone_args args = { |
@@ -619,4 +646,255 @@ TEST(autoreap_no_new_privs_unset) |
619 | 646 | close(pidfd); |
620 | 647 | } |
621 | 648 |
|
| 649 | +/* |
| 650 | + * Helper: create a child with CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP | CLONE_NNP. |
| 651 | + */ |
| 652 | +static pid_t create_autokill_child(int *pidfd) |
| 653 | +{ |
| 654 | + struct __clone_args args = { |
| 655 | + .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | |
| 656 | + CLONE_AUTOREAP | CLONE_NNP, |
| 657 | + .exit_signal = 0, |
| 658 | + .pidfd = ptr_to_u64(pidfd), |
| 659 | + }; |
| 660 | + |
| 661 | + return sys_clone3(&args, sizeof(args)); |
| 662 | +} |
| 663 | + |
| 664 | +/* |
| 665 | + * Basic autokill test: child blocks in pause(), parent closes the |
| 666 | + * clone3 pidfd, child should be killed and autoreaped. |
| 667 | + */ |
| 668 | +TEST(autokill_basic) |
| 669 | +{ |
| 670 | + int pidfd = -1, pollfd_fd = -1, ret; |
| 671 | + struct pollfd pfd; |
| 672 | + pid_t pid; |
| 673 | + |
| 674 | + pid = create_autokill_child(&pidfd); |
| 675 | + if (pid < 0 && errno == EINVAL) |
| 676 | + SKIP(return, "CLONE_PIDFD_AUTOKILL not supported"); |
| 677 | + ASSERT_GE(pid, 0); |
| 678 | + |
| 679 | + if (pid == 0) { |
| 680 | + pause(); |
| 681 | + _exit(1); |
| 682 | + } |
| 683 | + |
| 684 | + ASSERT_GE(pidfd, 0); |
| 685 | + |
| 686 | + /* |
| 687 | + * Open a second pidfd via pidfd_open() so we can observe the |
| 688 | + * child's death after closing the clone3 pidfd. |
| 689 | + */ |
| 690 | + pollfd_fd = sys_pidfd_open(pid, 0); |
| 691 | + ASSERT_GE(pollfd_fd, 0); |
| 692 | + |
| 693 | + /* Close the clone3 pidfd — this should trigger autokill. */ |
| 694 | + close(pidfd); |
| 695 | + |
| 696 | + /* Wait for the child to die via the pidfd_open'd fd. */ |
| 697 | + pfd.fd = pollfd_fd; |
| 698 | + pfd.events = POLLIN; |
| 699 | + ret = poll(&pfd, 1, 5000); |
| 700 | + ASSERT_EQ(ret, 1); |
| 701 | + ASSERT_TRUE(pfd.revents & POLLIN); |
| 702 | + |
| 703 | + /* Child should be autoreaped — no zombie. */ |
| 704 | + usleep(100000); |
| 705 | + ret = waitpid(pid, NULL, WNOHANG); |
| 706 | + ASSERT_EQ(ret, -1); |
| 707 | + ASSERT_EQ(errno, ECHILD); |
| 708 | + |
| 709 | + close(pollfd_fd); |
| 710 | +} |
| 711 | + |
| 712 | +/* |
| 713 | + * CLONE_PIDFD_AUTOKILL without CLONE_PIDFD must fail with EINVAL. |
| 714 | + */ |
| 715 | +TEST(autokill_requires_pidfd) |
| 716 | +{ |
| 717 | + struct __clone_args args = { |
| 718 | + .flags = CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP, |
| 719 | + .exit_signal = 0, |
| 720 | + }; |
| 721 | + pid_t pid; |
| 722 | + |
| 723 | + pid = sys_clone3(&args, sizeof(args)); |
| 724 | + ASSERT_EQ(pid, -1); |
| 725 | + ASSERT_EQ(errno, EINVAL); |
| 726 | +} |
| 727 | + |
| 728 | +/* |
| 729 | + * CLONE_PIDFD_AUTOKILL without CLONE_AUTOREAP must fail with EINVAL. |
| 730 | + */ |
| 731 | +TEST(autokill_requires_autoreap) |
| 732 | +{ |
| 733 | + int pidfd = -1; |
| 734 | + struct __clone_args args = { |
| 735 | + .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL, |
| 736 | + .exit_signal = 0, |
| 737 | + .pidfd = ptr_to_u64(&pidfd), |
| 738 | + }; |
| 739 | + pid_t pid; |
| 740 | + |
| 741 | + pid = sys_clone3(&args, sizeof(args)); |
| 742 | + ASSERT_EQ(pid, -1); |
| 743 | + ASSERT_EQ(errno, EINVAL); |
| 744 | +} |
| 745 | + |
| 746 | +/* |
| 747 | + * CLONE_PIDFD_AUTOKILL with CLONE_THREAD must fail with EINVAL. |
| 748 | + */ |
| 749 | +TEST(autokill_rejects_thread) |
| 750 | +{ |
| 751 | + int pidfd = -1; |
| 752 | + struct __clone_args args = { |
| 753 | + .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | |
| 754 | + CLONE_AUTOREAP | CLONE_THREAD | |
| 755 | + CLONE_SIGHAND | CLONE_VM, |
| 756 | + .exit_signal = 0, |
| 757 | + .pidfd = ptr_to_u64(&pidfd), |
| 758 | + }; |
| 759 | + pid_t pid; |
| 760 | + |
| 761 | + pid = sys_clone3(&args, sizeof(args)); |
| 762 | + ASSERT_EQ(pid, -1); |
| 763 | + ASSERT_EQ(errno, EINVAL); |
| 764 | +} |
| 765 | + |
| 766 | +/* |
| 767 | + * Test that only the clone3 pidfd triggers autokill, not pidfd_open(). |
| 768 | + * Close the pidfd_open'd fd first — child should survive. |
| 769 | + * Then close the clone3 pidfd — child should be killed and autoreaped. |
| 770 | + */ |
| 771 | +TEST(autokill_pidfd_open_no_effect) |
| 772 | +{ |
| 773 | + int pidfd = -1, open_fd = -1, ret; |
| 774 | + struct pollfd pfd; |
| 775 | + pid_t pid; |
| 776 | + |
| 777 | + pid = create_autokill_child(&pidfd); |
| 778 | + if (pid < 0 && errno == EINVAL) |
| 779 | + SKIP(return, "CLONE_PIDFD_AUTOKILL not supported"); |
| 780 | + ASSERT_GE(pid, 0); |
| 781 | + |
| 782 | + if (pid == 0) { |
| 783 | + pause(); |
| 784 | + _exit(1); |
| 785 | + } |
| 786 | + |
| 787 | + ASSERT_GE(pidfd, 0); |
| 788 | + |
| 789 | + /* Open a second pidfd via pidfd_open(). */ |
| 790 | + open_fd = sys_pidfd_open(pid, 0); |
| 791 | + ASSERT_GE(open_fd, 0); |
| 792 | + |
| 793 | + /* |
| 794 | + * Close the pidfd_open'd fd — child should survive because |
| 795 | + * only the clone3 pidfd has autokill. |
| 796 | + */ |
| 797 | + close(open_fd); |
| 798 | + usleep(200000); |
| 799 | + |
| 800 | + /* Verify child is still alive by polling the clone3 pidfd. */ |
| 801 | + pfd.fd = pidfd; |
| 802 | + pfd.events = POLLIN; |
| 803 | + ret = poll(&pfd, 1, 0); |
| 804 | + ASSERT_EQ(ret, 0) { |
| 805 | + TH_LOG("Child died after closing pidfd_open fd — should still be alive"); |
| 806 | + } |
| 807 | + |
| 808 | + /* Open another observation fd before triggering autokill. */ |
| 809 | + open_fd = sys_pidfd_open(pid, 0); |
| 810 | + ASSERT_GE(open_fd, 0); |
| 811 | + |
| 812 | + /* Now close the clone3 pidfd — this triggers autokill. */ |
| 813 | + close(pidfd); |
| 814 | + |
| 815 | + pfd.fd = open_fd; |
| 816 | + pfd.events = POLLIN; |
| 817 | + ret = poll(&pfd, 1, 5000); |
| 818 | + ASSERT_EQ(ret, 1); |
| 819 | + ASSERT_TRUE(pfd.revents & POLLIN); |
| 820 | + |
| 821 | + /* Child should be autoreaped — no zombie. */ |
| 822 | + usleep(100000); |
| 823 | + ret = waitpid(pid, NULL, WNOHANG); |
| 824 | + ASSERT_EQ(ret, -1); |
| 825 | + ASSERT_EQ(errno, ECHILD); |
| 826 | + |
| 827 | + close(open_fd); |
| 828 | +} |
| 829 | + |
| 830 | +/* |
| 831 | + * Test that CLONE_PIDFD_AUTOKILL without CLONE_NNP fails with EPERM |
| 832 | + * for an unprivileged caller. |
| 833 | + */ |
| 834 | +TEST(autokill_requires_cap_sys_admin) |
| 835 | +{ |
| 836 | + int pidfd = -1, ret; |
| 837 | + struct __clone_args args = { |
| 838 | + .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | |
| 839 | + CLONE_AUTOREAP, |
| 840 | + .exit_signal = 0, |
| 841 | + .pidfd = ptr_to_u64(&pidfd), |
| 842 | + }; |
| 843 | + pid_t pid; |
| 844 | + |
| 845 | + /* Drop all capabilities so we lack CAP_SYS_ADMIN. */ |
| 846 | + ret = drop_all_caps(); |
| 847 | + ASSERT_EQ(ret, 0); |
| 848 | + |
| 849 | + pid = sys_clone3(&args, sizeof(args)); |
| 850 | + ASSERT_EQ(pid, -1); |
| 851 | + ASSERT_EQ(errno, EPERM); |
| 852 | +} |
| 853 | + |
| 854 | +/* |
| 855 | + * Test that CLONE_PIDFD_AUTOKILL without CLONE_NNP succeeds with |
| 856 | + * CAP_SYS_ADMIN. |
| 857 | + */ |
| 858 | +TEST(autokill_without_nnp_with_cap) |
| 859 | +{ |
| 860 | + struct __clone_args args = { |
| 861 | + .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | |
| 862 | + CLONE_AUTOREAP, |
| 863 | + .exit_signal = 0, |
| 864 | + }; |
| 865 | + struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; |
| 866 | + int pidfd = -1, ret; |
| 867 | + struct pollfd pfd; |
| 868 | + pid_t pid; |
| 869 | + |
| 870 | + if (geteuid() != 0) |
| 871 | + SKIP(return, "Need root/CAP_SYS_ADMIN"); |
| 872 | + |
| 873 | + args.pidfd = ptr_to_u64(&pidfd); |
| 874 | + |
| 875 | + pid = sys_clone3(&args, sizeof(args)); |
| 876 | + if (pid < 0 && errno == EINVAL) |
| 877 | + SKIP(return, "CLONE_PIDFD_AUTOKILL not supported"); |
| 878 | + ASSERT_GE(pid, 0); |
| 879 | + |
| 880 | + if (pid == 0) |
| 881 | + _exit(0); |
| 882 | + |
| 883 | + ASSERT_GE(pidfd, 0); |
| 884 | + |
| 885 | + /* Wait for child to exit. */ |
| 886 | + pfd.fd = pidfd; |
| 887 | + pfd.events = POLLIN; |
| 888 | + ret = poll(&pfd, 1, 5000); |
| 889 | + ASSERT_EQ(ret, 1); |
| 890 | + |
| 891 | + ret = ioctl(pidfd, PIDFD_GET_INFO, &info); |
| 892 | + ASSERT_EQ(ret, 0); |
| 893 | + ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); |
| 894 | + ASSERT_TRUE(WIFEXITED(info.exit_code)); |
| 895 | + ASSERT_EQ(WEXITSTATUS(info.exit_code), 0); |
| 896 | + |
| 897 | + close(pidfd); |
| 898 | +} |
| 899 | + |
622 | 900 | TEST_HARNESS_MAIN |
0 commit comments