github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/gadgets/advise/seccomp/tracer/bpf/seccomp.bpf.c (about) 1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2021 The Inspektor Gadget authors */ 3 4 /* This BPF program uses the GPL-restricted function bpf_probe_read*(). 5 */ 6 7 #include <vmlinux.h> 8 9 #include <bpf/bpf_helpers.h> 10 #include <bpf/bpf_core_read.h> 11 #include <bpf/bpf_tracing.h> 12 13 #include "seccomp-common.h" 14 15 #define TASK_COMM_LEN 16 16 #define TS_COMPAT 0x0002 17 18 // prctl syscall number from 19 // https://github.com/seccomp/libseccomp/blob/abad8a8f41fc13efbb95fc1ccaa3e181342bade7/src/syscalls.csv#L265 20 #ifndef __NR_prctl 21 #if defined(bpf_target_x86) 22 #define __NR_prctl 157 23 #elif defined(bpf_target_arm64) 24 #define __NR_prctl 167 25 #else 26 #error "Unsupported architecture" 27 #endif 28 #endif 29 30 // prclt syscall parameters from 31 // https://github.com/torvalds/linux/blob/5147da902e0dd162c6254a61e4c57f21b60a9b1c/include/uapi/linux/prctl.h#L10 32 // https://github.com/torvalds/linux/blob/5147da902e0dd162c6254a61e4c57f21b60a9b1c/include/uapi/linux/prctl.h#L175 33 #ifndef PR_GET_PDEATHSIG 34 #define PR_GET_PDEATHSIG 2 35 #endif 36 #ifndef PR_SET_NO_NEW_PRIVS 37 #define PR_SET_NO_NEW_PRIVS 38 38 #endif 39 40 // Seccomp syscall number from 41 // https://github.com/torvalds/linux/blob/v5.12/tools/testing/selftests/seccomp/seccomp_bpf.c#L115 42 // Only x86_64 is supported for now. 43 #ifndef __NR_seccomp 44 #if defined(bpf_target_x86) 45 #define __NR_seccomp 317 46 #elif defined(bpf_target_arm64) 47 #define __NR_seccomp 277 48 #else 49 #error "Unsupported architecture" 50 #endif 51 #endif 52 53 struct { 54 __uint(type, BPF_MAP_TYPE_HASH); 55 __type(key, __u64); 56 __type(value, unsigned char[SYSCALLS_MAP_VALUE_SIZE]); 57 __uint(max_entries, 1024); 58 } syscalls_per_mntns SEC(".maps"); 59 60 #ifdef __TARGET_ARCH_x86 61 static __always_inline int is_x86_compat(struct task_struct *task) 62 { 63 return !!(BPF_CORE_READ(task, thread_info.status) & TS_COMPAT); 64 } 65 #endif 66 67 SEC("raw_tracepoint/sys_enter") 68 int ig_seccomp_e(struct bpf_raw_tracepoint_args *ctx) 69 { 70 struct pt_regs regs = {}; 71 unsigned int id; 72 struct task_struct *task = (struct task_struct *)bpf_get_current_task(); 73 74 bpf_probe_read(®s, sizeof(struct pt_regs), (void *)ctx->args[0]); 75 id = ctx->args[1]; 76 77 #ifdef __TARGET_ARCH_x86 78 if (is_x86_compat(task)) { 79 return 0; 80 } 81 #endif 82 83 if (id < 0 || id >= SYSCALLS_COUNT) 84 return 0; 85 86 char comm[TASK_COMM_LEN]; 87 bpf_get_current_comm(comm, sizeof(comm)); 88 int is_runc = comm[0] == 'r' && comm[1] == 'u' && comm[2] == 'n' && 89 comm[3] == 'c'; 90 91 __u64 mntns = BPF_CORE_READ(task, nsproxy, mnt_ns, ns.inum); 92 if (mntns == 0) { 93 return 0; 94 } 95 96 unsigned char *syscall_bitmap = 97 bpf_map_lookup_elem(&syscalls_per_mntns, &mntns); 98 if (syscall_bitmap == 0) { 99 __u64 zero = 0; 100 unsigned char *blank_bitmap = 101 bpf_map_lookup_elem(&syscalls_per_mntns, &zero); 102 if (blank_bitmap == 0) 103 return 0; 104 bpf_map_update_elem(&syscalls_per_mntns, &mntns, blank_bitmap, 105 BPF_NOEXIST); 106 107 syscall_bitmap = 108 bpf_map_lookup_elem(&syscalls_per_mntns, &mntns); 109 if (syscall_bitmap == 0) 110 return 0; 111 } 112 113 // If it is runc, we want to record only the syscalls executed after the 114 // seccomp profile is actually installed. However, if we are running the 115 // seccomp-advisor gadget, it is very probably that the pod does not have 116 // a seccomp profile yet, so seccomp() will not be called. Therefore, we 117 // decide to start recording from the prctl(PR_GET_PDEATHSIG) call on. It 118 // is a safe place right before all the seccomp() calls that will be always 119 // executed during the runc initialisation: 120 // https://github.com/opencontainers/runc/blob/8b4a8f093d0dbdf45100597f710d16777845ee83/libcontainer/standard_init_linux.go#L148 121 if (is_runc) { 122 if (syscall_bitmap[SYSCALLS_COUNT] == 0) { 123 if (id == __NR_prctl && 124 PT_REGS_PARM1(®s) == PR_GET_PDEATHSIG) { 125 // Start recording the runc syscalls from now on. 126 syscall_bitmap[SYSCALLS_COUNT] = 1; 127 } 128 129 return 0; 130 } 131 132 // Record all the runc syscalls after prctl(PR_GET_PDEATHSIG) except 133 // for seccomp() and prctl(PR_SET_NO_NEW_PRIVS) because we know they 134 // are executed before the seccomp profile is installed. 135 if ((id == __NR_prctl && 136 PT_REGS_PARM1(®s) == PR_SET_NO_NEW_PRIVS) || 137 (id == __NR_seccomp)) { 138 return 0; 139 } 140 } 141 142 // Record the syscall 143 syscall_bitmap[id] = 0x01; 144 145 return 0; 146 } 147 148 char _license[] SEC("license") = "GPL";