github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/gadgets/advise/seccomp/tracer/bpf/seccomp.bpf.c (about)

     1  // SPDX-License-Identifier: GPL-2.0
     2  /* Copyright (c) 2021 The Inspektor Gadget authors */
     3  
     4  /* This BPF program uses the GPL-restricted function bpf_probe_read*().
     5   */
     6  
     7  #include <vmlinux.h>
     8  
     9  #include <bpf/bpf_helpers.h>
    10  #include <bpf/bpf_core_read.h>
    11  #include <bpf/bpf_tracing.h>
    12  
    13  #include "seccomp-common.h"
    14  
    15  #define TASK_COMM_LEN 16
    16  #define TS_COMPAT 0x0002
    17  
    18  // prctl syscall number from
    19  // https://github.com/seccomp/libseccomp/blob/abad8a8f41fc13efbb95fc1ccaa3e181342bade7/src/syscalls.csv#L265
    20  #ifndef __NR_prctl
    21  #if defined(bpf_target_x86)
    22  #define __NR_prctl 157
    23  #elif defined(bpf_target_arm64)
    24  #define __NR_prctl 167
    25  #else
    26  #error "Unsupported architecture"
    27  #endif
    28  #endif
    29  
    30  // prctl syscall parameters from
    31  // https://github.com/torvalds/linux/blob/5147da902e0dd162c6254a61e4c57f21b60a9b1c/include/uapi/linux/prctl.h#L10
    32  // https://github.com/torvalds/linux/blob/5147da902e0dd162c6254a61e4c57f21b60a9b1c/include/uapi/linux/prctl.h#L175
    33  #ifndef PR_GET_PDEATHSIG
    34  #define PR_GET_PDEATHSIG 2
    35  #endif
    36  #ifndef PR_SET_NO_NEW_PRIVS
    37  #define PR_SET_NO_NEW_PRIVS 38
    38  #endif
    39  
    40  // Seccomp syscall number from
    41  // https://github.com/torvalds/linux/blob/v5.12/tools/testing/selftests/seccomp/seccomp_bpf.c#L115
    42  // x86_64 and arm64 are supported.
    43  #ifndef __NR_seccomp
    44  #if defined(bpf_target_x86)
    45  #define __NR_seccomp 317
    46  #elif defined(bpf_target_arm64)
    47  #define __NR_seccomp 277
    48  #else
    49  #error "Unsupported architecture"
    50  #endif
    51  #endif
    52  
/*
 * Per-mount-namespace record of observed syscalls.
 *
 * key:   __u64; ig_seccomp_e() uses the mount namespace inode number of the
 *        current task. Key 0 is looked up there as a blank template that is
 *        copied into newly created entries (presumably inserted by user
 *        space -- confirm against the loader).
 * value: byte array of SYSCALLS_MAP_VALUE_SIZE bytes. ig_seccomp_e() sets
 *        value[id] to 0x01 for each recorded syscall id < SYSCALLS_COUNT and
 *        uses value[SYSCALLS_COUNT] as the "start recording runc syscalls"
 *        flag.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, __u64);
	__type(value, unsigned char[SYSCALLS_MAP_VALUE_SIZE]);
	__uint(max_entries, 1024); // at most 1024 keys tracked at once
} syscalls_per_mntns SEC(".maps");
    59  
    60  #ifdef __TARGET_ARCH_x86
    61  static __always_inline int is_x86_compat(struct task_struct *task)
    62  {
    63  	return !!(BPF_CORE_READ(task, thread_info.status) & TS_COMPAT);
    64  }
    65  #endif
    66  
    67  SEC("raw_tracepoint/sys_enter")
    68  int ig_seccomp_e(struct bpf_raw_tracepoint_args *ctx)
    69  {
    70  	struct pt_regs regs = {};
    71  	unsigned int id;
    72  	struct task_struct *task = (struct task_struct *)bpf_get_current_task();
    73  
    74  	bpf_probe_read(&regs, sizeof(struct pt_regs), (void *)ctx->args[0]);
    75  	id = ctx->args[1];
    76  
    77  #ifdef __TARGET_ARCH_x86
    78  	if (is_x86_compat(task)) {
    79  		return 0;
    80  	}
    81  #endif
    82  
    83  	if (id < 0 || id >= SYSCALLS_COUNT)
    84  		return 0;
    85  
    86  	char comm[TASK_COMM_LEN];
    87  	bpf_get_current_comm(comm, sizeof(comm));
    88  	int is_runc = comm[0] == 'r' && comm[1] == 'u' && comm[2] == 'n' &&
    89  		      comm[3] == 'c';
    90  
    91  	__u64 mntns = BPF_CORE_READ(task, nsproxy, mnt_ns, ns.inum);
    92  	if (mntns == 0) {
    93  		return 0;
    94  	}
    95  
    96  	unsigned char *syscall_bitmap =
    97  		bpf_map_lookup_elem(&syscalls_per_mntns, &mntns);
    98  	if (syscall_bitmap == 0) {
    99  		__u64 zero = 0;
   100  		unsigned char *blank_bitmap =
   101  			bpf_map_lookup_elem(&syscalls_per_mntns, &zero);
   102  		if (blank_bitmap == 0)
   103  			return 0;
   104  		bpf_map_update_elem(&syscalls_per_mntns, &mntns, blank_bitmap,
   105  				    BPF_NOEXIST);
   106  
   107  		syscall_bitmap =
   108  			bpf_map_lookup_elem(&syscalls_per_mntns, &mntns);
   109  		if (syscall_bitmap == 0)
   110  			return 0;
   111  	}
   112  
   113  	// If it is runc, we want to record only the syscalls executed after the
   114  	// seccomp profile is actually installed. However, if we are running the
   115  	// seccomp-advisor gadget, it is very probably that the pod does not have
   116  	// a seccomp profile yet, so seccomp() will not be called. Therefore, we
   117  	// decide to start recording from the prctl(PR_GET_PDEATHSIG) call on. It
   118  	// is a safe place right before all the seccomp() calls that will be always
   119  	// executed during the runc initialisation:
   120  	// https://github.com/opencontainers/runc/blob/8b4a8f093d0dbdf45100597f710d16777845ee83/libcontainer/standard_init_linux.go#L148
   121  	if (is_runc) {
   122  		if (syscall_bitmap[SYSCALLS_COUNT] == 0) {
   123  			if (id == __NR_prctl &&
   124  			    PT_REGS_PARM1(&regs) == PR_GET_PDEATHSIG) {
   125  				// Start recording the runc syscalls from now on.
   126  				syscall_bitmap[SYSCALLS_COUNT] = 1;
   127  			}
   128  
   129  			return 0;
   130  		}
   131  
   132  		// Record all the runc syscalls after prctl(PR_GET_PDEATHSIG) except
   133  		// for seccomp() and prctl(PR_SET_NO_NEW_PRIVS) because we know they
   134  		// are executed before the seccomp profile is installed.
   135  		if ((id == __NR_prctl &&
   136  		     PT_REGS_PARM1(&regs) == PR_SET_NO_NEW_PRIVS) ||
   137  		    (id == __NR_seccomp)) {
   138  			return 0;
   139  		}
   140  	}
   141  
   142  	// Record the syscall
   143  	syscall_bitmap[id] = 0x01;
   144  
   145  	return 0;
   146  }
   147  
// License string placed in the "license" section. As the file header notes,
// this program uses the GPL-restricted bpf_probe_read*() helpers.
char _license[] SEC("license") = "GPL";