github.com/lalkh/containerd@v1.4.3/contrib/seccomp/seccomp_default.go (about)

     1  // +build linux
     2  
     3  /*
     4     Copyright The containerd Authors.
     5  
     6     Licensed under the Apache License, Version 2.0 (the "License");
     7     you may not use this file except in compliance with the License.
     8     You may obtain a copy of the License at
     9  
    10         http://www.apache.org/licenses/LICENSE-2.0
    11  
    12     Unless required by applicable law or agreed to in writing, software
    13     distributed under the License is distributed on an "AS IS" BASIS,
    14     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15     See the License for the specific language governing permissions and
    16     limitations under the License.
    17  */
    18  
    19  package seccomp
    20  
    21  import (
    22  	"runtime"
    23  
    24  	"golang.org/x/sys/unix"
    25  
    26  	"github.com/opencontainers/runtime-spec/specs-go"
    27  )
    28  
    29  func arches() []specs.Arch {
    30  	switch runtime.GOARCH {
    31  	case "amd64":
    32  		return []specs.Arch{specs.ArchX86_64, specs.ArchX86, specs.ArchX32}
    33  	case "arm64":
    34  		return []specs.Arch{specs.ArchARM, specs.ArchAARCH64}
    35  	case "mips64":
    36  		return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32}
    37  	case "mips64n32":
    38  		return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32}
    39  	case "mipsel64":
    40  		return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32}
    41  	case "mipsel64n32":
    42  		return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32}
    43  	case "s390x":
    44  		return []specs.Arch{specs.ArchS390, specs.ArchS390X}
    45  	default:
    46  		return []specs.Arch{}
    47  	}
    48  }
    49  
    50  // DefaultProfile defines the allowed syscalls for the default seccomp profile.
    51  func DefaultProfile(sp *specs.Spec) *specs.LinuxSeccomp {
    52  	syscalls := []specs.LinuxSyscall{
    53  		{
    54  			Names: []string{
    55  				"accept",
    56  				"accept4",
    57  				"access",
    58  				"adjtimex",
    59  				"alarm",
    60  				"bind",
    61  				"brk",
    62  				"capget",
    63  				"capset",
    64  				"chdir",
    65  				"chmod",
    66  				"chown",
    67  				"chown32",
    68  				"clock_adjtime",
    69  				"clock_adjtime64",
    70  				"clock_getres",
    71  				"clock_getres_time64",
    72  				"clock_gettime",
    73  				"clock_gettime64",
    74  				"clock_nanosleep",
    75  				"clock_nanosleep_time64",
    76  				"close",
    77  				"connect",
    78  				"copy_file_range",
    79  				"creat",
    80  				"dup",
    81  				"dup2",
    82  				"dup3",
    83  				"epoll_create",
    84  				"epoll_create1",
    85  				"epoll_ctl",
    86  				"epoll_ctl_old",
    87  				"epoll_pwait",
    88  				"epoll_wait",
    89  				"epoll_wait_old",
    90  				"eventfd",
    91  				"eventfd2",
    92  				"execve",
    93  				"execveat",
    94  				"exit",
    95  				"exit_group",
    96  				"faccessat",
    97  				"faccessat2",
    98  				"fadvise64",
    99  				"fadvise64_64",
   100  				"fallocate",
   101  				"fanotify_mark",
   102  				"fchdir",
   103  				"fchmod",
   104  				"fchmodat",
   105  				"fchown",
   106  				"fchown32",
   107  				"fchownat",
   108  				"fcntl",
   109  				"fcntl64",
   110  				"fdatasync",
   111  				"fgetxattr",
   112  				"flistxattr",
   113  				"flock",
   114  				"fork",
   115  				"fremovexattr",
   116  				"fsetxattr",
   117  				"fstat",
   118  				"fstat64",
   119  				"fstatat64",
   120  				"fstatfs",
   121  				"fstatfs64",
   122  				"fsync",
   123  				"ftruncate",
   124  				"ftruncate64",
   125  				"futex",
   126  				"futex_time64",
   127  				"futimesat",
   128  				"getcpu",
   129  				"getcwd",
   130  				"getdents",
   131  				"getdents64",
   132  				"getegid",
   133  				"getegid32",
   134  				"geteuid",
   135  				"geteuid32",
   136  				"getgid",
   137  				"getgid32",
   138  				"getgroups",
   139  				"getgroups32",
   140  				"getitimer",
   141  				"getpeername",
   142  				"getpgid",
   143  				"getpgrp",
   144  				"getpid",
   145  				"getppid",
   146  				"getpriority",
   147  				"getrandom",
   148  				"getresgid",
   149  				"getresgid32",
   150  				"getresuid",
   151  				"getresuid32",
   152  				"getrlimit",
   153  				"get_robust_list",
   154  				"getrusage",
   155  				"getsid",
   156  				"getsockname",
   157  				"getsockopt",
   158  				"get_thread_area",
   159  				"gettid",
   160  				"gettimeofday",
   161  				"getuid",
   162  				"getuid32",
   163  				"getxattr",
   164  				"inotify_add_watch",
   165  				"inotify_init",
   166  				"inotify_init1",
   167  				"inotify_rm_watch",
   168  				"io_cancel",
   169  				"ioctl",
   170  				"io_destroy",
   171  				"io_getevents",
   172  				"io_pgetevents",
   173  				"io_pgetevents_time64",
   174  				"ioprio_get",
   175  				"ioprio_set",
   176  				"io_setup",
   177  				"io_submit",
   178  				"io_uring_enter",
   179  				"io_uring_register",
   180  				"io_uring_setup",
   181  				"ipc",
   182  				"kill",
   183  				"lchown",
   184  				"lchown32",
   185  				"lgetxattr",
   186  				"link",
   187  				"linkat",
   188  				"listen",
   189  				"listxattr",
   190  				"llistxattr",
   191  				"_llseek",
   192  				"lremovexattr",
   193  				"lseek",
   194  				"lsetxattr",
   195  				"lstat",
   196  				"lstat64",
   197  				"madvise",
   198  				"membarrier",
   199  				"memfd_create",
   200  				"mincore",
   201  				"mkdir",
   202  				"mkdirat",
   203  				"mknod",
   204  				"mknodat",
   205  				"mlock",
   206  				"mlock2",
   207  				"mlockall",
   208  				"mmap",
   209  				"mmap2",
   210  				"mprotect",
   211  				"mq_getsetattr",
   212  				"mq_notify",
   213  				"mq_open",
   214  				"mq_timedreceive",
   215  				"mq_timedreceive_time64",
   216  				"mq_timedsend",
   217  				"mq_timedsend_time64",
   218  				"mq_unlink",
   219  				"mremap",
   220  				"msgctl",
   221  				"msgget",
   222  				"msgrcv",
   223  				"msgsnd",
   224  				"msync",
   225  				"munlock",
   226  				"munlockall",
   227  				"munmap",
   228  				"nanosleep",
   229  				"newfstatat",
   230  				"_newselect",
   231  				"open",
   232  				"openat",
   233  				"openat2",
   234  				"pause",
   235  				"pidfd_open",
   236  				"pidfd_send_signal",
   237  				"pipe",
   238  				"pipe2",
   239  				"poll",
   240  				"ppoll",
   241  				"ppoll_time64",
   242  				"prctl",
   243  				"pread64",
   244  				"preadv",
   245  				"preadv2",
   246  				"prlimit64",
   247  				"pselect6",
   248  				"pselect6_time64",
   249  				"pwrite64",
   250  				"pwritev",
   251  				"pwritev2",
   252  				"read",
   253  				"readahead",
   254  				"readlink",
   255  				"readlinkat",
   256  				"readv",
   257  				"recv",
   258  				"recvfrom",
   259  				"recvmmsg",
   260  				"recvmmsg_time64",
   261  				"recvmsg",
   262  				"remap_file_pages",
   263  				"removexattr",
   264  				"rename",
   265  				"renameat",
   266  				"renameat2",
   267  				"restart_syscall",
   268  				"rmdir",
   269  				"rseq",
   270  				"rt_sigaction",
   271  				"rt_sigpending",
   272  				"rt_sigprocmask",
   273  				"rt_sigqueueinfo",
   274  				"rt_sigreturn",
   275  				"rt_sigsuspend",
   276  				"rt_sigtimedwait",
   277  				"rt_sigtimedwait_time64",
   278  				"rt_tgsigqueueinfo",
   279  				"sched_getaffinity",
   280  				"sched_getattr",
   281  				"sched_getparam",
   282  				"sched_get_priority_max",
   283  				"sched_get_priority_min",
   284  				"sched_getscheduler",
   285  				"sched_rr_get_interval",
   286  				"sched_rr_get_interval_time64",
   287  				"sched_setaffinity",
   288  				"sched_setattr",
   289  				"sched_setparam",
   290  				"sched_setscheduler",
   291  				"sched_yield",
   292  				"seccomp",
   293  				"select",
   294  				"semctl",
   295  				"semget",
   296  				"semop",
   297  				"semtimedop",
   298  				"semtimedop_time64",
   299  				"send",
   300  				"sendfile",
   301  				"sendfile64",
   302  				"sendmmsg",
   303  				"sendmsg",
   304  				"sendto",
   305  				"setfsgid",
   306  				"setfsgid32",
   307  				"setfsuid",
   308  				"setfsuid32",
   309  				"setgid",
   310  				"setgid32",
   311  				"setgroups",
   312  				"setgroups32",
   313  				"setitimer",
   314  				"setpgid",
   315  				"setpriority",
   316  				"setregid",
   317  				"setregid32",
   318  				"setresgid",
   319  				"setresgid32",
   320  				"setresuid",
   321  				"setresuid32",
   322  				"setreuid",
   323  				"setreuid32",
   324  				"setrlimit",
   325  				"set_robust_list",
   326  				"setsid",
   327  				"setsockopt",
   328  				"set_thread_area",
   329  				"set_tid_address",
   330  				"setuid",
   331  				"setuid32",
   332  				"setxattr",
   333  				"shmat",
   334  				"shmctl",
   335  				"shmdt",
   336  				"shmget",
   337  				"shutdown",
   338  				"sigaltstack",
   339  				"signalfd",
   340  				"signalfd4",
   341  				"sigprocmask",
   342  				"sigreturn",
   343  				"socket",
   344  				"socketcall",
   345  				"socketpair",
   346  				"splice",
   347  				"stat",
   348  				"stat64",
   349  				"statfs",
   350  				"statfs64",
   351  				"statx",
   352  				"symlink",
   353  				"symlinkat",
   354  				"sync",
   355  				"sync_file_range",
   356  				"syncfs",
   357  				"sysinfo",
   358  				"tee",
   359  				"tgkill",
   360  				"time",
   361  				"timer_create",
   362  				"timer_delete",
   363  				"timer_getoverrun",
   364  				"timer_gettime",
   365  				"timer_gettime64",
   366  				"timer_settime",
   367  				"timer_settime64",
   368  				"timerfd_create",
   369  				"timerfd_gettime",
   370  				"timerfd_gettime64",
   371  				"timerfd_settime",
   372  				"timerfd_settime64",
   373  				"times",
   374  				"tkill",
   375  				"truncate",
   376  				"truncate64",
   377  				"ugetrlimit",
   378  				"umask",
   379  				"uname",
   380  				"unlink",
   381  				"unlinkat",
   382  				"utime",
   383  				"utimensat",
   384  				"utimensat_time64",
   385  				"utimes",
   386  				"vfork",
   387  				"vmsplice",
   388  				"wait4",
   389  				"waitid",
   390  				"waitpid",
   391  				"write",
   392  				"writev",
   393  			},
   394  			Action: specs.ActAllow,
   395  			Args:   []specs.LinuxSeccompArg{},
   396  		},
   397  		{
   398  			Names:  []string{"personality"},
   399  			Action: specs.ActAllow,
   400  			Args: []specs.LinuxSeccompArg{
   401  				{
   402  					Index: 0,
   403  					Value: 0x0,
   404  					Op:    specs.OpEqualTo,
   405  				},
   406  			},
   407  		},
   408  		{
   409  			Names:  []string{"personality"},
   410  			Action: specs.ActAllow,
   411  			Args: []specs.LinuxSeccompArg{
   412  				{
   413  					Index: 0,
   414  					Value: 0x0008,
   415  					Op:    specs.OpEqualTo,
   416  				},
   417  			},
   418  		},
   419  		{
   420  			Names:  []string{"personality"},
   421  			Action: specs.ActAllow,
   422  			Args: []specs.LinuxSeccompArg{
   423  				{
   424  					Index: 0,
   425  					Value: 0x20000,
   426  					Op:    specs.OpEqualTo,
   427  				},
   428  			},
   429  		},
   430  		{
   431  			Names:  []string{"personality"},
   432  			Action: specs.ActAllow,
   433  			Args: []specs.LinuxSeccompArg{
   434  				{
   435  					Index: 0,
   436  					Value: 0x20008,
   437  					Op:    specs.OpEqualTo,
   438  				},
   439  			},
   440  		},
   441  		{
   442  			Names:  []string{"personality"},
   443  			Action: specs.ActAllow,
   444  			Args: []specs.LinuxSeccompArg{
   445  				{
   446  					Index: 0,
   447  					Value: 0xffffffff,
   448  					Op:    specs.OpEqualTo,
   449  				},
   450  			},
   451  		},
   452  	}
   453  
   454  	s := &specs.LinuxSeccomp{
   455  		DefaultAction: specs.ActErrno,
   456  		Architectures: arches(),
   457  		Syscalls:      syscalls,
   458  	}
   459  
   460  	// include by arch
   461  	switch runtime.GOARCH {
   462  	case "ppc64le":
   463  		s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   464  			Names: []string{
   465  				"sync_file_range2",
   466  			},
   467  			Action: specs.ActAllow,
   468  			Args:   []specs.LinuxSeccompArg{},
   469  		})
   470  	case "arm", "arm64":
   471  		s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   472  			Names: []string{
   473  				"arm_fadvise64_64",
   474  				"arm_sync_file_range",
   475  				"sync_file_range2",
   476  				"breakpoint",
   477  				"cacheflush",
   478  				"set_tls",
   479  			},
   480  			Action: specs.ActAllow,
   481  			Args:   []specs.LinuxSeccompArg{},
   482  		})
   483  	case "amd64":
   484  		s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   485  			Names: []string{
   486  				"arch_prctl",
   487  				"modify_ldt",
   488  			},
   489  			Action: specs.ActAllow,
   490  			Args:   []specs.LinuxSeccompArg{},
   491  		})
   492  	case "386":
   493  		s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   494  			Names: []string{
   495  				"modify_ldt",
   496  			},
   497  			Action: specs.ActAllow,
   498  			Args:   []specs.LinuxSeccompArg{},
   499  		})
   500  	case "s390", "s390x":
   501  		s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   502  			Names: []string{
   503  				"s390_pci_mmio_read",
   504  				"s390_pci_mmio_write",
   505  				"s390_runtime_instr",
   506  			},
   507  			Action: specs.ActAllow,
   508  			Args:   []specs.LinuxSeccompArg{},
   509  		})
   510  	}
   511  
   512  	admin := false
   513  	for _, c := range sp.Process.Capabilities.Bounding {
   514  		switch c {
   515  		case "CAP_DAC_READ_SEARCH":
   516  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   517  				Names:  []string{"open_by_handle_at"},
   518  				Action: specs.ActAllow,
   519  				Args:   []specs.LinuxSeccompArg{},
   520  			})
   521  		case "CAP_SYS_ADMIN":
   522  			admin = true
   523  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   524  				Names: []string{
   525  					"bpf",
   526  					"clone",
   527  					"fanotify_init",
   528  					"lookup_dcookie",
   529  					"mount",
   530  					"name_to_handle_at",
   531  					"perf_event_open",
   532  					"quotactl",
   533  					"setdomainname",
   534  					"sethostname",
   535  					"setns",
   536  					"syslog",
   537  					"umount",
   538  					"umount2",
   539  					"unshare",
   540  				},
   541  				Action: specs.ActAllow,
   542  				Args:   []specs.LinuxSeccompArg{},
   543  			})
   544  		case "CAP_SYS_BOOT":
   545  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   546  				Names:  []string{"reboot"},
   547  				Action: specs.ActAllow,
   548  				Args:   []specs.LinuxSeccompArg{},
   549  			})
   550  		case "CAP_SYS_CHROOT":
   551  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   552  				Names:  []string{"chroot"},
   553  				Action: specs.ActAllow,
   554  				Args:   []specs.LinuxSeccompArg{},
   555  			})
   556  		case "CAP_SYS_MODULE":
   557  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   558  				Names: []string{
   559  					"delete_module",
   560  					"init_module",
   561  					"finit_module",
   562  				},
   563  				Action: specs.ActAllow,
   564  				Args:   []specs.LinuxSeccompArg{},
   565  			})
   566  		case "CAP_SYS_PACCT":
   567  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   568  				Names:  []string{"acct"},
   569  				Action: specs.ActAllow,
   570  				Args:   []specs.LinuxSeccompArg{},
   571  			})
   572  		case "CAP_SYS_PTRACE":
   573  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   574  				Names: []string{
   575  					"kcmp",
   576  					"pidfd_getfd",
   577  					"process_vm_readv",
   578  					"process_vm_writev",
   579  					"ptrace",
   580  				},
   581  				Action: specs.ActAllow,
   582  				Args:   []specs.LinuxSeccompArg{},
   583  			})
   584  		case "CAP_SYS_RAWIO":
   585  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   586  				Names: []string{
   587  					"iopl",
   588  					"ioperm",
   589  				},
   590  				Action: specs.ActAllow,
   591  				Args:   []specs.LinuxSeccompArg{},
   592  			})
   593  		case "CAP_SYS_TIME":
   594  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   595  				Names: []string{
   596  					"settimeofday",
   597  					"stime",
   598  					"clock_settime",
   599  				},
   600  				Action: specs.ActAllow,
   601  				Args:   []specs.LinuxSeccompArg{},
   602  			})
   603  		case "CAP_SYS_TTY_CONFIG":
   604  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   605  				Names:  []string{"vhangup"},
   606  				Action: specs.ActAllow,
   607  				Args:   []specs.LinuxSeccompArg{},
   608  			})
   609  		case "CAP_SYSLOG":
   610  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   611  				Names:  []string{"syslog"},
   612  				Action: specs.ActAllow,
   613  				Args:   []specs.LinuxSeccompArg{},
   614  			})
   615  		}
   616  	}
   617  
   618  	if !admin {
   619  		switch runtime.GOARCH {
   620  		case "s390", "s390x":
   621  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   622  				Names: []string{
   623  					"clone",
   624  				},
   625  				Action: specs.ActAllow,
   626  				Args: []specs.LinuxSeccompArg{
   627  					{
   628  						Index:    1,
   629  						Value:    unix.CLONE_NEWNS | unix.CLONE_NEWUTS | unix.CLONE_NEWIPC | unix.CLONE_NEWUSER | unix.CLONE_NEWPID | unix.CLONE_NEWNET | unix.CLONE_NEWCGROUP,
   630  						ValueTwo: 0,
   631  						Op:       specs.OpMaskedEqual,
   632  					},
   633  				},
   634  			})
   635  		default:
   636  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   637  				Names: []string{
   638  					"clone",
   639  				},
   640  				Action: specs.ActAllow,
   641  				Args: []specs.LinuxSeccompArg{
   642  					{
   643  						Index:    0,
   644  						Value:    unix.CLONE_NEWNS | unix.CLONE_NEWUTS | unix.CLONE_NEWIPC | unix.CLONE_NEWUSER | unix.CLONE_NEWPID | unix.CLONE_NEWNET | unix.CLONE_NEWCGROUP,
   645  						ValueTwo: 0,
   646  						Op:       specs.OpMaskedEqual,
   647  					},
   648  				},
   649  			})
   650  		}
   651  	}
   652  
   653  	return s
   654  }