github.com/demonoid81/containerd@v1.3.4/contrib/seccomp/seccomp_default.go (about)

     1  // +build linux
     2  
     3  /*
     4     Copyright The containerd Authors.
     5  
     6     Licensed under the Apache License, Version 2.0 (the "License");
     7     you may not use this file except in compliance with the License.
     8     You may obtain a copy of the License at
     9  
    10         http://www.apache.org/licenses/LICENSE-2.0
    11  
    12     Unless required by applicable law or agreed to in writing, software
    13     distributed under the License is distributed on an "AS IS" BASIS,
    14     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15     See the License for the specific language governing permissions and
    16     limitations under the License.
    17  */
    18  
    19  package seccomp
    20  
    21  import (
    22  	"runtime"
    23  
    24  	"golang.org/x/sys/unix"
    25  
    26  	"github.com/opencontainers/runtime-spec/specs-go"
    27  )
    28  
    29  func arches() []specs.Arch {
    30  	switch runtime.GOARCH {
    31  	case "amd64":
    32  		return []specs.Arch{specs.ArchX86_64, specs.ArchX86, specs.ArchX32}
    33  	case "arm64":
    34  		return []specs.Arch{specs.ArchARM, specs.ArchAARCH64}
    35  	case "mips64":
    36  		return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32}
    37  	case "mips64n32":
    38  		return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32}
    39  	case "mipsel64":
    40  		return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32}
    41  	case "mipsel64n32":
    42  		return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32}
    43  	case "s390x":
    44  		return []specs.Arch{specs.ArchS390, specs.ArchS390X}
    45  	default:
    46  		return []specs.Arch{}
    47  	}
    48  }
    49  
    50  // DefaultProfile defines the whitelist for the default seccomp profile.
    51  func DefaultProfile(sp *specs.Spec) *specs.LinuxSeccomp {
    52  	syscalls := []specs.LinuxSyscall{
    53  		{
    54  			Names: []string{
    55  				"accept",
    56  				"accept4",
    57  				"access",
    58  				"alarm",
    59  				"alarm",
    60  				"bind",
    61  				"brk",
    62  				"capget",
    63  				"capset",
    64  				"chdir",
    65  				"chmod",
    66  				"chown",
    67  				"chown32",
    68  				"clock_getres",
    69  				"clock_gettime",
    70  				"clock_nanosleep",
    71  				"close",
    72  				"connect",
    73  				"copy_file_range",
    74  				"creat",
    75  				"dup",
    76  				"dup2",
    77  				"dup3",
    78  				"epoll_create",
    79  				"epoll_create1",
    80  				"epoll_ctl",
    81  				"epoll_ctl_old",
    82  				"epoll_pwait",
    83  				"epoll_wait",
    84  				"epoll_wait_old",
    85  				"eventfd",
    86  				"eventfd2",
    87  				"execve",
    88  				"execveat",
    89  				"exit",
    90  				"exit_group",
    91  				"faccessat",
    92  				"fadvise64",
    93  				"fadvise64_64",
    94  				"fallocate",
    95  				"fanotify_mark",
    96  				"fchdir",
    97  				"fchmod",
    98  				"fchmodat",
    99  				"fchown",
   100  				"fchown32",
   101  				"fchownat",
   102  				"fcntl",
   103  				"fcntl64",
   104  				"fdatasync",
   105  				"fgetxattr",
   106  				"flistxattr",
   107  				"flock",
   108  				"fork",
   109  				"fremovexattr",
   110  				"fsetxattr",
   111  				"fstat",
   112  				"fstat64",
   113  				"fstatat64",
   114  				"fstatfs",
   115  				"fstatfs64",
   116  				"fsync",
   117  				"ftruncate",
   118  				"ftruncate64",
   119  				"futex",
   120  				"futimesat",
   121  				"getcpu",
   122  				"getcwd",
   123  				"getdents",
   124  				"getdents64",
   125  				"getegid",
   126  				"getegid32",
   127  				"geteuid",
   128  				"geteuid32",
   129  				"getgid",
   130  				"getgid32",
   131  				"getgroups",
   132  				"getgroups32",
   133  				"getitimer",
   134  				"getpeername",
   135  				"getpgid",
   136  				"getpgrp",
   137  				"getpid",
   138  				"getppid",
   139  				"getpriority",
   140  				"getrandom",
   141  				"getresgid",
   142  				"getresgid32",
   143  				"getresuid",
   144  				"getresuid32",
   145  				"getrlimit",
   146  				"get_robust_list",
   147  				"getrusage",
   148  				"getsid",
   149  				"getsockname",
   150  				"getsockopt",
   151  				"get_thread_area",
   152  				"gettid",
   153  				"gettimeofday",
   154  				"getuid",
   155  				"getuid32",
   156  				"getxattr",
   157  				"inotify_add_watch",
   158  				"inotify_init",
   159  				"inotify_init1",
   160  				"inotify_rm_watch",
   161  				"io_cancel",
   162  				"ioctl",
   163  				"io_destroy",
   164  				"io_getevents",
   165  				"io_pgetevents",
   166  				"ioprio_get",
   167  				"ioprio_set",
   168  				"io_setup",
   169  				"io_submit",
   170  				"ipc",
   171  				"kill",
   172  				"lchown",
   173  				"lchown32",
   174  				"lgetxattr",
   175  				"link",
   176  				"linkat",
   177  				"listen",
   178  				"listxattr",
   179  				"llistxattr",
   180  				"_llseek",
   181  				"lremovexattr",
   182  				"lseek",
   183  				"lsetxattr",
   184  				"lstat",
   185  				"lstat64",
   186  				"madvise",
   187  				"memfd_create",
   188  				"mincore",
   189  				"mkdir",
   190  				"mkdirat",
   191  				"mknod",
   192  				"mknodat",
   193  				"mlock",
   194  				"mlock2",
   195  				"mlockall",
   196  				"mmap",
   197  				"mmap2",
   198  				"mprotect",
   199  				"mq_getsetattr",
   200  				"mq_notify",
   201  				"mq_open",
   202  				"mq_timedreceive",
   203  				"mq_timedsend",
   204  				"mq_unlink",
   205  				"mremap",
   206  				"msgctl",
   207  				"msgget",
   208  				"msgrcv",
   209  				"msgsnd",
   210  				"msync",
   211  				"munlock",
   212  				"munlockall",
   213  				"munmap",
   214  				"nanosleep",
   215  				"newfstatat",
   216  				"_newselect",
   217  				"open",
   218  				"openat",
   219  				"pause",
   220  				"pipe",
   221  				"pipe2",
   222  				"poll",
   223  				"ppoll",
   224  				"prctl",
   225  				"pread64",
   226  				"preadv",
   227  				"prlimit64",
   228  				"pselect6",
   229  				"pwrite64",
   230  				"pwritev",
   231  				"read",
   232  				"readahead",
   233  				"readlink",
   234  				"readlinkat",
   235  				"readv",
   236  				"recv",
   237  				"recvfrom",
   238  				"recvmmsg",
   239  				"recvmsg",
   240  				"remap_file_pages",
   241  				"removexattr",
   242  				"rename",
   243  				"renameat",
   244  				"renameat2",
   245  				"restart_syscall",
   246  				"rmdir",
   247  				"rt_sigaction",
   248  				"rt_sigpending",
   249  				"rt_sigprocmask",
   250  				"rt_sigqueueinfo",
   251  				"rt_sigreturn",
   252  				"rt_sigsuspend",
   253  				"rt_sigtimedwait",
   254  				"rt_tgsigqueueinfo",
   255  				"sched_getaffinity",
   256  				"sched_getattr",
   257  				"sched_getparam",
   258  				"sched_get_priority_max",
   259  				"sched_get_priority_min",
   260  				"sched_getscheduler",
   261  				"sched_rr_get_interval",
   262  				"sched_setaffinity",
   263  				"sched_setattr",
   264  				"sched_setparam",
   265  				"sched_setscheduler",
   266  				"sched_yield",
   267  				"seccomp",
   268  				"select",
   269  				"semctl",
   270  				"semget",
   271  				"semop",
   272  				"semtimedop",
   273  				"send",
   274  				"sendfile",
   275  				"sendfile64",
   276  				"sendmmsg",
   277  				"sendmsg",
   278  				"sendto",
   279  				"setfsgid",
   280  				"setfsgid32",
   281  				"setfsuid",
   282  				"setfsuid32",
   283  				"setgid",
   284  				"setgid32",
   285  				"setgroups",
   286  				"setgroups32",
   287  				"setitimer",
   288  				"setpgid",
   289  				"setpriority",
   290  				"setregid",
   291  				"setregid32",
   292  				"setresgid",
   293  				"setresgid32",
   294  				"setresuid",
   295  				"setresuid32",
   296  				"setreuid",
   297  				"setreuid32",
   298  				"setrlimit",
   299  				"set_robust_list",
   300  				"setsid",
   301  				"setsockopt",
   302  				"set_thread_area",
   303  				"set_tid_address",
   304  				"setuid",
   305  				"setuid32",
   306  				"setxattr",
   307  				"shmat",
   308  				"shmctl",
   309  				"shmdt",
   310  				"shmget",
   311  				"shutdown",
   312  				"sigaltstack",
   313  				"signalfd",
   314  				"signalfd4",
   315  				"sigprocmask",
   316  				"sigreturn",
   317  				"socket",
   318  				"socketcall",
   319  				"socketpair",
   320  				"splice",
   321  				"stat",
   322  				"stat64",
   323  				"statfs",
   324  				"statfs64",
   325  				"statx",
   326  				"symlink",
   327  				"symlinkat",
   328  				"sync",
   329  				"sync_file_range",
   330  				"syncfs",
   331  				"sysinfo",
   332  				"syslog",
   333  				"tee",
   334  				"tgkill",
   335  				"time",
   336  				"timer_create",
   337  				"timer_delete",
   338  				"timerfd_create",
   339  				"timerfd_gettime",
   340  				"timerfd_settime",
   341  				"timer_getoverrun",
   342  				"timer_gettime",
   343  				"timer_settime",
   344  				"times",
   345  				"tkill",
   346  				"truncate",
   347  				"truncate64",
   348  				"ugetrlimit",
   349  				"umask",
   350  				"uname",
   351  				"unlink",
   352  				"unlinkat",
   353  				"utime",
   354  				"utimensat",
   355  				"utimes",
   356  				"vfork",
   357  				"vmsplice",
   358  				"wait4",
   359  				"waitid",
   360  				"waitpid",
   361  				"write",
   362  				"writev",
   363  			},
   364  			Action: specs.ActAllow,
   365  			Args:   []specs.LinuxSeccompArg{},
   366  		},
   367  		{
   368  			Names:  []string{"personality"},
   369  			Action: specs.ActAllow,
   370  			Args: []specs.LinuxSeccompArg{
   371  				{
   372  					Index: 0,
   373  					Value: 0x0,
   374  					Op:    specs.OpEqualTo,
   375  				},
   376  			},
   377  		},
   378  		{
   379  			Names:  []string{"personality"},
   380  			Action: specs.ActAllow,
   381  			Args: []specs.LinuxSeccompArg{
   382  				{
   383  					Index: 0,
   384  					Value: 0x0008,
   385  					Op:    specs.OpEqualTo,
   386  				},
   387  			},
   388  		},
   389  		{
   390  			Names:  []string{"personality"},
   391  			Action: specs.ActAllow,
   392  			Args: []specs.LinuxSeccompArg{
   393  				{
   394  					Index: 0,
   395  					Value: 0xffffffff,
   396  					Op:    specs.OpEqualTo,
   397  				},
   398  			},
   399  		},
   400  	}
   401  
   402  	s := &specs.LinuxSeccomp{
   403  		DefaultAction: specs.ActErrno,
   404  		Architectures: arches(),
   405  		Syscalls:      syscalls,
   406  	}
   407  
   408  	// include by arch
   409  	switch runtime.GOARCH {
   410  	case "arm", "arm64":
   411  		s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   412  			Names: []string{
   413  				"arm_fadvise64_64",
   414  				"arm_sync_file_range",
   415  				"breakpoint",
   416  				"cacheflush",
   417  				"set_tls",
   418  			},
   419  			Action: specs.ActAllow,
   420  			Args:   []specs.LinuxSeccompArg{},
   421  		})
   422  	case "amd64":
   423  		s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   424  			Names: []string{
   425  				"arch_prctl",
   426  				"modify_ldt",
   427  			},
   428  			Action: specs.ActAllow,
   429  			Args:   []specs.LinuxSeccompArg{},
   430  		})
   431  	case "386":
   432  		s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   433  			Names: []string{
   434  				"modify_ldt",
   435  			},
   436  			Action: specs.ActAllow,
   437  			Args:   []specs.LinuxSeccompArg{},
   438  		})
   439  	case "s390", "s390x":
   440  		s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   441  			Names: []string{
   442  				"s390_pci_mmio_read",
   443  				"s390_pci_mmio_write",
   444  				"s390_runtime_instr",
   445  			},
   446  			Action: specs.ActAllow,
   447  			Args:   []specs.LinuxSeccompArg{},
   448  		})
   449  	}
   450  
   451  	admin := false
   452  	for _, c := range sp.Process.Capabilities.Bounding {
   453  		switch c {
   454  		case "CAP_DAC_READ_SEARCH":
   455  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   456  				Names:  []string{"open_by_handle_at"},
   457  				Action: specs.ActAllow,
   458  				Args:   []specs.LinuxSeccompArg{},
   459  			})
   460  		case "CAP_SYS_ADMIN":
   461  			admin = true
   462  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   463  				Names: []string{
   464  					"bpf",
   465  					"clone",
   466  					"fanotify_init",
   467  					"lookup_dcookie",
   468  					"mount",
   469  					"name_to_handle_at",
   470  					"perf_event_open",
   471  					"setdomainname",
   472  					"sethostname",
   473  					"setns",
   474  					"umount",
   475  					"umount2",
   476  					"unshare",
   477  				},
   478  				Action: specs.ActAllow,
   479  				Args:   []specs.LinuxSeccompArg{},
   480  			})
   481  		case "CAP_SYS_BOOT":
   482  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   483  				Names:  []string{"reboot"},
   484  				Action: specs.ActAllow,
   485  				Args:   []specs.LinuxSeccompArg{},
   486  			})
   487  		case "CAP_SYS_CHROOT":
   488  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   489  				Names:  []string{"chroot"},
   490  				Action: specs.ActAllow,
   491  				Args:   []specs.LinuxSeccompArg{},
   492  			})
   493  		case "CAP_SYS_MODULE":
   494  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   495  				Names: []string{
   496  					"delete_module",
   497  					"init_module",
   498  					"finit_module",
   499  					"query_module",
   500  				},
   501  				Action: specs.ActAllow,
   502  				Args:   []specs.LinuxSeccompArg{},
   503  			})
   504  		case "CAP_SYS_PACCT":
   505  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   506  				Names:  []string{"acct"},
   507  				Action: specs.ActAllow,
   508  				Args:   []specs.LinuxSeccompArg{},
   509  			})
   510  		case "CAP_SYS_PTRACE":
   511  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   512  				Names: []string{
   513  					"kcmp",
   514  					"process_vm_readv",
   515  					"process_vm_writev",
   516  					"ptrace",
   517  				},
   518  				Action: specs.ActAllow,
   519  				Args:   []specs.LinuxSeccompArg{},
   520  			})
   521  		case "CAP_SYS_RAWIO":
   522  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   523  				Names: []string{
   524  					"iopl",
   525  					"ioperm",
   526  				},
   527  				Action: specs.ActAllow,
   528  				Args:   []specs.LinuxSeccompArg{},
   529  			})
   530  		case "CAP_SYS_TIME":
   531  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   532  				Names: []string{
   533  					"settimeofday",
   534  					"stime",
   535  					"adjtimex",
   536  				},
   537  				Action: specs.ActAllow,
   538  				Args:   []specs.LinuxSeccompArg{},
   539  			})
   540  		case "CAP_SYS_TTY_CONFIG":
   541  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   542  				Names:  []string{"vhangup"},
   543  				Action: specs.ActAllow,
   544  				Args:   []specs.LinuxSeccompArg{},
   545  			})
   546  		}
   547  	}
   548  
   549  	if !admin {
   550  		switch runtime.GOARCH {
   551  		case "s390", "s390x":
   552  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   553  				Names: []string{
   554  					"clone",
   555  				},
   556  				Action: specs.ActAllow,
   557  				Args: []specs.LinuxSeccompArg{
   558  					{
   559  						Index:    1,
   560  						Value:    unix.CLONE_NEWNS | unix.CLONE_NEWUTS | unix.CLONE_NEWIPC | unix.CLONE_NEWUSER | unix.CLONE_NEWPID | unix.CLONE_NEWNET | unix.CLONE_NEWCGROUP,
   561  						ValueTwo: 0,
   562  						Op:       specs.OpMaskedEqual,
   563  					},
   564  				},
   565  			})
   566  		default:
   567  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   568  				Names: []string{
   569  					"clone",
   570  				},
   571  				Action: specs.ActAllow,
   572  				Args: []specs.LinuxSeccompArg{
   573  					{
   574  						Index:    0,
   575  						Value:    unix.CLONE_NEWNS | unix.CLONE_NEWUTS | unix.CLONE_NEWIPC | unix.CLONE_NEWUSER | unix.CLONE_NEWPID | unix.CLONE_NEWNET | unix.CLONE_NEWCGROUP,
   576  						ValueTwo: 0,
   577  						Op:       specs.OpMaskedEqual,
   578  					},
   579  				},
   580  			})
   581  		}
   582  	}
   583  
   584  	return s
   585  }