github.com/containerd/containerd@v22.0.0-20200918172823-438c87b8e050+incompatible/contrib/seccomp/seccomp_default.go (about)

     1  // +build linux
     2  
     3  /*
     4     Copyright The containerd Authors.
     5  
     6     Licensed under the Apache License, Version 2.0 (the "License");
     7     you may not use this file except in compliance with the License.
     8     You may obtain a copy of the License at
     9  
    10         http://www.apache.org/licenses/LICENSE-2.0
    11  
    12     Unless required by applicable law or agreed to in writing, software
    13     distributed under the License is distributed on an "AS IS" BASIS,
    14     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15     See the License for the specific language governing permissions and
    16     limitations under the License.
    17  */
    18  
    19  package seccomp
    20  
    21  import (
    22  	"runtime"
    23  
    24  	"golang.org/x/sys/unix"
    25  
    26  	"github.com/opencontainers/runtime-spec/specs-go"
    27  )
    28  
    29  func arches() []specs.Arch {
    30  	switch runtime.GOARCH {
    31  	case "amd64":
    32  		return []specs.Arch{specs.ArchX86_64, specs.ArchX86, specs.ArchX32}
    33  	case "arm64":
    34  		return []specs.Arch{specs.ArchARM, specs.ArchAARCH64}
    35  	case "mips64":
    36  		return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32}
    37  	case "mips64n32":
    38  		return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32}
    39  	case "mipsel64":
    40  		return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32}
    41  	case "mipsel64n32":
    42  		return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32}
    43  	case "s390x":
    44  		return []specs.Arch{specs.ArchS390, specs.ArchS390X}
    45  	default:
    46  		return []specs.Arch{}
    47  	}
    48  }
    49  
    50  // DefaultProfile defines the allowed syscalls for the default seccomp profile.
    51  func DefaultProfile(sp *specs.Spec) *specs.LinuxSeccomp {
    52  	syscalls := []specs.LinuxSyscall{
    53  		{
    54  			Names: []string{
    55  				"accept",
    56  				"accept4",
    57  				"access",
    58  				"adjtimex",
    59  				"alarm",
    60  				"bind",
    61  				"brk",
    62  				"capget",
    63  				"capset",
    64  				"chdir",
    65  				"chmod",
    66  				"chown",
    67  				"chown32",
    68  				"clock_adjtime",
    69  				"clock_adjtime64",
    70  				"clock_getres",
    71  				"clock_getres_time64",
    72  				"clock_gettime",
    73  				"clock_gettime64",
    74  				"clock_nanosleep",
    75  				"clock_nanosleep_time64",
    76  				"close",
    77  				"connect",
    78  				"copy_file_range",
    79  				"creat",
    80  				"dup",
    81  				"dup2",
    82  				"dup3",
    83  				"epoll_create",
    84  				"epoll_create1",
    85  				"epoll_ctl",
    86  				"epoll_ctl_old",
    87  				"epoll_pwait",
    88  				"epoll_wait",
    89  				"epoll_wait_old",
    90  				"eventfd",
    91  				"eventfd2",
    92  				"execve",
    93  				"execveat",
    94  				"exit",
    95  				"exit_group",
    96  				"faccessat",
    97  				"faccessat2",
    98  				"fadvise64",
    99  				"fadvise64_64",
   100  				"fallocate",
   101  				"fanotify_mark",
   102  				"fchdir",
   103  				"fchmod",
   104  				"fchmodat",
   105  				"fchown",
   106  				"fchown32",
   107  				"fchownat",
   108  				"fcntl",
   109  				"fcntl64",
   110  				"fdatasync",
   111  				"fgetxattr",
   112  				"flistxattr",
   113  				"flock",
   114  				"fork",
   115  				"fremovexattr",
   116  				"fsetxattr",
   117  				"fstat",
   118  				"fstat64",
   119  				"fstatat64",
   120  				"fstatfs",
   121  				"fstatfs64",
   122  				"fsync",
   123  				"ftruncate",
   124  				"ftruncate64",
   125  				"futex",
   126  				"futex_time64",
   127  				"futimesat",
   128  				"getcpu",
   129  				"getcwd",
   130  				"getdents",
   131  				"getdents64",
   132  				"getegid",
   133  				"getegid32",
   134  				"geteuid",
   135  				"geteuid32",
   136  				"getgid",
   137  				"getgid32",
   138  				"getgroups",
   139  				"getgroups32",
   140  				"getitimer",
   141  				"getpeername",
   142  				"getpgid",
   143  				"getpgrp",
   144  				"getpid",
   145  				"getppid",
   146  				"getpriority",
   147  				"getrandom",
   148  				"getresgid",
   149  				"getresgid32",
   150  				"getresuid",
   151  				"getresuid32",
   152  				"getrlimit",
   153  				"get_robust_list",
   154  				"getrusage",
   155  				"getsid",
   156  				"getsockname",
   157  				"getsockopt",
   158  				"get_thread_area",
   159  				"gettid",
   160  				"gettimeofday",
   161  				"getuid",
   162  				"getuid32",
   163  				"getxattr",
   164  				"inotify_add_watch",
   165  				"inotify_init",
   166  				"inotify_init1",
   167  				"inotify_rm_watch",
   168  				"io_cancel",
   169  				"ioctl",
   170  				"io_destroy",
   171  				"io_getevents",
   172  				"io_pgetevents",
   173  				"io_pgetevents_time64",
   174  				"ioprio_get",
   175  				"ioprio_set",
   176  				"io_setup",
   177  				"io_submit",
   178  				"io_uring_enter",
   179  				"io_uring_register",
   180  				"io_uring_setup",
   181  				"ipc",
   182  				"kill",
   183  				"lchown",
   184  				"lchown32",
   185  				"lgetxattr",
   186  				"link",
   187  				"linkat",
   188  				"listen",
   189  				"listxattr",
   190  				"llistxattr",
   191  				"_llseek",
   192  				"lremovexattr",
   193  				"lseek",
   194  				"lsetxattr",
   195  				"lstat",
   196  				"lstat64",
   197  				"madvise",
   198  				"membarrier",
   199  				"memfd_create",
   200  				"mincore",
   201  				"mkdir",
   202  				"mkdirat",
   203  				"mknod",
   204  				"mknodat",
   205  				"mlock",
   206  				"mlock2",
   207  				"mlockall",
   208  				"mmap",
   209  				"mmap2",
   210  				"mprotect",
   211  				"mq_getsetattr",
   212  				"mq_notify",
   213  				"mq_open",
   214  				"mq_timedreceive",
   215  				"mq_timedreceive_time64",
   216  				"mq_timedsend",
   217  				"mq_timedsend_time64",
   218  				"mq_unlink",
   219  				"mremap",
   220  				"msgctl",
   221  				"msgget",
   222  				"msgrcv",
   223  				"msgsnd",
   224  				"msync",
   225  				"munlock",
   226  				"munlockall",
   227  				"munmap",
   228  				"nanosleep",
   229  				"newfstatat",
   230  				"_newselect",
   231  				"open",
   232  				"openat",
   233  				"openat2",
   234  				"pause",
   235  				"pipe",
   236  				"pipe2",
   237  				"poll",
   238  				"ppoll",
   239  				"ppoll_time64",
   240  				"prctl",
   241  				"pread64",
   242  				"preadv",
   243  				"preadv2",
   244  				"prlimit64",
   245  				"pselect6",
   246  				"pselect6_time64",
   247  				"pwrite64",
   248  				"pwritev",
   249  				"pwritev2",
   250  				"read",
   251  				"readahead",
   252  				"readlink",
   253  				"readlinkat",
   254  				"readv",
   255  				"recv",
   256  				"recvfrom",
   257  				"recvmmsg",
   258  				"recvmmsg_time64",
   259  				"recvmsg",
   260  				"remap_file_pages",
   261  				"removexattr",
   262  				"rename",
   263  				"renameat",
   264  				"renameat2",
   265  				"restart_syscall",
   266  				"rmdir",
   267  				"rseq",
   268  				"rt_sigaction",
   269  				"rt_sigpending",
   270  				"rt_sigprocmask",
   271  				"rt_sigqueueinfo",
   272  				"rt_sigreturn",
   273  				"rt_sigsuspend",
   274  				"rt_sigtimedwait",
   275  				"rt_sigtimedwait_time64",
   276  				"rt_tgsigqueueinfo",
   277  				"sched_getaffinity",
   278  				"sched_getattr",
   279  				"sched_getparam",
   280  				"sched_get_priority_max",
   281  				"sched_get_priority_min",
   282  				"sched_getscheduler",
   283  				"sched_rr_get_interval",
   284  				"sched_rr_get_interval_time64",
   285  				"sched_setaffinity",
   286  				"sched_setattr",
   287  				"sched_setparam",
   288  				"sched_setscheduler",
   289  				"sched_yield",
   290  				"seccomp",
   291  				"select",
   292  				"semctl",
   293  				"semget",
   294  				"semop",
   295  				"semtimedop",
   296  				"semtimedop_time64",
   297  				"send",
   298  				"sendfile",
   299  				"sendfile64",
   300  				"sendmmsg",
   301  				"sendmsg",
   302  				"sendto",
   303  				"setfsgid",
   304  				"setfsgid32",
   305  				"setfsuid",
   306  				"setfsuid32",
   307  				"setgid",
   308  				"setgid32",
   309  				"setgroups",
   310  				"setgroups32",
   311  				"setitimer",
   312  				"setpgid",
   313  				"setpriority",
   314  				"setregid",
   315  				"setregid32",
   316  				"setresgid",
   317  				"setresgid32",
   318  				"setresuid",
   319  				"setresuid32",
   320  				"setreuid",
   321  				"setreuid32",
   322  				"setrlimit",
   323  				"set_robust_list",
   324  				"setsid",
   325  				"setsockopt",
   326  				"set_thread_area",
   327  				"set_tid_address",
   328  				"setuid",
   329  				"setuid32",
   330  				"setxattr",
   331  				"shmat",
   332  				"shmctl",
   333  				"shmdt",
   334  				"shmget",
   335  				"shutdown",
   336  				"sigaltstack",
   337  				"signalfd",
   338  				"signalfd4",
   339  				"sigprocmask",
   340  				"sigreturn",
   341  				"socket",
   342  				"socketcall",
   343  				"socketpair",
   344  				"splice",
   345  				"stat",
   346  				"stat64",
   347  				"statfs",
   348  				"statfs64",
   349  				"statx",
   350  				"symlink",
   351  				"symlinkat",
   352  				"sync",
   353  				"sync_file_range",
   354  				"syncfs",
   355  				"sysinfo",
   356  				"tee",
   357  				"tgkill",
   358  				"time",
   359  				"timer_create",
   360  				"timer_delete",
   361  				"timer_getoverrun",
   362  				"timer_gettime",
   363  				"timer_gettime64",
   364  				"timer_settime",
   365  				"timer_settime64",
   366  				"timerfd_create",
   367  				"timerfd_gettime",
   368  				"timerfd_gettime64",
   369  				"timerfd_settime",
   370  				"timerfd_settime64",
   371  				"times",
   372  				"tkill",
   373  				"truncate",
   374  				"truncate64",
   375  				"ugetrlimit",
   376  				"umask",
   377  				"uname",
   378  				"unlink",
   379  				"unlinkat",
   380  				"utime",
   381  				"utimensat",
   382  				"utimensat_time64",
   383  				"utimes",
   384  				"vfork",
   385  				"vmsplice",
   386  				"wait4",
   387  				"waitid",
   388  				"waitpid",
   389  				"write",
   390  				"writev",
   391  			},
   392  			Action: specs.ActAllow,
   393  			Args:   []specs.LinuxSeccompArg{},
   394  		},
   395  		{
   396  			Names:  []string{"personality"},
   397  			Action: specs.ActAllow,
   398  			Args: []specs.LinuxSeccompArg{
   399  				{
   400  					Index: 0,
   401  					Value: 0x0,
   402  					Op:    specs.OpEqualTo,
   403  				},
   404  			},
   405  		},
   406  		{
   407  			Names:  []string{"personality"},
   408  			Action: specs.ActAllow,
   409  			Args: []specs.LinuxSeccompArg{
   410  				{
   411  					Index: 0,
   412  					Value: 0x0008,
   413  					Op:    specs.OpEqualTo,
   414  				},
   415  			},
   416  		},
   417  		{
   418  			Names:  []string{"personality"},
   419  			Action: specs.ActAllow,
   420  			Args: []specs.LinuxSeccompArg{
   421  				{
   422  					Index: 0,
   423  					Value: 0x20000,
   424  					Op:    specs.OpEqualTo,
   425  				},
   426  			},
   427  		},
   428  		{
   429  			Names:  []string{"personality"},
   430  			Action: specs.ActAllow,
   431  			Args: []specs.LinuxSeccompArg{
   432  				{
   433  					Index: 0,
   434  					Value: 0x20008,
   435  					Op:    specs.OpEqualTo,
   436  				},
   437  			},
   438  		},
   439  		{
   440  			Names:  []string{"personality"},
   441  			Action: specs.ActAllow,
   442  			Args: []specs.LinuxSeccompArg{
   443  				{
   444  					Index: 0,
   445  					Value: 0xffffffff,
   446  					Op:    specs.OpEqualTo,
   447  				},
   448  			},
   449  		},
   450  	}
   451  
   452  	s := &specs.LinuxSeccomp{
   453  		DefaultAction: specs.ActErrno,
   454  		Architectures: arches(),
   455  		Syscalls:      syscalls,
   456  	}
   457  
   458  	// include by arch
   459  	switch runtime.GOARCH {
   460  	case "ppc64le":
   461  		s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   462  			Names: []string{
   463  				"sync_file_range2",
   464  			},
   465  			Action: specs.ActAllow,
   466  			Args:   []specs.LinuxSeccompArg{},
   467  		})
   468  	case "arm", "arm64":
   469  		s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   470  			Names: []string{
   471  				"arm_fadvise64_64",
   472  				"arm_sync_file_range",
   473  				"sync_file_range2",
   474  				"breakpoint",
   475  				"cacheflush",
   476  				"set_tls",
   477  			},
   478  			Action: specs.ActAllow,
   479  			Args:   []specs.LinuxSeccompArg{},
   480  		})
   481  	case "amd64":
   482  		s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   483  			Names: []string{
   484  				"arch_prctl",
   485  				"modify_ldt",
   486  			},
   487  			Action: specs.ActAllow,
   488  			Args:   []specs.LinuxSeccompArg{},
   489  		})
   490  	case "386":
   491  		s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   492  			Names: []string{
   493  				"modify_ldt",
   494  			},
   495  			Action: specs.ActAllow,
   496  			Args:   []specs.LinuxSeccompArg{},
   497  		})
   498  	case "s390", "s390x":
   499  		s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   500  			Names: []string{
   501  				"s390_pci_mmio_read",
   502  				"s390_pci_mmio_write",
   503  				"s390_runtime_instr",
   504  			},
   505  			Action: specs.ActAllow,
   506  			Args:   []specs.LinuxSeccompArg{},
   507  		})
   508  	}
   509  
   510  	admin := false
   511  	for _, c := range sp.Process.Capabilities.Bounding {
   512  		switch c {
   513  		case "CAP_DAC_READ_SEARCH":
   514  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   515  				Names:  []string{"open_by_handle_at"},
   516  				Action: specs.ActAllow,
   517  				Args:   []specs.LinuxSeccompArg{},
   518  			})
   519  		case "CAP_SYS_ADMIN":
   520  			admin = true
   521  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   522  				Names: []string{
   523  					"bpf",
   524  					"clone",
   525  					"fanotify_init",
   526  					"lookup_dcookie",
   527  					"mount",
   528  					"name_to_handle_at",
   529  					"perf_event_open",
   530  					"quotactl",
   531  					"setdomainname",
   532  					"sethostname",
   533  					"setns",
   534  					"syslog",
   535  					"umount",
   536  					"umount2",
   537  					"unshare",
   538  				},
   539  				Action: specs.ActAllow,
   540  				Args:   []specs.LinuxSeccompArg{},
   541  			})
   542  		case "CAP_SYS_BOOT":
   543  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   544  				Names:  []string{"reboot"},
   545  				Action: specs.ActAllow,
   546  				Args:   []specs.LinuxSeccompArg{},
   547  			})
   548  		case "CAP_SYS_CHROOT":
   549  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   550  				Names:  []string{"chroot"},
   551  				Action: specs.ActAllow,
   552  				Args:   []specs.LinuxSeccompArg{},
   553  			})
   554  		case "CAP_SYS_MODULE":
   555  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   556  				Names: []string{
   557  					"delete_module",
   558  					"init_module",
   559  					"finit_module",
   560  				},
   561  				Action: specs.ActAllow,
   562  				Args:   []specs.LinuxSeccompArg{},
   563  			})
   564  		case "CAP_SYS_PACCT":
   565  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   566  				Names:  []string{"acct"},
   567  				Action: specs.ActAllow,
   568  				Args:   []specs.LinuxSeccompArg{},
   569  			})
   570  		case "CAP_SYS_PTRACE":
   571  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   572  				Names: []string{
   573  					"kcmp",
   574  					"process_vm_readv",
   575  					"process_vm_writev",
   576  					"ptrace",
   577  				},
   578  				Action: specs.ActAllow,
   579  				Args:   []specs.LinuxSeccompArg{},
   580  			})
   581  		case "CAP_SYS_RAWIO":
   582  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   583  				Names: []string{
   584  					"iopl",
   585  					"ioperm",
   586  				},
   587  				Action: specs.ActAllow,
   588  				Args:   []specs.LinuxSeccompArg{},
   589  			})
   590  		case "CAP_SYS_TIME":
   591  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   592  				Names: []string{
   593  					"settimeofday",
   594  					"stime",
   595  					"clock_settime",
   596  				},
   597  				Action: specs.ActAllow,
   598  				Args:   []specs.LinuxSeccompArg{},
   599  			})
   600  		case "CAP_SYS_TTY_CONFIG":
   601  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   602  				Names:  []string{"vhangup"},
   603  				Action: specs.ActAllow,
   604  				Args:   []specs.LinuxSeccompArg{},
   605  			})
   606  		case "CAP_SYSLOG":
   607  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   608  				Names:  []string{"syslog"},
   609  				Action: specs.ActAllow,
   610  				Args:   []specs.LinuxSeccompArg{},
   611  			})
   612  		}
   613  	}
   614  
   615  	if !admin {
   616  		switch runtime.GOARCH {
   617  		case "s390", "s390x":
   618  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   619  				Names: []string{
   620  					"clone",
   621  				},
   622  				Action: specs.ActAllow,
   623  				Args: []specs.LinuxSeccompArg{
   624  					{
   625  						Index:    1,
   626  						Value:    unix.CLONE_NEWNS | unix.CLONE_NEWUTS | unix.CLONE_NEWIPC | unix.CLONE_NEWUSER | unix.CLONE_NEWPID | unix.CLONE_NEWNET | unix.CLONE_NEWCGROUP,
   627  						ValueTwo: 0,
   628  						Op:       specs.OpMaskedEqual,
   629  					},
   630  				},
   631  			})
   632  		default:
   633  			s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{
   634  				Names: []string{
   635  					"clone",
   636  				},
   637  				Action: specs.ActAllow,
   638  				Args: []specs.LinuxSeccompArg{
   639  					{
   640  						Index:    0,
   641  						Value:    unix.CLONE_NEWNS | unix.CLONE_NEWUTS | unix.CLONE_NEWIPC | unix.CLONE_NEWUSER | unix.CLONE_NEWPID | unix.CLONE_NEWNET | unix.CLONE_NEWCGROUP,
   642  						ValueTwo: 0,
   643  						Op:       specs.OpMaskedEqual,
   644  					},
   645  				},
   646  			})
   647  		}
   648  	}
   649  
   650  	return s
   651  }