github.com/containerd/Containerd@v1.4.13/contrib/seccomp/seccomp_default.go (about) 1 // +build linux 2 3 /* 4 Copyright The containerd Authors. 5 6 Licensed under the Apache License, Version 2.0 (the "License"); 7 you may not use this file except in compliance with the License. 8 You may obtain a copy of the License at 9 10 http://www.apache.org/licenses/LICENSE-2.0 11 12 Unless required by applicable law or agreed to in writing, software 13 distributed under the License is distributed on an "AS IS" BASIS, 14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 See the License for the specific language governing permissions and 16 limitations under the License. 17 */ 18 19 package seccomp 20 21 import ( 22 "runtime" 23 24 "golang.org/x/sys/unix" 25 26 "github.com/opencontainers/runtime-spec/specs-go" 27 ) 28 29 func arches() []specs.Arch { 30 switch runtime.GOARCH { 31 case "amd64": 32 return []specs.Arch{specs.ArchX86_64, specs.ArchX86, specs.ArchX32} 33 case "arm64": 34 return []specs.Arch{specs.ArchARM, specs.ArchAARCH64} 35 case "mips64": 36 return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32} 37 case "mips64n32": 38 return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32} 39 case "mipsel64": 40 return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32} 41 case "mipsel64n32": 42 return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32} 43 case "s390x": 44 return []specs.Arch{specs.ArchS390, specs.ArchS390X} 45 default: 46 return []specs.Arch{} 47 } 48 } 49 50 // DefaultProfile defines the allowed syscalls for the default seccomp profile. 51 func DefaultProfile(sp *specs.Spec) *specs.LinuxSeccomp { 52 nosys := uint(unix.ENOSYS) 53 syscalls := []specs.LinuxSyscall{ 54 { 55 Names: []string{ 56 "accept", 57 "accept4", 58 "access", 59 "adjtimex", 60 "alarm", 61 "bind", 62 "brk", 63 "capget", 64 "capset", 65 "chdir", 66 "chmod", 67 "chown", 68 "chown32", 69 "clock_adjtime", 70 "clock_adjtime64", 71 "clock_getres", 72 "clock_getres_time64", 73 "clock_gettime", 74 "clock_gettime64", 75 "clock_nanosleep", 76 "clock_nanosleep_time64", 77 "close", 78 "connect", 79 "copy_file_range", 80 "creat", 81 "dup", 82 "dup2", 83 "dup3", 84 "epoll_create", 85 "epoll_create1", 86 "epoll_ctl", 87 "epoll_ctl_old", 88 "epoll_pwait", 89 "epoll_wait", 90 "epoll_wait_old", 91 "eventfd", 92 "eventfd2", 93 "execve", 94 "execveat", 95 "exit", 96 "exit_group", 97 "faccessat", 98 "faccessat2", 99 "fadvise64", 100 "fadvise64_64", 101 "fallocate", 102 "fanotify_mark", 103 "fchdir", 104 "fchmod", 105 "fchmodat", 106 "fchown", 107 "fchown32", 108 "fchownat", 109 "fcntl", 110 "fcntl64", 111 "fdatasync", 112 "fgetxattr", 113 "flistxattr", 114 "flock", 115 "fork", 116 "fremovexattr", 117 "fsetxattr", 118 "fstat", 119 "fstat64", 120 "fstatat64", 121 "fstatfs", 122 "fstatfs64", 123 "fsync", 124 "ftruncate", 125 "ftruncate64", 126 "futex", 127 "futex_time64", 128 "futimesat", 129 "getcpu", 130 "getcwd", 131 "getdents", 132 "getdents64", 133 "getegid", 134 "getegid32", 135 "geteuid", 136 "geteuid32", 137 "getgid", 138 "getgid32", 139 "getgroups", 140 "getgroups32", 141 "getitimer", 142 "getpeername", 143 "getpgid", 144 "getpgrp", 145 "getpid", 146 "getppid", 147 "getpriority", 148 "getrandom", 149 "getresgid", 150 "getresgid32", 151 "getresuid", 152 "getresuid32", 153 "getrlimit", 154 "get_robust_list", 155 "getrusage", 156 "getsid", 157 "getsockname", 158 "getsockopt", 159 "get_thread_area", 160 "gettid", 161 "gettimeofday", 162 "getuid", 163 "getuid32", 164 "getxattr", 165 "inotify_add_watch", 166 "inotify_init", 167 "inotify_init1", 168 "inotify_rm_watch", 169 "io_cancel", 170 "ioctl", 171 "io_destroy", 172 "io_getevents", 173 "io_pgetevents", 174 "io_pgetevents_time64", 175 "ioprio_get", 176 "ioprio_set", 177 "io_setup", 178 "io_submit", 179 "io_uring_enter", 180 "io_uring_register", 181 "io_uring_setup", 182 "ipc", 183 "kill", 184 "lchown", 185 "lchown32", 186 "lgetxattr", 187 "link", 188 "linkat", 189 "listen", 190 "listxattr", 191 "llistxattr", 192 "_llseek", 193 "lremovexattr", 194 "lseek", 195 "lsetxattr", 196 "lstat", 197 "lstat64", 198 "madvise", 199 "membarrier", 200 "memfd_create", 201 "mincore", 202 "mkdir", 203 "mkdirat", 204 "mknod", 205 "mknodat", 206 "mlock", 207 "mlock2", 208 "mlockall", 209 "mmap", 210 "mmap2", 211 "mprotect", 212 "mq_getsetattr", 213 "mq_notify", 214 "mq_open", 215 "mq_timedreceive", 216 "mq_timedreceive_time64", 217 "mq_timedsend", 218 "mq_timedsend_time64", 219 "mq_unlink", 220 "mremap", 221 "msgctl", 222 "msgget", 223 "msgrcv", 224 "msgsnd", 225 "msync", 226 "munlock", 227 "munlockall", 228 "munmap", 229 "nanosleep", 230 "newfstatat", 231 "_newselect", 232 "open", 233 "openat", 234 "openat2", 235 "pause", 236 "pidfd_open", 237 "pidfd_send_signal", 238 "pipe", 239 "pipe2", 240 "poll", 241 "ppoll", 242 "ppoll_time64", 243 "prctl", 244 "pread64", 245 "preadv", 246 "preadv2", 247 "prlimit64", 248 "pselect6", 249 "pselect6_time64", 250 "pwrite64", 251 "pwritev", 252 "pwritev2", 253 "read", 254 "readahead", 255 "readlink", 256 "readlinkat", 257 "readv", 258 "recv", 259 "recvfrom", 260 "recvmmsg", 261 "recvmmsg_time64", 262 "recvmsg", 263 "remap_file_pages", 264 "removexattr", 265 "rename", 266 "renameat", 267 "renameat2", 268 "restart_syscall", 269 "rmdir", 270 "rseq", 271 "rt_sigaction", 272 "rt_sigpending", 273 "rt_sigprocmask", 274 "rt_sigqueueinfo", 275 "rt_sigreturn", 276 "rt_sigsuspend", 277 "rt_sigtimedwait", 278 "rt_sigtimedwait_time64", 279 "rt_tgsigqueueinfo", 280 "sched_getaffinity", 281 "sched_getattr", 282 "sched_getparam", 283 "sched_get_priority_max", 284 "sched_get_priority_min", 285 "sched_getscheduler", 286 "sched_rr_get_interval", 287 "sched_rr_get_interval_time64", 288 "sched_setaffinity", 289 "sched_setattr", 290 "sched_setparam", 291 "sched_setscheduler", 292 "sched_yield", 293 "seccomp", 294 "select", 295 "semctl", 296 "semget", 297 "semop", 298 "semtimedop", 299 "semtimedop_time64", 300 "send", 301 "sendfile", 302 "sendfile64", 303 "sendmmsg", 304 "sendmsg", 305 "sendto", 306 "setfsgid", 307 "setfsgid32", 308 "setfsuid", 309 "setfsuid32", 310 "setgid", 311 "setgid32", 312 "setgroups", 313 "setgroups32", 314 "setitimer", 315 "setpgid", 316 "setpriority", 317 "setregid", 318 "setregid32", 319 "setresgid", 320 "setresgid32", 321 "setresuid", 322 "setresuid32", 323 "setreuid", 324 "setreuid32", 325 "setrlimit", 326 "set_robust_list", 327 "setsid", 328 "setsockopt", 329 "set_thread_area", 330 "set_tid_address", 331 "setuid", 332 "setuid32", 333 "setxattr", 334 "shmat", 335 "shmctl", 336 "shmdt", 337 "shmget", 338 "shutdown", 339 "sigaltstack", 340 "signalfd", 341 "signalfd4", 342 "sigprocmask", 343 "sigreturn", 344 "socket", 345 "socketcall", 346 "socketpair", 347 "splice", 348 "stat", 349 "stat64", 350 "statfs", 351 "statfs64", 352 "statx", 353 "symlink", 354 "symlinkat", 355 "sync", 356 "sync_file_range", 357 "syncfs", 358 "sysinfo", 359 "tee", 360 "tgkill", 361 "time", 362 "timer_create", 363 "timer_delete", 364 "timer_getoverrun", 365 "timer_gettime", 366 "timer_gettime64", 367 "timer_settime", 368 "timer_settime64", 369 "timerfd_create", 370 "timerfd_gettime", 371 "timerfd_gettime64", 372 "timerfd_settime", 373 "timerfd_settime64", 374 "times", 375 "tkill", 376 "truncate", 377 "truncate64", 378 "ugetrlimit", 379 "umask", 380 "uname", 381 "unlink", 382 "unlinkat", 383 "utime", 384 "utimensat", 385 "utimensat_time64", 386 "utimes", 387 "vfork", 388 "vmsplice", 389 "wait4", 390 "waitid", 391 "waitpid", 392 "write", 393 "writev", 394 }, 395 Action: specs.ActAllow, 396 Args: []specs.LinuxSeccompArg{}, 397 }, 398 { 399 Names: []string{"personality"}, 400 Action: specs.ActAllow, 401 Args: []specs.LinuxSeccompArg{ 402 { 403 Index: 0, 404 Value: 0x0, 405 Op: specs.OpEqualTo, 406 }, 407 }, 408 }, 409 { 410 Names: []string{"personality"}, 411 Action: specs.ActAllow, 412 Args: []specs.LinuxSeccompArg{ 413 { 414 Index: 0, 415 Value: 0x0008, 416 Op: specs.OpEqualTo, 417 }, 418 }, 419 }, 420 { 421 Names: []string{"personality"}, 422 Action: specs.ActAllow, 423 Args: []specs.LinuxSeccompArg{ 424 { 425 Index: 0, 426 Value: 0x20000, 427 Op: specs.OpEqualTo, 428 }, 429 }, 430 }, 431 { 432 Names: []string{"personality"}, 433 Action: specs.ActAllow, 434 Args: []specs.LinuxSeccompArg{ 435 { 436 Index: 0, 437 Value: 0x20008, 438 Op: specs.OpEqualTo, 439 }, 440 }, 441 }, 442 { 443 Names: []string{"personality"}, 444 Action: specs.ActAllow, 445 Args: []specs.LinuxSeccompArg{ 446 { 447 Index: 0, 448 Value: 0xffffffff, 449 Op: specs.OpEqualTo, 450 }, 451 }, 452 }, 453 } 454 455 s := &specs.LinuxSeccomp{ 456 DefaultAction: specs.ActErrno, 457 Architectures: arches(), 458 Syscalls: syscalls, 459 } 460 461 // include by arch 462 switch runtime.GOARCH { 463 case "ppc64le": 464 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 465 Names: []string{ 466 "sync_file_range2", 467 }, 468 Action: specs.ActAllow, 469 Args: []specs.LinuxSeccompArg{}, 470 }) 471 case "arm", "arm64": 472 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 473 Names: []string{ 474 "arm_fadvise64_64", 475 "arm_sync_file_range", 476 "sync_file_range2", 477 "breakpoint", 478 "cacheflush", 479 "set_tls", 480 }, 481 Action: specs.ActAllow, 482 Args: []specs.LinuxSeccompArg{}, 483 }) 484 case "amd64": 485 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 486 Names: []string{ 487 "arch_prctl", 488 "modify_ldt", 489 }, 490 Action: specs.ActAllow, 491 Args: []specs.LinuxSeccompArg{}, 492 }) 493 case "386": 494 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 495 Names: []string{ 496 "modify_ldt", 497 }, 498 Action: specs.ActAllow, 499 Args: []specs.LinuxSeccompArg{}, 500 }) 501 case "s390", "s390x": 502 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 503 Names: []string{ 504 "s390_pci_mmio_read", 505 "s390_pci_mmio_write", 506 "s390_runtime_instr", 507 }, 508 Action: specs.ActAllow, 509 Args: []specs.LinuxSeccompArg{}, 510 }) 511 } 512 513 admin := false 514 for _, c := range sp.Process.Capabilities.Bounding { 515 switch c { 516 case "CAP_DAC_READ_SEARCH": 517 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 518 Names: []string{"open_by_handle_at"}, 519 Action: specs.ActAllow, 520 Args: []specs.LinuxSeccompArg{}, 521 }) 522 case "CAP_SYS_ADMIN": 523 admin = true 524 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 525 Names: []string{ 526 "bpf", 527 "clone", 528 "clone3", 529 "fanotify_init", 530 "lookup_dcookie", 531 "mount", 532 "name_to_handle_at", 533 "perf_event_open", 534 "quotactl", 535 "setdomainname", 536 "sethostname", 537 "setns", 538 "syslog", 539 "umount", 540 "umount2", 541 "unshare", 542 }, 543 Action: specs.ActAllow, 544 Args: []specs.LinuxSeccompArg{}, 545 }) 546 case "CAP_SYS_BOOT": 547 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 548 Names: []string{"reboot"}, 549 Action: specs.ActAllow, 550 Args: []specs.LinuxSeccompArg{}, 551 }) 552 case "CAP_SYS_CHROOT": 553 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 554 Names: []string{"chroot"}, 555 Action: specs.ActAllow, 556 Args: []specs.LinuxSeccompArg{}, 557 }) 558 case "CAP_SYS_MODULE": 559 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 560 Names: []string{ 561 "delete_module", 562 "init_module", 563 "finit_module", 564 }, 565 Action: specs.ActAllow, 566 Args: []specs.LinuxSeccompArg{}, 567 }) 568 case "CAP_SYS_PACCT": 569 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 570 Names: []string{"acct"}, 571 Action: specs.ActAllow, 572 Args: []specs.LinuxSeccompArg{}, 573 }) 574 case "CAP_SYS_PTRACE": 575 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 576 Names: []string{ 577 "kcmp", 578 "pidfd_getfd", 579 "process_vm_readv", 580 "process_vm_writev", 581 "ptrace", 582 }, 583 Action: specs.ActAllow, 584 Args: []specs.LinuxSeccompArg{}, 585 }) 586 case "CAP_SYS_RAWIO": 587 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 588 Names: []string{ 589 "iopl", 590 "ioperm", 591 }, 592 Action: specs.ActAllow, 593 Args: []specs.LinuxSeccompArg{}, 594 }) 595 case "CAP_SYS_TIME": 596 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 597 Names: []string{ 598 "settimeofday", 599 "stime", 600 "clock_settime", 601 }, 602 Action: specs.ActAllow, 603 Args: []specs.LinuxSeccompArg{}, 604 }) 605 case "CAP_SYS_TTY_CONFIG": 606 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 607 Names: []string{"vhangup"}, 608 Action: specs.ActAllow, 609 Args: []specs.LinuxSeccompArg{}, 610 }) 611 case "CAP_SYSLOG": 612 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 613 Names: []string{"syslog"}, 614 Action: specs.ActAllow, 615 Args: []specs.LinuxSeccompArg{}, 616 }) 617 } 618 } 619 620 if !admin { 621 switch runtime.GOARCH { 622 case "s390", "s390x": 623 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 624 Names: []string{ 625 "clone", 626 }, 627 Action: specs.ActAllow, 628 Args: []specs.LinuxSeccompArg{ 629 { 630 Index: 1, 631 Value: unix.CLONE_NEWNS | unix.CLONE_NEWUTS | unix.CLONE_NEWIPC | unix.CLONE_NEWUSER | unix.CLONE_NEWPID | unix.CLONE_NEWNET | unix.CLONE_NEWCGROUP, 632 ValueTwo: 0, 633 Op: specs.OpMaskedEqual, 634 }, 635 }, 636 }) 637 default: 638 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 639 Names: []string{ 640 "clone", 641 }, 642 Action: specs.ActAllow, 643 Args: []specs.LinuxSeccompArg{ 644 { 645 Index: 0, 646 Value: unix.CLONE_NEWNS | unix.CLONE_NEWUTS | unix.CLONE_NEWIPC | unix.CLONE_NEWUSER | unix.CLONE_NEWPID | unix.CLONE_NEWNET | unix.CLONE_NEWCGROUP, 647 ValueTwo: 0, 648 Op: specs.OpMaskedEqual, 649 }, 650 }, 651 }) 652 } 653 // clone3 is explicitly requested to give ENOSYS instead of the default EPERM, when CAP_SYS_ADMIN is unset 654 // https://github.com/moby/moby/pull/42681 655 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 656 Names: []string{ 657 "clone3", 658 }, 659 Action: specs.ActErrno, 660 ErrnoRet: &nosys, 661 }) 662 } 663 664 return s 665 }