github.com/lalkh/containerd@v1.4.3/contrib/seccomp/seccomp_default.go (about) 1 // +build linux 2 3 /* 4 Copyright The containerd Authors. 5 6 Licensed under the Apache License, Version 2.0 (the "License"); 7 you may not use this file except in compliance with the License. 8 You may obtain a copy of the License at 9 10 http://www.apache.org/licenses/LICENSE-2.0 11 12 Unless required by applicable law or agreed to in writing, software 13 distributed under the License is distributed on an "AS IS" BASIS, 14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 See the License for the specific language governing permissions and 16 limitations under the License. 17 */ 18 19 package seccomp 20 21 import ( 22 "runtime" 23 24 "golang.org/x/sys/unix" 25 26 "github.com/opencontainers/runtime-spec/specs-go" 27 ) 28 29 func arches() []specs.Arch { 30 switch runtime.GOARCH { 31 case "amd64": 32 return []specs.Arch{specs.ArchX86_64, specs.ArchX86, specs.ArchX32} 33 case "arm64": 34 return []specs.Arch{specs.ArchARM, specs.ArchAARCH64} 35 case "mips64": 36 return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32} 37 case "mips64n32": 38 return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32} 39 case "mipsel64": 40 return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32} 41 case "mipsel64n32": 42 return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32} 43 case "s390x": 44 return []specs.Arch{specs.ArchS390, specs.ArchS390X} 45 default: 46 return []specs.Arch{} 47 } 48 } 49 50 // DefaultProfile defines the allowed syscalls for the default seccomp profile. 51 func DefaultProfile(sp *specs.Spec) *specs.LinuxSeccomp { 52 syscalls := []specs.LinuxSyscall{ 53 { 54 Names: []string{ 55 "accept", 56 "accept4", 57 "access", 58 "adjtimex", 59 "alarm", 60 "bind", 61 "brk", 62 "capget", 63 "capset", 64 "chdir", 65 "chmod", 66 "chown", 67 "chown32", 68 "clock_adjtime", 69 "clock_adjtime64", 70 "clock_getres", 71 "clock_getres_time64", 72 "clock_gettime", 73 "clock_gettime64", 74 "clock_nanosleep", 75 "clock_nanosleep_time64", 76 "close", 77 "connect", 78 "copy_file_range", 79 "creat", 80 "dup", 81 "dup2", 82 "dup3", 83 "epoll_create", 84 "epoll_create1", 85 "epoll_ctl", 86 "epoll_ctl_old", 87 "epoll_pwait", 88 "epoll_wait", 89 "epoll_wait_old", 90 "eventfd", 91 "eventfd2", 92 "execve", 93 "execveat", 94 "exit", 95 "exit_group", 96 "faccessat", 97 "faccessat2", 98 "fadvise64", 99 "fadvise64_64", 100 "fallocate", 101 "fanotify_mark", 102 "fchdir", 103 "fchmod", 104 "fchmodat", 105 "fchown", 106 "fchown32", 107 "fchownat", 108 "fcntl", 109 "fcntl64", 110 "fdatasync", 111 "fgetxattr", 112 "flistxattr", 113 "flock", 114 "fork", 115 "fremovexattr", 116 "fsetxattr", 117 "fstat", 118 "fstat64", 119 "fstatat64", 120 "fstatfs", 121 "fstatfs64", 122 "fsync", 123 "ftruncate", 124 "ftruncate64", 125 "futex", 126 "futex_time64", 127 "futimesat", 128 "getcpu", 129 "getcwd", 130 "getdents", 131 "getdents64", 132 "getegid", 133 "getegid32", 134 "geteuid", 135 "geteuid32", 136 "getgid", 137 "getgid32", 138 "getgroups", 139 "getgroups32", 140 "getitimer", 141 "getpeername", 142 "getpgid", 143 "getpgrp", 144 "getpid", 145 "getppid", 146 "getpriority", 147 "getrandom", 148 "getresgid", 149 "getresgid32", 150 "getresuid", 151 "getresuid32", 152 "getrlimit", 153 "get_robust_list", 154 "getrusage", 155 "getsid", 156 "getsockname", 157 "getsockopt", 158 "get_thread_area", 159 "gettid", 160 "gettimeofday", 161 "getuid", 162 "getuid32", 163 "getxattr", 164 "inotify_add_watch", 165 "inotify_init", 166 "inotify_init1", 167 "inotify_rm_watch", 168 "io_cancel", 169 "ioctl", 170 "io_destroy", 171 "io_getevents", 172 "io_pgetevents", 173 "io_pgetevents_time64", 174 "ioprio_get", 175 "ioprio_set", 176 "io_setup", 177 "io_submit", 178 "io_uring_enter", 179 "io_uring_register", 180 "io_uring_setup", 181 "ipc", 182 "kill", 183 "lchown", 184 "lchown32", 185 "lgetxattr", 186 "link", 187 "linkat", 188 "listen", 189 "listxattr", 190 "llistxattr", 191 "_llseek", 192 "lremovexattr", 193 "lseek", 194 "lsetxattr", 195 "lstat", 196 "lstat64", 197 "madvise", 198 "membarrier", 199 "memfd_create", 200 "mincore", 201 "mkdir", 202 "mkdirat", 203 "mknod", 204 "mknodat", 205 "mlock", 206 "mlock2", 207 "mlockall", 208 "mmap", 209 "mmap2", 210 "mprotect", 211 "mq_getsetattr", 212 "mq_notify", 213 "mq_open", 214 "mq_timedreceive", 215 "mq_timedreceive_time64", 216 "mq_timedsend", 217 "mq_timedsend_time64", 218 "mq_unlink", 219 "mremap", 220 "msgctl", 221 "msgget", 222 "msgrcv", 223 "msgsnd", 224 "msync", 225 "munlock", 226 "munlockall", 227 "munmap", 228 "nanosleep", 229 "newfstatat", 230 "_newselect", 231 "open", 232 "openat", 233 "openat2", 234 "pause", 235 "pidfd_open", 236 "pidfd_send_signal", 237 "pipe", 238 "pipe2", 239 "poll", 240 "ppoll", 241 "ppoll_time64", 242 "prctl", 243 "pread64", 244 "preadv", 245 "preadv2", 246 "prlimit64", 247 "pselect6", 248 "pselect6_time64", 249 "pwrite64", 250 "pwritev", 251 "pwritev2", 252 "read", 253 "readahead", 254 "readlink", 255 "readlinkat", 256 "readv", 257 "recv", 258 "recvfrom", 259 "recvmmsg", 260 "recvmmsg_time64", 261 "recvmsg", 262 "remap_file_pages", 263 "removexattr", 264 "rename", 265 "renameat", 266 "renameat2", 267 "restart_syscall", 268 "rmdir", 269 "rseq", 270 "rt_sigaction", 271 "rt_sigpending", 272 "rt_sigprocmask", 273 "rt_sigqueueinfo", 274 "rt_sigreturn", 275 "rt_sigsuspend", 276 "rt_sigtimedwait", 277 "rt_sigtimedwait_time64", 278 "rt_tgsigqueueinfo", 279 "sched_getaffinity", 280 "sched_getattr", 281 "sched_getparam", 282 "sched_get_priority_max", 283 "sched_get_priority_min", 284 "sched_getscheduler", 285 "sched_rr_get_interval", 286 "sched_rr_get_interval_time64", 287 "sched_setaffinity", 288 "sched_setattr", 289 "sched_setparam", 290 "sched_setscheduler", 291 "sched_yield", 292 "seccomp", 293 "select", 294 "semctl", 295 "semget", 296 "semop", 297 "semtimedop", 298 "semtimedop_time64", 299 "send", 300 "sendfile", 301 "sendfile64", 302 "sendmmsg", 303 "sendmsg", 304 "sendto", 305 "setfsgid", 306 "setfsgid32", 307 "setfsuid", 308 "setfsuid32", 309 "setgid", 310 "setgid32", 311 "setgroups", 312 "setgroups32", 313 "setitimer", 314 "setpgid", 315 "setpriority", 316 "setregid", 317 "setregid32", 318 "setresgid", 319 "setresgid32", 320 "setresuid", 321 "setresuid32", 322 "setreuid", 323 "setreuid32", 324 "setrlimit", 325 "set_robust_list", 326 "setsid", 327 "setsockopt", 328 "set_thread_area", 329 "set_tid_address", 330 "setuid", 331 "setuid32", 332 "setxattr", 333 "shmat", 334 "shmctl", 335 "shmdt", 336 "shmget", 337 "shutdown", 338 "sigaltstack", 339 "signalfd", 340 "signalfd4", 341 "sigprocmask", 342 "sigreturn", 343 "socket", 344 "socketcall", 345 "socketpair", 346 "splice", 347 "stat", 348 "stat64", 349 "statfs", 350 "statfs64", 351 "statx", 352 "symlink", 353 "symlinkat", 354 "sync", 355 "sync_file_range", 356 "syncfs", 357 "sysinfo", 358 "tee", 359 "tgkill", 360 "time", 361 "timer_create", 362 "timer_delete", 363 "timer_getoverrun", 364 "timer_gettime", 365 "timer_gettime64", 366 "timer_settime", 367 "timer_settime64", 368 "timerfd_create", 369 "timerfd_gettime", 370 "timerfd_gettime64", 371 "timerfd_settime", 372 "timerfd_settime64", 373 "times", 374 "tkill", 375 "truncate", 376 "truncate64", 377 "ugetrlimit", 378 "umask", 379 "uname", 380 "unlink", 381 "unlinkat", 382 "utime", 383 "utimensat", 384 "utimensat_time64", 385 "utimes", 386 "vfork", 387 "vmsplice", 388 "wait4", 389 "waitid", 390 "waitpid", 391 "write", 392 "writev", 393 }, 394 Action: specs.ActAllow, 395 Args: []specs.LinuxSeccompArg{}, 396 }, 397 { 398 Names: []string{"personality"}, 399 Action: specs.ActAllow, 400 Args: []specs.LinuxSeccompArg{ 401 { 402 Index: 0, 403 Value: 0x0, 404 Op: specs.OpEqualTo, 405 }, 406 }, 407 }, 408 { 409 Names: []string{"personality"}, 410 Action: specs.ActAllow, 411 Args: []specs.LinuxSeccompArg{ 412 { 413 Index: 0, 414 Value: 0x0008, 415 Op: specs.OpEqualTo, 416 }, 417 }, 418 }, 419 { 420 Names: []string{"personality"}, 421 Action: specs.ActAllow, 422 Args: []specs.LinuxSeccompArg{ 423 { 424 Index: 0, 425 Value: 0x20000, 426 Op: specs.OpEqualTo, 427 }, 428 }, 429 }, 430 { 431 Names: []string{"personality"}, 432 Action: specs.ActAllow, 433 Args: []specs.LinuxSeccompArg{ 434 { 435 Index: 0, 436 Value: 0x20008, 437 Op: specs.OpEqualTo, 438 }, 439 }, 440 }, 441 { 442 Names: []string{"personality"}, 443 Action: specs.ActAllow, 444 Args: []specs.LinuxSeccompArg{ 445 { 446 Index: 0, 447 Value: 0xffffffff, 448 Op: specs.OpEqualTo, 449 }, 450 }, 451 }, 452 } 453 454 s := &specs.LinuxSeccomp{ 455 DefaultAction: specs.ActErrno, 456 Architectures: arches(), 457 Syscalls: syscalls, 458 } 459 460 // include by arch 461 switch runtime.GOARCH { 462 case "ppc64le": 463 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 464 Names: []string{ 465 "sync_file_range2", 466 }, 467 Action: specs.ActAllow, 468 Args: []specs.LinuxSeccompArg{}, 469 }) 470 case "arm", "arm64": 471 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 472 Names: []string{ 473 "arm_fadvise64_64", 474 "arm_sync_file_range", 475 "sync_file_range2", 476 "breakpoint", 477 "cacheflush", 478 "set_tls", 479 }, 480 Action: specs.ActAllow, 481 Args: []specs.LinuxSeccompArg{}, 482 }) 483 case "amd64": 484 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 485 Names: []string{ 486 "arch_prctl", 487 "modify_ldt", 488 }, 489 Action: specs.ActAllow, 490 Args: []specs.LinuxSeccompArg{}, 491 }) 492 case "386": 493 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 494 Names: []string{ 495 "modify_ldt", 496 }, 497 Action: specs.ActAllow, 498 Args: []specs.LinuxSeccompArg{}, 499 }) 500 case "s390", "s390x": 501 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 502 Names: []string{ 503 "s390_pci_mmio_read", 504 "s390_pci_mmio_write", 505 "s390_runtime_instr", 506 }, 507 Action: specs.ActAllow, 508 Args: []specs.LinuxSeccompArg{}, 509 }) 510 } 511 512 admin := false 513 for _, c := range sp.Process.Capabilities.Bounding { 514 switch c { 515 case "CAP_DAC_READ_SEARCH": 516 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 517 Names: []string{"open_by_handle_at"}, 518 Action: specs.ActAllow, 519 Args: []specs.LinuxSeccompArg{}, 520 }) 521 case "CAP_SYS_ADMIN": 522 admin = true 523 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 524 Names: []string{ 525 "bpf", 526 "clone", 527 "fanotify_init", 528 "lookup_dcookie", 529 "mount", 530 "name_to_handle_at", 531 "perf_event_open", 532 "quotactl", 533 "setdomainname", 534 "sethostname", 535 "setns", 536 "syslog", 537 "umount", 538 "umount2", 539 "unshare", 540 }, 541 Action: specs.ActAllow, 542 Args: []specs.LinuxSeccompArg{}, 543 }) 544 case "CAP_SYS_BOOT": 545 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 546 Names: []string{"reboot"}, 547 Action: specs.ActAllow, 548 Args: []specs.LinuxSeccompArg{}, 549 }) 550 case "CAP_SYS_CHROOT": 551 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 552 Names: []string{"chroot"}, 553 Action: specs.ActAllow, 554 Args: []specs.LinuxSeccompArg{}, 555 }) 556 case "CAP_SYS_MODULE": 557 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 558 Names: []string{ 559 "delete_module", 560 "init_module", 561 "finit_module", 562 }, 563 Action: specs.ActAllow, 564 Args: []specs.LinuxSeccompArg{}, 565 }) 566 case "CAP_SYS_PACCT": 567 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 568 Names: []string{"acct"}, 569 Action: specs.ActAllow, 570 Args: []specs.LinuxSeccompArg{}, 571 }) 572 case "CAP_SYS_PTRACE": 573 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 574 Names: []string{ 575 "kcmp", 576 "pidfd_getfd", 577 "process_vm_readv", 578 "process_vm_writev", 579 "ptrace", 580 }, 581 Action: specs.ActAllow, 582 Args: []specs.LinuxSeccompArg{}, 583 }) 584 case "CAP_SYS_RAWIO": 585 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 586 Names: []string{ 587 "iopl", 588 "ioperm", 589 }, 590 Action: specs.ActAllow, 591 Args: []specs.LinuxSeccompArg{}, 592 }) 593 case "CAP_SYS_TIME": 594 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 595 Names: []string{ 596 "settimeofday", 597 "stime", 598 "clock_settime", 599 }, 600 Action: specs.ActAllow, 601 Args: []specs.LinuxSeccompArg{}, 602 }) 603 case "CAP_SYS_TTY_CONFIG": 604 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 605 Names: []string{"vhangup"}, 606 Action: specs.ActAllow, 607 Args: []specs.LinuxSeccompArg{}, 608 }) 609 case "CAP_SYSLOG": 610 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 611 Names: []string{"syslog"}, 612 Action: specs.ActAllow, 613 Args: []specs.LinuxSeccompArg{}, 614 }) 615 } 616 } 617 618 if !admin { 619 switch runtime.GOARCH { 620 case "s390", "s390x": 621 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 622 Names: []string{ 623 "clone", 624 }, 625 Action: specs.ActAllow, 626 Args: []specs.LinuxSeccompArg{ 627 { 628 Index: 1, 629 Value: unix.CLONE_NEWNS | unix.CLONE_NEWUTS | unix.CLONE_NEWIPC | unix.CLONE_NEWUSER | unix.CLONE_NEWPID | unix.CLONE_NEWNET | unix.CLONE_NEWCGROUP, 630 ValueTwo: 0, 631 Op: specs.OpMaskedEqual, 632 }, 633 }, 634 }) 635 default: 636 s.Syscalls = append(s.Syscalls, specs.LinuxSyscall{ 637 Names: []string{ 638 "clone", 639 }, 640 Action: specs.ActAllow, 641 Args: []specs.LinuxSeccompArg{ 642 { 643 Index: 0, 644 Value: unix.CLONE_NEWNS | unix.CLONE_NEWUTS | unix.CLONE_NEWIPC | unix.CLONE_NEWUSER | unix.CLONE_NEWPID | unix.CLONE_NEWNET | unix.CLONE_NEWCGROUP, 645 ValueTwo: 0, 646 Op: specs.OpMaskedEqual, 647 }, 648 }, 649 }) 650 } 651 } 652 653 return s 654 }