gitee.com/mysnapcore/mysnapd@v0.1.0/interfaces/seccomp/template.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2016-2018 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package seccomp 21 22 // defaultTemplate contains default seccomp template. 23 // It can be overridden for testing using MockTemplate(). 24 var defaultTemplate = []byte(` 25 # Description: Allows access to app-specific directories and basic runtime 26 # 27 # The default seccomp policy is default deny with a whitelist of allowed 28 # syscalls. The default policy is intended to be safe for any application to 29 # use and should be evaluated in conjunction with other security backends (eg 30 # AppArmor). For example, a few particularly problematic syscalls that are left 31 # out of the default policy are (non-exhaustive): 32 # - kexec_load 33 # - create_module, init_module, finit_module, delete_module (kernel modules) 34 # - name_to_handle_at (history of vulnerabilities) 35 # - open_by_handle_at (history of vulnerabilities) 36 # - ptrace (can be used to break out of sandbox with <4.8 kernels) 37 # - add_key, keyctl, request_key (kernel keyring) 38 39 # 40 # Allowed accesses 41 # 42 43 access 44 faccessat 45 faccessat2 46 47 alarm 48 brk 49 50 # ARM private syscalls 51 breakpoint 52 cacheflush 53 get_tls 54 set_tls 55 usr26 56 usr32 57 58 capget 59 # AppArmor mediates capabilities, so allow capset (useful for apps that for 60 # example want to drop capabilities) 61 capset 62 63 chdir 64 fchdir 65 66 # We can't effectively block file perms due to open() with O_CREAT, so allow 67 # chmod until we have syscall arg filtering (LP: #1446748) 68 chmod 69 fchmod 70 fchmodat 71 72 # Daemons typically run as 'root' so allow chown to 'root'. DAC will prevent 73 # non-root from chowning to root. 74 # (chown root:root) 75 chown - u:root g:root 76 chown32 - u:root g:root 77 fchown - u:root g:root 78 fchown32 - u:root g:root 79 fchownat - - u:root g:root 80 lchown - u:root g:root 81 lchown32 - u:root g:root 82 # (chown root) 83 chown - u:root -1 84 chown32 - u:root -1 85 fchown - u:root -1 86 fchown32 - u:root -1 87 fchownat - - u:root -1 88 lchown - u:root -1 89 lchown32 - u:root -1 90 # (chgrp root) 91 chown - -1 g:root 92 chown32 - -1 g:root 93 fchown - -1 g:root 94 fchown32 - -1 g:root 95 fchownat - - -1 g:root 96 lchown - -1 g:root 97 lchown32 - -1 g:root 98 99 clock_getres 100 clock_getres_time64 101 clock_gettime 102 clock_gettime64 103 clock_nanosleep 104 clock_nanosleep_time64 105 clone 106 clone3 107 close 108 close_range 109 110 # needed by ls -l 111 connect 112 113 # the file descriptors used here will already be mediated by apparmor, 114 # the 6th argument is flags, which currently is always 0 115 copy_file_range - - - - - 0 116 117 chroot 118 119 creat 120 dup 121 dup2 122 dup3 123 epoll_create 124 epoll_create1 125 epoll_ctl 126 epoll_ctl_old 127 epoll_pwait 128 epoll_wait 129 epoll_wait_old 130 eventfd 131 eventfd2 132 execve 133 execveat 134 _exit 135 exit 136 exit_group 137 fallocate 138 139 # requires CAP_SYS_ADMIN 140 #fanotify_init 141 #fanotify_mark 142 143 fcntl 144 fcntl64 145 flock 146 fork 147 ftime 148 futex 149 futex_time64 150 futex_waitv 151 get_mempolicy 152 get_robust_list 153 get_thread_area 154 getcpu 155 getcwd 156 getdents 157 getdents64 158 getegid 159 getegid32 160 geteuid 161 geteuid32 162 getgid 163 getgid32 164 getgroups 165 getgroups32 166 getitimer 167 getpgid 168 getpgrp 169 getpid 170 getppid 171 getpriority 172 getrandom 173 getresgid 174 getresgid32 175 getresuid 176 getresuid32 177 178 getrlimit 179 ugetrlimit 180 181 getrusage 182 getsid 183 gettid 184 gettimeofday 185 getuid 186 getuid32 187 188 getxattr 189 fgetxattr 190 lgetxattr 191 192 inotify_add_watch 193 inotify_init 194 inotify_init1 195 inotify_rm_watch 196 197 # ioctl() mediation currently primarily relies on Linux capabilities as well as 198 # the initial syscall for the fd to pass to ioctl(). See 'man capabilities' 199 # and 'man ioctl_list'. TIOCSTI requires CAP_SYS_ADMIN but allows for faking 200 # input (man tty_ioctl), so we disallow it to prevent snaps plugging interfaces 201 # with 'capability sys_admin' from interfering with other snaps or the 202 # unconfined user's terminal. 203 # TODO: this should be scaled back even more 204 ioctl - !TIOCSTI 205 206 io_cancel 207 io_destroy 208 io_getevents 209 io_pgetevents 210 io_pgetevents_time64 211 io_setup 212 io_submit 213 ioprio_get 214 # affects other processes, requires CAP_SYS_ADMIN. Potentially allow with 215 # syscall filtering of (at least) IOPRIO_WHO_USER (LP: #1446748) 216 #ioprio_set 217 218 ipc 219 kill 220 link 221 linkat 222 223 listxattr 224 llistxattr 225 flistxattr 226 227 lseek 228 llseek 229 _llseek 230 lstat 231 lstat64 232 233 madvise 234 fadvise64 235 fadvise64_64 236 arm_fadvise64_64 237 238 mbind 239 membarrier 240 memfd_create 241 mincore 242 mkdir 243 mkdirat 244 mlock 245 mlock2 246 mlockall 247 mmap 248 mmap2 249 250 # Allow mknod for regular files, pipes and sockets (and not block or char 251 # devices) 252 mknod - |S_IFREG - 253 mknodat - - |S_IFREG - 254 mknod - |S_IFIFO - 255 mknodat - - |S_IFIFO - 256 mknod - |S_IFSOCK - 257 mknodat - - |S_IFSOCK - 258 259 modify_ldt 260 mprotect 261 262 mremap 263 msgctl 264 msgget 265 msgrcv 266 msgsnd 267 msync 268 munlock 269 munlockall 270 munmap 271 272 nanosleep 273 274 # Argument filtering with gt/ge/lt/le does not work properly with 275 # libseccomp < 2.4 or golang-seccomp < 0.9.1. See: 276 # - https://bugs.launchpad.net/snapd/+bug/1825052/comments/9 277 # - https://github.com/seccomp/libseccomp/issues/69 278 # Eventually we want to use >=0, but we need libseccomp and golang-seccomp to 279 # be updated everywhere first. In the meantime, use <=19 and rely on the fact 280 # that AppArmor mediates CAP_SYS_NICE (and for systems without AppArmor, we 281 # ignore this lack of mediation since snaps are not meaningfully confined). 282 # 283 # Allow using nice() with default or lower priority 284 nice <=19 285 # Allow using setpriority to set the priority of the calling process to default 286 # or lower priority (eg, 'nice -n 9 <command>') 287 setpriority PRIO_PROCESS 0 <=19 288 289 # LP: #1446748 - support syscall arg filtering for mode_t with O_CREAT 290 open 291 292 openat 293 pause 294 personality 295 pipe 296 pipe2 297 poll 298 ppoll 299 ppoll_time64 300 301 # LP: #1446748 - support syscall arg filtering 302 prctl 303 arch_prctl 304 305 read 306 pread 307 pread64 308 preadv 309 readv 310 311 readahead 312 readdir 313 readlink 314 readlinkat 315 316 # allow reading from sockets 317 recv 318 recvfrom 319 recvmsg 320 recvmmsg 321 recvmmsg_time64 322 323 remap_file_pages 324 325 removexattr 326 fremovexattr 327 lremovexattr 328 329 rename 330 renameat 331 renameat2 332 333 # The man page says this shouldn't be needed, but we've seen denials for it 334 # in the wild 335 restart_syscall 336 337 rmdir 338 339 # glibc 2.35 unconditionally calls rseq for all threads 340 rseq 341 342 rt_sigaction 343 rt_sigpending 344 rt_sigprocmask 345 rt_sigqueueinfo 346 rt_sigreturn 347 rt_sigsuspend 348 rt_sigtimedwait 349 rt_sigtimedwait_time64 350 rt_tgsigqueueinfo 351 sched_getaffinity 352 sched_getattr 353 sched_getparam 354 sched_get_priority_max 355 sched_get_priority_min 356 sched_getscheduler 357 sched_rr_get_interval 358 sched_rr_get_interval_time64 359 # enforce pid_t is 0 so the app may only change its own scheduler and affinity. 360 # Use process-control interface for controlling other pids. 361 sched_setaffinity 0 - - 362 sched_setparam 0 - 363 364 # 'sched_setscheduler' without argument filtering was allowed in 2.21 and 365 # earlier and 2.22 added 'sched_setscheduler 0 - -', introducing LP: #1661265. 366 # For now, continue to allow sched_setscheduler unconditionally. 367 sched_setscheduler 368 369 sched_yield 370 371 # Allow configuring seccomp filter. This is ok because the kernel enforces that 372 # the new filter is a subset of the current filter (ie, no widening 373 # permissions) 374 seccomp 375 376 select 377 _newselect 378 pselect 379 pselect6 380 pselect6_time64 381 382 # Allow use of SysV semaphores. Note that allocated resources are not freed by 383 # OOM which can lead to global kernel resource leakage. 384 semctl 385 semget 386 semop 387 semtimedop 388 semtimedop_time64 389 390 # allow sending to sockets 391 send 392 sendto 393 sendmsg 394 sendmmsg 395 396 sendfile 397 sendfile64 398 399 # These break isolation but are common and can't be mediated at the seccomp 400 # level with arg filtering 401 setpgid 402 setpgrp 403 404 set_thread_area 405 setitimer 406 407 # apps don't have CAP_SYS_RESOURCE so these can't be abused to raise the hard 408 # limits 409 setrlimit 410 prlimit64 411 412 set_mempolicy 413 set_robust_list 414 setsid 415 set_tid_address 416 417 setxattr 418 fsetxattr 419 lsetxattr 420 421 shmat 422 shmctl 423 shmdt 424 shmget 425 shutdown 426 signal 427 sigaction 428 signalfd 429 signalfd4 430 sigaltstack 431 sigpending 432 sigprocmask 433 sigreturn 434 sigsuspend 435 sigtimedwait 436 sigwaitinfo 437 438 # AppArmor mediates AF_UNIX/AF_LOCAL via 'unix' rules and all other AF_* 439 # domains via 'network' rules. We won't allow bare 'network' AppArmor rules, so 440 # we can allow 'socket' for all domains except AF_NETLINK and let AppArmor 441 # handle the rest. 442 socket AF_UNIX 443 socket AF_LOCAL 444 socket AF_INET 445 socket AF_INET6 446 socket AF_IPX 447 socket AF_XDP 448 socket AF_X25 449 socket AF_AX25 450 socket AF_ATMPVC 451 socket AF_APPLETALK 452 socket AF_PACKET 453 socket AF_ALG 454 socket AF_CAN 455 socket AF_BRIDGE 456 socket AF_NETROM 457 socket AF_ROSE 458 socket AF_NETBEUI 459 socket AF_SECURITY 460 socket AF_KEY 461 socket AF_ASH 462 socket AF_ECONET 463 socket AF_SNA 464 socket AF_IRDA 465 socket AF_PPPOX 466 socket AF_WANPIPE 467 socket AF_BLUETOOTH 468 socket AF_RDS 469 socket AF_LLC 470 socket AF_TIPC 471 socket AF_IUCV 472 socket AF_RXRPC 473 socket AF_ISDN 474 socket AF_PHONET 475 socket AF_IEEE802154 476 socket AF_CAIF 477 socket AF_NFC 478 socket AF_VSOCK 479 socket AF_MPLS 480 socket AF_IB 481 socket AF_QIPCRTR 482 483 # For usrsctp, AppArmor doesn't support 'network conn,' since AF_CONN is 484 # userspace and encapsulated in other domains that are mediated. As such, do 485 # not allow AF_CONN by default here. 486 # socket AF_CONN 487 488 # For AF_NETLINK, we'll use a combination of AppArmor coarse mediation and 489 # seccomp arg filtering of netlink families. 490 # socket AF_NETLINK - - 491 492 # needed by snapctl 493 getsockopt 494 setsockopt 495 getsockname 496 getpeername 497 498 # Per man page, on Linux this is limited to only AF_UNIX so it is ok to have 499 # in the default template 500 socketpair 501 502 splice 503 504 stat 505 stat64 506 fstat 507 fstat64 508 fstatat64 509 lstat 510 newfstatat 511 oldfstat 512 oldlstat 513 oldstat 514 statx 515 516 statfs 517 statfs64 518 fstatfs 519 fstatfs64 520 statvfs 521 fstatvfs 522 ustat 523 524 symlink 525 symlinkat 526 527 sync 528 sync_file_range 529 sync_file_range2 530 arm_sync_file_range 531 fdatasync 532 fsync 533 syncfs 534 sysinfo 535 syslog 536 tee 537 tgkill 538 time 539 timer_create 540 timer_delete 541 timer_getoverrun 542 timer_gettime 543 timer_gettime64 544 timer_settime 545 timer_settime64 546 timerfd 547 timerfd_create 548 timerfd_gettime 549 timerfd_gettime64 550 timerfd_settime 551 timerfd_settime64 552 times 553 tkill 554 555 truncate 556 truncate64 557 ftruncate 558 ftruncate64 559 560 umask 561 562 uname 563 olduname 564 oldolduname 565 566 unlink 567 unlinkat 568 569 utime 570 utimensat 571 utimensat_time64 572 utimes 573 futimesat 574 575 vfork 576 vmsplice 577 wait4 578 oldwait4 579 waitpid 580 waitid 581 582 write 583 writev 584 pwrite 585 pwrite64 586 pwritev 587 `) 588 589 // Go's net package attempts to bind early to check whether IPv6 is available or not. 590 // For systems with apparmor enabled, this will be mediated and cause an error to be 591 // returned. Without apparmor, the call goes through to seccomp and the process is 592 // killed instead of just getting the error. 593 // 594 // For that reason once apparmor is disabled the seccomp profile is given access 595 // to bind, so that these processes are not improperly killed. There is on going 596 // work to make seccomp return an error in those cases as well and log the error. 597 // Once that's in place we can drop this hack. 598 const bindSyscallWorkaround = ` 599 # Add bind() for systems with only Seccomp enabled to workaround 600 # LP #1644573 601 bind 602 ` 603 604 // socketcall is an older interface and single entry point that can be used 605 // instead of socket(), bind(), connect(), etc individually. It isn't needed 606 // by most architectures with new enough kernels and glibc, so we leave it out 607 // of the default policy and add only when needed. 608 const socketcallSyscallDeprecated = ` 609 # Add socketcall() for system and/or base that requires it. LP: #1446748 610 socketcall 611 ` 612 613 // Historically snapd has allowed the use of the various setuid, setgid and 614 // setgroups syscalls, relying on AppArmor for mediation of the CAP_SETUID and 615 // CAP_SETGID. In core20, these can be dropped. 616 var barePrivDropSyscalls = ` 617 # Allow these and rely on AppArmor to mediate CAP_SETUID and CAP_SETGID. When 618 # dropping to particular UID/GIDs, we'll use a different set of 619 # argument-filtered syscalls. 620 setgid 621 setgid32 622 setregid 623 setregid32 624 setresgid 625 setresgid32 626 setresuid 627 setresuid32 628 setreuid 629 setreuid32 630 setuid 631 setuid32 632 ` 633 634 // Syscalls for setuid/setgid family of syscalls when dealing with only root 635 // uid and gid 636 var rootSetUidGidSyscalls = ` 637 # Allow various setuid/setgid/chown family of syscalls with argument 638 # filtering. AppArmor has corresponding CAP_SETUID, CAP_SETGID and CAP_CHOWN 639 # rules. 640 641 # allow use of setgroups(0, ...). Note: while the setgroups() man page states 642 # that 'setgroups(0, NULL) should be used to clear all supplementary groups, 643 # the kernel will not consult the group list when size is '0', so we allow it 644 # to be anything for compatibility with (arguably buggy) programs that expect 645 # to clear the groups with 'setgroups(0, <non-null>). 646 setgroups 0 - 647 setgroups32 0 - 648 649 # allow setgid to root 650 setgid g:root 651 setgid32 g:root 652 653 # allow setuid to root 654 setuid u:root 655 setuid32 u:root 656 657 # allow setregid to root 658 setregid g:root g:root 659 setregid32 g:root g:root 660 setregid -1 g:root 661 setregid32 -1 g:root 662 setregid g:root -1 663 setregid32 g:root -1 664 665 # allow setresgid to root 666 # (permanent drop) 667 setresgid g:root g:root g:root 668 setresgid32 g:root g:root g:root 669 # (setegid) 670 setresgid -1 g:root -1 671 setresgid32 -1 g:root -1 672 # (setgid equivalent) 673 setresgid g:root g:root -1 674 setresgid32 g:root g:root -1 675 676 # allow setreuid to root 677 setreuid u:root u:root 678 setreuid32 u:root u:root 679 setreuid -1 u:root 680 setreuid32 -1 u:root 681 setreuid u:root -1 682 setreuid32 u:root -1 683 684 # allow setresuid to root 685 # (permanent drop) 686 setresuid u:root u:root u:root 687 setresuid32 u:root u:root u:root 688 # (seteuid) 689 setresuid -1 u:root -1 690 setresuid32 -1 u:root -1 691 # (setuid equivalent) 692 setresuid u:root u:root -1 693 setresuid32 u:root u:root -1 694 ` 695 696 // Template for privilege drop and chown operations. This intentionally does 697 // not support all combinations of users or obscure combinations (we can add 698 // combinations as users dictate). Eg, these are supported: 699 // chown foo:foo 700 // chown foo 701 // chgrp foo 702 // but these are not: 703 // chown foo:bar 704 // chown bar:foo 705 // For now, users who want 'foo:bar' can do: 706 // chown foo ; chgrp bar 707 var privDropAndChownSyscalls = ` 708 # allow setgid to ###GROUP### 709 setgid g:###GROUP### 710 setgid32 g:###GROUP### 711 712 # allow setregid to ###GROUP### 713 setregid g:###GROUP### g:###GROUP### 714 setregid32 g:###GROUP### g:###GROUP### 715 setregid -1 g:###GROUP### 716 setregid32 -1 g:###GROUP### 717 setregid g:###GROUP### -1 718 setregid32 g:###GROUP### -1 719 # (real root) 720 setregid g:root g:###GROUP### 721 setregid32 g:root g:###GROUP### 722 # (euid root) 723 setregid g:###GROUP### g:root 724 setregid32 g:###GROUP### g:root 725 726 # allow setresgid to ###GROUP### 727 # (permanent drop) 728 setresgid g:###GROUP### g:###GROUP### g:###GROUP### 729 setresgid32 g:###GROUP### g:###GROUP### g:###GROUP### 730 # (setegid) 731 setresgid -1 g:###GROUP### -1 732 setresgid32 -1 g:###GROUP### -1 733 # (setgid equivalent) 734 setresgid g:###GROUP### g:###GROUP### -1 735 setresgid32 g:###GROUP### g:###GROUP### -1 736 # (saving root) 737 setresgid g:###GROUP### g:###GROUP### g:root 738 setresgid32 g:###GROUP### g:###GROUP### g:root 739 # (euid root and saving root) 740 setresgid g:###GROUP### g:root g:root 741 setresgid32 g:###GROUP### g:root g:root 742 743 # allow setuid to ###USERNAME### 744 setuid u:###USERNAME### 745 setuid32 u:###USERNAME### 746 747 # allow setreuid to ###USERNAME### 748 setreuid u:###USERNAME### u:###USERNAME### 749 setreuid32 u:###USERNAME### u:###USERNAME### 750 setreuid -1 u:###USERNAME### 751 setreuid32 -1 u:###USERNAME### 752 setreuid u:###USERNAME### -1 753 setreuid32 u:###USERNAME### -1 754 # (real root) 755 setreuid u:root u:###USERNAME### 756 setreuid32 u:root u:###USERNAME### 757 # (euid root) 758 setreuid u:###USERNAME### u:root 759 setreuid32 u:###USERNAME### u:root 760 761 # allow setresuid to ###USERNAME### 762 # (permanent drop) 763 setresuid u:###USERNAME### u:###USERNAME### u:###USERNAME### 764 setresuid32 u:###USERNAME### u:###USERNAME### u:###USERNAME### 765 # (seteuid) 766 setresuid -1 u:###USERNAME### -1 767 setresuid32 -1 u:###USERNAME### -1 768 # (setuid equivalent) 769 setresuid u:###USERNAME### u:###USERNAME### -1 770 setresuid32 u:###USERNAME### u:###USERNAME### -1 771 # (saving root) 772 setresuid u:###USERNAME### u:###USERNAME### u:root 773 setresuid32 u:###USERNAME### u:###USERNAME### u:root 774 # (euid root and saving root) 775 setresuid u:###USERNAME### u:root u:root 776 setresuid32 u:###USERNAME### u:root u:root 777 778 # allow chown to ###USERNAME###:###GROUP### 779 # (chown ###USERNAME###:###GROUP###) 780 chown - u:###USERNAME### g:###GROUP### 781 chown32 - u:###USERNAME### g:###GROUP### 782 fchown - u:###USERNAME### g:###GROUP### 783 fchown32 - u:###USERNAME### g:###GROUP### 784 fchownat - - u:###USERNAME### g:###GROUP### 785 lchown - u:###USERNAME### g:###GROUP### 786 lchown32 - u:###USERNAME### g:###GROUP### 787 # (chown ###USERNAME###) 788 chown - u:###USERNAME### -1 789 chown32 - u:###USERNAME### -1 790 fchown - u:###USERNAME### -1 791 fchown32 - u:###USERNAME### -1 792 fchownat - - u:###USERNAME### -1 793 lchown - u:###USERNAME### -1 794 lchown32 - u:###USERNAME### -1 795 # (chgrp ###GROUP###) 796 chown - -1 g:###GROUP### 797 chown32 - -1 g:###GROUP### 798 fchown - -1 g:###GROUP### 799 fchown32 - -1 g:###GROUP### 800 fchownat - - -1 g:###GROUP### 801 lchown - -1 g:###GROUP### 802 lchown32 - -1 g:###GROUP### 803 804 # allow chown to ###USERNAME###:root 805 chown - u:###USERNAME### g:root 806 chown32 - u:###USERNAME### g:root 807 fchown - u:###USERNAME### g:root 808 fchown32 - u:###USERNAME### g:root 809 fchownat - - u:###USERNAME### g:root 810 lchown - u:###USERNAME### g:root 811 lchown32 - u:###USERNAME### g:root 812 813 # allow chown to root:###GROUP### 814 chown - u:root g:###GROUP### 815 chown32 - u:root g:###GROUP### 816 fchown - u:root g:###GROUP### 817 fchown32 - u:root g:###GROUP### 818 fchownat - - u:root g:###GROUP### 819 lchown - u:root g:###GROUP### 820 lchown32 - u:root g:###GROUP### 821 `