github.com/chipaca/snappy@v0.0.0-20210104084008-1f06296fe8ad/interfaces/builtin/docker_support.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2016-2018 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package builtin 21 22 import ( 23 "fmt" 24 25 "github.com/snapcore/snapd/interfaces" 26 "github.com/snapcore/snapd/interfaces/apparmor" 27 "github.com/snapcore/snapd/interfaces/kmod" 28 "github.com/snapcore/snapd/interfaces/seccomp" 29 "github.com/snapcore/snapd/interfaces/udev" 30 "github.com/snapcore/snapd/release" 31 apparmor_sandbox "github.com/snapcore/snapd/sandbox/apparmor" 32 "github.com/snapcore/snapd/snap" 33 ) 34 35 const dockerSupportSummary = `allows operating as the Docker daemon` 36 37 const dockerSupportBaseDeclarationPlugs = ` 38 docker-support: 39 allow-installation: false 40 deny-auto-connection: true 41 ` 42 43 const dockerSupportBaseDeclarationSlots = ` 44 docker-support: 45 allow-installation: 46 slot-snap-type: 47 - core 48 deny-auto-connection: true 49 ` 50 51 const dockerSupportConnectedPlugAppArmorCore = ` 52 # These accesses are necessary for Ubuntu Core 16 and 18, likely due to the 53 # version of apparmor or the kernel which doesn't resolve the upper layer of an 54 # overlayfs mount correctly the accesses show up as runc trying to read from 55 # /system-data/var/snap/docker/common/var-lib-docker/overlay2/$SHA/diff/ 56 /system-data/var/snap/{@{SNAP_NAME},@{SNAP_INSTANCE_NAME}}/common/{,**} rwl, 57 /system-data/var/snap/{@{SNAP_NAME},@{SNAP_INSTANCE_NAME}}/@{SNAP_REVISION}/{,**} rwl, 58 ` 59 60 const dockerSupportConnectedPlugAppArmor = ` 61 # Description: allow operating as the Docker daemon/containerd. This policy is 62 # intentionally not restrictive and is here to help guard against programming 63 # errors and not for security confinement. The Docker daemon by design requires 64 # extensive access to the system and cannot be effectively confined against 65 # malicious activity. 66 67 #include <abstractions/dbus-strict> 68 69 # Allow sockets/etc for docker 70 /{,var/}run/docker.sock rw, 71 /{,var/}run/docker/ rw, 72 /{,var/}run/docker/** mrwklix, 73 /{,var/}run/runc/ rw, 74 /{,var/}run/runc/** mrwklix, 75 76 # Allow sockets/etc for containerd 77 /{,var/}run/containerd/{,s/,runc/,runc/k8s.io/,runc/k8s.io/*/} rw, 78 /{,var/}run/containerd/runc/k8s.io/*/** rwk, 79 /{,var/}run/containerd/{io.containerd*/,io.containerd*/k8s.io/,io.containerd*/k8s.io/*/} rw, 80 /{,var/}run/containerd/io.containerd*/*/** rwk, 81 /{,var/}run/containerd/s/** rwk, 82 83 # Limit ipam-state to k8s 84 /run/ipam-state/k8s-** rw, 85 /run/ipam-state/k8s-*/lock k, 86 87 # Socket for docker-containerd-shim 88 unix (bind,listen) type=stream addr="@/containerd-shim/**.sock\x00", 89 90 /{,var/}run/mount/utab r, 91 92 # Wide read access to /proc, but somewhat limited writes for now 93 @{PROC}/ r, 94 @{PROC}/** r, 95 @{PROC}/[0-9]*/attr/{,apparmor/}exec w, 96 @{PROC}/[0-9]*/oom_score_adj w, 97 98 # Limited read access to specific bits of /sys 99 /sys/kernel/mm/hugepages/ r, 100 /sys/kernel/mm/transparent_hugepage/{,**} r, 101 /sys/fs/cgroup/cpuset/cpuset.cpus r, 102 /sys/fs/cgroup/cpuset/cpuset.mems r, 103 /sys/module/apparmor/parameters/enabled r, 104 105 # Limit cgroup writes a bit (Docker uses a "docker" sub-group) 106 /sys/fs/cgroup/*/docker/ rw, 107 /sys/fs/cgroup/*/docker/** rw, 108 109 # Also allow cgroup writes to kubernetes pods 110 /sys/fs/cgroup/*/kubepods/ rw, 111 /sys/fs/cgroup/*/kubepods/** rw, 112 113 # containerd can also be configured to use the systemd cgroup driver via 114 # plugins.cri.systemd_cgroup = true which moves container processes into 115 # systemd-managed cgroups. This is now the recommended configuration since it 116 # provides a single cgroup manager (systemd) in an effort to achieve consistent 117 # views of resources. 118 /sys/fs/cgroup/*/systemd/{,system.slice/} rw, # create missing dirs 119 /sys/fs/cgroup/*/systemd/system.slice/** r, 120 /sys/fs/cgroup/*/systemd/system.slice/cgroup.procs w, 121 122 # Allow tracing ourself (especially the "runc" process we create) 123 ptrace (trace) peer=@{profile_name}, 124 125 # Docker needs a lot of caps, but limits them in the app container 126 capability, 127 128 # Docker does all kinds of mounts all over the filesystem 129 /dev/mapper/control rw, 130 /dev/mapper/docker* rw, 131 /dev/loop-control r, 132 /dev/loop[0-9]* rw, 133 /sys/devices/virtual/block/dm-[0-9]*/** r, 134 mount, 135 umount, 136 137 # After doing a pivot_root using <graph-dir>/<container-fs>/.pivot_rootNNNNNN, 138 # Docker removes the leftover /.pivot_rootNNNNNN directory (which is now 139 # relative to "/" instead of "<graph-dir>/<container-fs>" thanks to pivot_root) 140 pivot_root, 141 /.pivot_root[0-9]*/ rw, 142 143 # file descriptors (/proc/NNN/fd/X) 144 # file descriptors in the container show up here due to attach_disconnected 145 /[0-9]* rw, 146 147 # Docker needs to be able to create and load the profile it applies to 148 # containers ("docker-default") 149 /sbin/apparmor_parser ixr, 150 /etc/apparmor.d/cache/ r, # apparmor 2.12 and below 151 /etc/apparmor.d/cache/.features r, 152 /etc/apparmor.d/{,cache/}docker* rw, 153 /var/cache/apparmor/{,*/} r, # apparmor 2.13 and higher 154 /var/cache/apparmor/*/.features r, 155 /var/cache/apparmor/*/docker* rw, 156 /etc/apparmor.d/tunables/{,**} r, 157 /etc/apparmor.d/abstractions/{,**} r, 158 /etc/apparmor/parser.conf r, 159 /etc/apparmor/subdomain.conf r, 160 /sys/kernel/security/apparmor/.replace rw, 161 /sys/kernel/security/apparmor/{,**} r, 162 163 # use 'privileged-containers: true' to support --security-opts 164 165 # defaults for docker-default 166 change_profile unsafe /** -> docker-default, 167 signal (send) peer=docker-default, 168 ptrace (read, trace) peer=docker-default, 169 170 # defaults for containerd 171 change_profile unsafe /** -> cri-containerd.apparmor.d, 172 signal (send) peer=cri-containerd.apparmor.d, 173 ptrace (read, trace) peer=cri-containerd.apparmor.d, 174 175 # Graph (storage) driver bits 176 /{dev,run}/shm/aufs.xino mrw, 177 /proc/fs/aufs/plink_maint w, 178 /sys/fs/aufs/** r, 179 180 #cf bug 1502785 181 / r, 182 183 # recent versions of docker make a symlink from /dev/ptmx to /dev/pts/ptmx 184 # and so to allow allocating a new shell we need this 185 /dev/pts/ptmx rw, 186 187 # needed by runc for mitigation of CVE-2019-5736 188 # For details see https://bugs.launchpad.net/apparmor/+bug/1820344 189 / ix, 190 /bin/runc ixr, 191 192 /pause ixr, 193 /bin/busybox ixr, 194 195 # When kubernetes drives containerd, containerd needs access to CNI services, 196 # like flanneld's subnet.env for DNS. This would ideally be snap-specific (it 197 # could if the control plane was a snap), but in deployments where the control 198 # plane is not a snap, it will tell flannel to use this path. 199 /run/flannel/{,**} rk, 200 201 # When kubernetes drives containerd, containerd needs access to various 202 # secrets for the pods which are overlayed at /run/secrets/.... 203 # This would ideally be snap-specific (it could if the control plane was a 204 # snap), but in deployments where the control plane is not a snap, it will tell 205 # containerd to use this path for various account information for pods. 206 /run/secrets/kubernetes.io/{,**} rk, 207 ` 208 209 const dockerSupportConnectedPlugSecComp = ` 210 # Description: allow operating as the Docker daemon. This policy is 211 # intentionally not restrictive and is here to help guard against programming 212 # errors and not for security confinement. The Docker daemon by design requires 213 # extensive access to the system and cannot be effectively confined against 214 # malicious activity. 215 216 # Because seccomp may only go more strict, we must allow all syscalls to Docker 217 # that it expects to give to containers in addition to what it needs to run and 218 # trust that docker daemon # only gives out reasonable syscalls to containers. 219 220 # Docker includes these in the default container whitelist, but they're 221 # potentially dangerous. 222 #finit_module 223 #init_module 224 #query_module 225 #delete_module 226 227 # These have a history of vulnerabilities, are not widely used, and 228 # open_by_handle_at has been used to break out of Docker containers by brute 229 # forcing the handle value: http://stealth.openwall.net/xSports/shocker.c 230 #name_to_handle_at 231 #open_by_handle_at 232 233 # Calls the Docker daemon itself requires 234 235 # /snap/docker/VERSION/bin/docker-runc 236 # "do not inherit the parent's session keyring" 237 # "make session keyring searcheable" 238 # runC uses this to ensure the container doesn't have access to the host 239 # keyring 240 keyctl 241 242 # /snap/docker/VERSION/bin/docker-runc 243 pivot_root 244 245 # ptrace can be abused to break out of the seccomp sandbox 246 # but is required by the Docker daemon. 247 ptrace 248 249 # This list comes from Docker's default seccomp whitelist (which is applied to 250 # all containers launched unless a custom profile is specified or 251 # "--privileged" is used) 252 # https://github.com/docker/docker/blob/v1.12.0/profiles/seccomp/seccomp_default.go#L39-L1879 253 # It has been further filtered to exclude certain known-troublesome syscalls. 254 accept 255 accept4 256 access 257 acct 258 adjtimex 259 alarm 260 arch_prctl 261 bind 262 bpf 263 breakpoint 264 brk 265 cacheflush 266 capget 267 capset 268 chdir 269 chmod 270 chown 271 chown32 272 chroot 273 clock_getres 274 clock_getres_time64 275 clock_gettime 276 clock_gettime64 277 clock_nanosleep 278 clock_nanosleep_time64 279 clone 280 close 281 connect 282 copy_file_range 283 creat 284 dup 285 dup2 286 dup3 287 epoll_create 288 epoll_create1 289 epoll_ctl 290 epoll_ctl_old 291 epoll_pwait 292 epoll_wait 293 epoll_wait_old 294 eventfd 295 eventfd2 296 execve 297 execveat 298 exit 299 exit_group 300 faccessat 301 fadvise64 302 fadvise64_64 303 fallocate 304 fanotify_init 305 fanotify_mark 306 fchdir 307 fchmod 308 fchmodat 309 fchown 310 fchown32 311 fchownat 312 fcntl 313 fcntl64 314 fdatasync 315 fgetxattr 316 flistxattr 317 flock 318 fork 319 fremovexattr 320 fsetxattr 321 fstat 322 fstat64 323 fstatat64 324 fstatfs 325 fstatfs64 326 fsync 327 ftruncate 328 ftruncate64 329 futex 330 futex_time64 331 futimesat 332 getcpu 333 getcwd 334 getdents 335 getdents64 336 getegid 337 getegid32 338 geteuid 339 geteuid32 340 getgid 341 getgid32 342 getgroups 343 getgroups32 344 getitimer 345 getpeername 346 getpgid 347 getpgrp 348 getpid 349 getppid 350 getpriority 351 getrandom 352 getresgid 353 getresgid32 354 getresuid 355 getresuid32 356 getrlimit 357 get_robust_list 358 getrusage 359 getsid 360 getsockname 361 getsockopt 362 get_thread_area 363 get_tls 364 gettid 365 gettimeofday 366 getuid 367 getuid32 368 getxattr 369 inotify_add_watch 370 inotify_init 371 inotify_init1 372 inotify_rm_watch 373 io_cancel 374 ioctl 375 io_destroy 376 io_getevents 377 ioperm 378 iopl 379 ioprio_get 380 ioprio_set 381 io_setup 382 io_submit 383 ipc 384 kcmp 385 kill 386 lchown 387 lchown32 388 lgetxattr 389 link 390 linkat 391 listen 392 listxattr 393 llistxattr 394 _llseek 395 lookup_dcookie 396 lremovexattr 397 lseek 398 lsetxattr 399 lstat 400 lstat64 401 madvise 402 memfd_create 403 mincore 404 mkdir 405 mkdirat 406 mknod 407 mknodat 408 mlock 409 mlock2 410 mlockall 411 mmap 412 mmap2 413 modify_ldt 414 mount 415 mprotect 416 mq_getsetattr 417 mq_notify 418 mq_open 419 mq_timedreceive 420 mq_timedreceive_time64 421 mq_timedsend 422 mq_timedsend_time64 423 mq_unlink 424 mremap 425 msgctl 426 msgget 427 msgrcv 428 msgsnd 429 msync 430 munlock 431 munlockall 432 munmap 433 nanosleep 434 newfstatat 435 _newselect 436 open 437 openat 438 pause 439 perf_event_open 440 personality 441 pipe 442 pipe2 443 poll 444 ppoll 445 ppoll_time64 446 prctl 447 pread64 448 preadv 449 prlimit64 450 process_vm_readv 451 process_vm_writev 452 pselect6 453 pselect6_time64 454 pwrite64 455 pwritev 456 read 457 readahead 458 readlink 459 readlinkat 460 readv 461 reboot 462 recv 463 recvfrom 464 recvmmsg 465 recvmmsg_time64 466 recvmsg 467 remap_file_pages 468 removexattr 469 rename 470 renameat 471 renameat2 472 restart_syscall 473 rmdir 474 rt_sigaction 475 rt_sigpending 476 rt_sigprocmask 477 rt_sigqueueinfo 478 rt_sigreturn 479 rt_sigsuspend 480 rt_sigtimedwait 481 rt_sigtimedwait_time64 482 rt_tgsigqueueinfo 483 s390_pci_mmio_read 484 s390_pci_mmio_write 485 s390_runtime_instr 486 sched_getaffinity 487 sched_getattr 488 sched_getparam 489 sched_get_priority_max 490 sched_get_priority_min 491 sched_getscheduler 492 sched_rr_get_interval 493 sched_rr_get_interval_time64 494 sched_setaffinity 495 sched_setattr 496 sched_setparam 497 sched_setscheduler 498 sched_yield 499 seccomp 500 select 501 semctl 502 semget 503 semop 504 semtimedop 505 semtimedop_time64 506 send 507 sendfile 508 sendfile64 509 sendmmsg 510 sendmsg 511 sendto 512 setdomainname 513 setfsgid 514 setfsgid32 515 setfsuid 516 setfsuid32 517 setgid 518 setgid32 519 setgroups 520 setgroups32 521 sethostname 522 setitimer 523 setns 524 setpgid 525 setpriority 526 setregid 527 setregid32 528 setresgid 529 setresgid32 530 setresuid 531 setresuid32 532 setreuid 533 setreuid32 534 setrlimit 535 set_robust_list 536 setsid 537 setsockopt 538 set_thread_area 539 set_tid_address 540 settimeofday 541 set_tls 542 setuid 543 setuid32 544 setxattr 545 shmat 546 shmctl 547 shmdt 548 shmget 549 shutdown 550 sigaltstack 551 signalfd 552 signalfd4 553 sigreturn 554 socket 555 socketcall 556 socketpair 557 splice 558 stat 559 stat64 560 statfs 561 statfs64 562 stime 563 symlink 564 symlinkat 565 sync 566 sync_file_range 567 syncfs 568 sysinfo 569 syslog 570 tee 571 tgkill 572 time 573 timer_create 574 timer_delete 575 timerfd_create 576 timerfd_gettime 577 timerfd_gettime64 578 timerfd_settime 579 timerfd_settime64 580 timer_getoverrun 581 timer_gettime 582 timer_gettime64 583 timer_settime 584 timer_settime64 585 times 586 tkill 587 truncate 588 truncate64 589 ugetrlimit 590 umask 591 umount 592 umount2 593 uname 594 unlink 595 unlinkat 596 unshare 597 utime 598 utimensat 599 utimensat_time64 600 utimes 601 vfork 602 vhangup 603 vmsplice 604 wait4 605 waitid 606 waitpid 607 write 608 writev 609 ` 610 611 const dockerSupportPrivilegedAppArmor = ` 612 # Description: allow docker daemon to run privileged containers. This gives 613 # full access to all resources on the system and thus gives device ownership to 614 # connected snaps. 615 616 # These rules are here to allow Docker to launch unconfined containers but 617 # allow the docker daemon itself to go unconfined. Since it runs as root, this 618 # grants device ownership. 619 change_profile unsafe /**, 620 signal (send) peer=unconfined, 621 ptrace (read, trace) peer=unconfined, 622 623 # This grants raw access to device files and thus device ownership 624 /dev/** mrwkl, 625 @{PROC}/** mrwkl, 626 627 # When kubernetes drives docker/containerd, it creates and runs files in the 628 # container at arbitrary locations (eg, via pivot_root). 629 /** rwlix, 630 ` 631 632 const dockerSupportPrivilegedSecComp = ` 633 # Description: allow docker daemon to run privileged containers. This gives 634 # full access to all resources on the system and thus gives device ownership to 635 # connected snaps. 636 637 # This grants, among other things, kernel module loading and therefore device 638 # ownership. 639 @unrestricted 640 ` 641 642 type dockerSupportInterface struct{} 643 644 func (iface *dockerSupportInterface) Name() string { 645 return "docker-support" 646 } 647 648 func (iface *dockerSupportInterface) StaticInfo() interfaces.StaticInfo { 649 return interfaces.StaticInfo{ 650 Summary: dockerSupportSummary, 651 ImplicitOnCore: true, 652 ImplicitOnClassic: true, 653 BaseDeclarationPlugs: dockerSupportBaseDeclarationPlugs, 654 BaseDeclarationSlots: dockerSupportBaseDeclarationSlots, 655 } 656 } 657 658 var ( 659 parserFeatures = apparmor_sandbox.ParserFeatures 660 ) 661 662 func (iface *dockerSupportInterface) UDevConnectedPlug(spec *udev.Specification, plug *interfaces.ConnectedPlug, slot *interfaces.ConnectedSlot) error { 663 spec.SetControlsDeviceCgroup() 664 665 return nil 666 } 667 668 func (iface *dockerSupportInterface) KModConnectedPlug(spec *kmod.Specification, plug *interfaces.ConnectedPlug, slot *interfaces.ConnectedSlot) error { 669 // https://kubernetes.io/docs/setup/production-environment/container-runtimes/ 670 if err := spec.AddModule("overlay"); err != nil { 671 return err 672 } 673 return nil 674 } 675 676 func (iface *dockerSupportInterface) AppArmorConnectedPlug(spec *apparmor.Specification, plug *interfaces.ConnectedPlug, slot *interfaces.ConnectedSlot) error { 677 var privileged bool 678 _ = plug.Attr("privileged-containers", &privileged) 679 680 // The 'change_profile unsafe' rules conflict with the 'ix' rules in 681 // the home interface, so suppress them (LP: #1797786) 682 spec.SetSuppressHomeIx() 683 spec.AddSnippet(dockerSupportConnectedPlugAppArmor) 684 if privileged { 685 spec.AddSnippet(dockerSupportPrivilegedAppArmor) 686 } 687 if !release.OnClassic { 688 spec.AddSnippet(dockerSupportConnectedPlugAppArmorCore) 689 } 690 spec.SetUsesPtraceTrace() 691 return nil 692 } 693 694 func (iface *dockerSupportInterface) SecCompConnectedPlug(spec *seccomp.Specification, plug *interfaces.ConnectedPlug, slot *interfaces.ConnectedSlot) error { 695 var privileged bool 696 _ = plug.Attr("privileged-containers", &privileged) 697 snippet := dockerSupportConnectedPlugSecComp 698 if privileged { 699 snippet += dockerSupportPrivilegedSecComp 700 } 701 spec.AddSnippet(snippet) 702 return nil 703 } 704 705 func (iface *dockerSupportInterface) BeforePreparePlug(plug *snap.PlugInfo) error { 706 if v, ok := plug.Attrs["privileged-containers"]; ok { 707 if _, ok = v.(bool); !ok { 708 return fmt.Errorf("docker-support plug requires bool with 'privileged-containers'") 709 } 710 } 711 return nil 712 } 713 714 func (iface *dockerSupportInterface) AutoConnect(*snap.PlugInfo, *snap.SlotInfo) bool { 715 // allow what declarations allowed 716 return true 717 } 718 719 func init() { 720 registerIface(&dockerSupportInterface{}) 721 }