github.com/bugraaydogar/snapd@v0.0.0-20210315170335-8c70bb858939/interfaces/builtin/kubernetes_support.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2017-2018 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package builtin 21 22 import ( 23 "fmt" 24 "strings" 25 26 "github.com/snapcore/snapd/interfaces" 27 "github.com/snapcore/snapd/interfaces/apparmor" 28 "github.com/snapcore/snapd/interfaces/kmod" 29 "github.com/snapcore/snapd/interfaces/seccomp" 30 "github.com/snapcore/snapd/interfaces/udev" 31 "github.com/snapcore/snapd/snap" 32 ) 33 34 const kubernetesSupportSummary = `allows operating as the Kubernetes service` 35 36 const kubernetesSupportBaseDeclarationPlugs = ` 37 kubernetes-support: 38 allow-installation: false 39 deny-auto-connection: true 40 ` 41 42 const kubernetesSupportBaseDeclarationSlots = ` 43 kubernetes-support: 44 allow-installation: 45 slot-snap-type: 46 - core 47 deny-auto-connection: true 48 ` 49 50 const kubernetesSupportConnectedPlugAppArmorCommon = ` 51 # Common rules for running as a kubernetes node 52 53 # reading cgroups 54 capability sys_resource, 55 /sys/fs/cgroup/{,**} r, 56 57 # Allow adjusting the OOM score for containers. Note, this allows adjusting for 58 # all processes, not just containers. 59 @{PROC}/@{pid}/oom_score_adj rw, 60 @{PROC}/sys/vm/overcommit_memory rw, 61 /sys/kernel/mm/hugepages/{,**} r, 62 /sys/kernel/mm/transparent_hugepage/{,**} r, 63 64 capability dac_override, 65 66 /{,usr/}bin/systemd-run Cxr -> systemd_run, 67 /run/systemd/private r, 68 profile systemd_run (attach_disconnected,mediate_deleted) { 69 # Common rules for kubernetes use of systemd_run 70 #include <abstractions/base> 71 72 /{,usr/}bin/systemd-run rm, 73 owner @{PROC}/@{pid}/stat r, 74 owner @{PROC}/@{pid}/environ r, 75 @{PROC}/cmdline r, # proc_cmdline() 76 77 # setsockopt() 78 capability net_admin, 79 80 # systemd-run's detect_container() looks at several files to determine if it 81 # is running in a container. 82 @{PROC}/sys/kernel/osrelease r, 83 @{PROC}/1/sched r, 84 /run/systemd/container r, 85 86 # kubelet calls 'systemd-run --scope true' to determine if systemd is 87 # available and usable for calling certain mount commands under transient 88 # units as part of its lifecycle management. This requires ptrace 'read' on 89 # unconfined since systemd-run will call its detect_container() which will 90 # try to read /proc/1/environ. This is mediated via PTRACE_MODE_READ when 91 # run within kubelet's namespace. 92 ptrace (read) peer=unconfined, 93 /run/systemd/private rw, 94 95 # kubelet calling 'systemd-run --scope true' triggers this when kubelet is 96 # run in a nested container (eg, under lxd). 97 @{PROC}/1/cmdline r, 98 99 # Ubuntu's ptrace patchset before (at least) 20.04 did not correctly evaluate 100 # PTRACE_MODE_READ and policy required 'trace' instead of 'read'. 101 # (LP: #1890848). This child profile doesn't have 'capability sys_ptrace', so 102 # continue to allow this historic 'trace' rule on unconfined (which systemd 103 # runs as) since systemd-run won't be able to ptrace this snap's processes. 104 # This can be dropped once LP: #1890848 is fixed. 105 ptrace (trace) peer=unconfined, 106 107 /{,usr/}bin/true ixr, 108 @{INSTALL_DIR}/{@{SNAP_NAME},@{SNAP_INSTANCE_NAME}}/@{SNAP_REVISION}/{,usr/}bin/true ixr, 109 ###KUBERNETES_SUPPORT_SYSTEMD_RUN### 110 } 111 ` 112 113 const kubernetesSupportConnectedPlugAppArmorKubelet = ` 114 # Allow running as the kubelet service 115 116 # Ideally this would be snap-specific 117 /run/dockershim.sock rw, 118 119 # Ideally this would be snap-specific (it could if the control plane was a 120 # snap), but in deployments where the control plane is not a snap, it will tell 121 # flannel to use this path. 122 /run/flannel/{,**} rw, 123 /run/flannel/** k, 124 125 # allow managing pods' cgroups 126 /sys/fs/cgroup/*/kubepods/{,**} rw, 127 128 # kubelet can be configured to use the systemd cgroup driver which moves 129 # container processes into systemd-managed cgroups. This is now the recommended 130 # configuration since it provides a single cgroup manager (systemd) in an 131 # effort to achieve consistent views of resources. 132 /sys/fs/cgroup/*/systemd/{,system.slice/} rw, # create missing dirs 133 /sys/fs/cgroup/*/systemd/system.slice/** r, 134 /sys/fs/cgroup/*/systemd/system.slice/cgroup.procs w, 135 136 # Allow tracing our own processes. Note, this allows seccomp sandbox escape on 137 # kernels < 4.8 138 capability sys_ptrace, 139 ptrace (trace) peer=snap.@{SNAP_INSTANCE_NAME}.*, 140 141 # Allow ptracing other processes (as part of ps-style process lookups). Note, 142 # the peer needs a corresponding tracedby rule. As a special case, disallow 143 # ptracing unconfined. 144 ptrace (trace), 145 deny ptrace (trace) peer=unconfined, 146 147 @{PROC}/[0-9]*/attr/ r, 148 @{PROC}/[0-9]*/fdinfo/ r, 149 @{PROC}/[0-9]*/map_files/ r, 150 @{PROC}/[0-9]*/ns/{,*} r, 151 # dac_read_search needed for lstat'ing non-root owned ns/* files 152 capability dac_read_search, 153 154 # kubernetes will verify and set panic and panic_on_oops to values it considers 155 # sane 156 @{PROC}/sys/kernel/panic w, 157 @{PROC}/sys/kernel/panic_on_oops w, 158 @{PROC}/sys/kernel/keys/root_maxbytes r, 159 @{PROC}/sys/kernel/keys/root_maxkeys r, 160 161 /dev/kmsg r, 162 163 # kubelet calls out to systemd-run for some mounts, but not all of them and not 164 # unmounts... 165 capability sys_admin, 166 mount /var/snap/@{SNAP_INSTANCE_NAME}/common/{,**} -> /var/snap/@{SNAP_INSTANCE_NAME}/common/{,**}, 167 mount options=(rw, rshared) -> /var/snap/@{SNAP_INSTANCE_NAME}/common/{,**}, 168 169 /{,usr/}bin/mount ixr, 170 /{,usr/}bin/umount ixr, 171 deny /run/mount/utab{,.lock} rw, 172 umount /var/snap/@{SNAP_INSTANCE_NAME}/common/**, 173 174 # When fsGroup is set, the pod's volume will be recursively chowned with the 175 # setgid bit set on directories so new files will be owned by the fsGroup. See 176 # kubernetes pkg/volume/volume_linux.go:changeFilePermission() 177 capability fsetid, 178 ` 179 180 const kubernetesSupportConnectedPlugAppArmorKubeletSystemdRun = ` 181 # kubelet mount rules 182 capability sys_admin, 183 /{,usr/}bin/mount ixr, 184 mount fstype="tmpfs" tmpfs -> /var/snap/@{SNAP_INSTANCE_NAME}/common/**, 185 deny /run/mount/utab{,.lock} rw, 186 187 # For mounting volume subPaths 188 mount /var/snap/@{SNAP_INSTANCE_NAME}/common/{,**} -> /var/snap/@{SNAP_INSTANCE_NAME}/common/{,**}, 189 mount options=(rw, remount, bind) -> /var/snap/@{SNAP_INSTANCE_NAME}/common/{,**}, 190 # nvme0-99, 1-63 partitions with 1-63 optional namespaces 191 mount /dev/nvme{[0-9],[1-9][0-9]}n{[1-9],[1-5][0-9],6[0-3]}{,p{[1-9],[1-5][0-9],6[0-3]}} -> /var/snap/@{SNAP_INSTANCE_NAME}/common/**, 192 # SCSI sda-sdiv, 1-15 partitions 193 mount /dev/sd{[a-z],[a-h][a-z],i[a-v]}{[1-9],1[0-5]} -> /var/snap/@{SNAP_INSTANCE_NAME}/common/**, 194 # virtio vda-vdz, 1-63 partitions 195 mount /dev/vd[a-z]{[1-9],[1-5][0-9],6[0-3]} -> /var/snap/@{SNAP_INSTANCE_NAME}/common/**, 196 umount /var/snap/@{SNAP_INSTANCE_NAME}/common/**, 197 198 # When mounting a volume subPath, kubelet binds mounts on an open fd (eg, 199 # /proc/.../fd/N) which triggers a ptrace 'read' denial on the parent 200 # kubelet peer process from this child profile due to PTRACE_MODE_READ (man 201 # ptrace) checks. 202 ptrace (read) peer=snap.@{SNAP_INSTANCE_NAME}.@{SNAP_COMMAND_NAME}, 203 204 # Ubuntu's ptrace patchset before (at least) 20.04 did not correctly evaluate 205 # PTRACE_MODE_READ and policy required 'trace' instead of 'read'. 206 # (LP: #1890848). This child profile doesn't have 'capability sys_ptrace', so 207 # continue to allow this historic 'trace' rule on kubelet (our parent peer) 208 # since systemd-run won't be able to ptrace this snap's processes (kubelet 209 # would also need a corresponding tracedby rule). This can be dropped once 210 # LP: #1890848 is fixed. 211 ptrace (trace) peer=snap.@{SNAP_INSTANCE_NAME}.@{SNAP_COMMAND_NAME}, 212 ` 213 214 // k8s.io/apiserver/pkg/storage/etcd3/logger.go pulls in go-systemd via 215 // go.etcd.io/etcd/clientv3. See: 216 // https://github.com/coreos/go-systemd/blob/master/journal/journal.go#L211 217 const kubernetesSupportConnectedPlugAppArmorAutobindUnix = ` 218 # Allow using the 'autobind' feature of bind() (eg, for journald). 219 #unix (bind) type=dgram addr=none, 220 # Due to LP: 1867216, we cannot use the above rule and must instead use this 221 # less specific rule that allows bind() to arbitrary SOCK_DGRAM abstract socket 222 # names (separate send and receive rules are still required for communicating 223 # over the socket). 224 unix (bind) type=dgram, 225 ` 226 227 const kubernetesSupportConnectedPlugSeccompAutobindUnix = ` 228 # Allow using the 'autobind' feature of bind() (eg, for journald). 229 bind 230 ` 231 232 const kubernetesSupportConnectedPlugSeccompKubelet = ` 233 # Allow running as the kubelet service 234 mount 235 umount 236 umount2 237 238 unshare 239 setns - CLONE_NEWNET 240 241 # When fsGroup is set, the pod's volume will be recursively chowned with the 242 # setgid bit set on directories so new files will be owned by the fsGroup. See 243 # kubernetes pkg/volume/volume_linux.go:changeFilePermission() 244 fchownat 245 ` 246 247 var kubernetesSupportConnectedPlugUDevKubelet = []string{ 248 `KERNEL=="kmsg"`, 249 } 250 251 const kubernetesSupportConnectedPlugAppArmorKubeproxy = ` 252 # Allow running as the kubeproxy service 253 254 # managing our own cgroup 255 /sys/fs/cgroup/*/kube-proxy/{,**} rw, 256 257 # Allow reading the state of modules kubernetes needs 258 /sys/module/libcrc32c/initstate r, 259 /sys/module/llc/initstate r, 260 /sys/module/stp/initstate r, 261 /sys/module/ip_vs/initstate r, 262 /sys/module/ip_vs_rr/initstate r, 263 /sys/module/ip_vs_sh/initstate r, 264 /sys/module/ip_vs_wrr/initstate r, 265 ` 266 267 var kubernetesSupportConnectedPlugKmodKubeProxy = []string{ 268 `ip_vs_rr`, 269 `ip_vs_sh`, 270 `ip_vs_wrr`, 271 `libcrc32c`, 272 `llc`, 273 `stp`, 274 } 275 276 type kubernetesSupportInterface struct { 277 commonInterface 278 } 279 280 func (iface *kubernetesSupportInterface) ServicePermanentPlug(plug *snap.PlugInfo) []string { 281 // only autobind-unix flavor does not get Delegate=true, all other flavors 282 // are usable to manage control groups of processes/containers, and thus 283 // need Delegate=true 284 flavor := k8sFlavor(plug) 285 if flavor == "autobind-unix" { 286 return nil 287 } 288 289 return []string{"Delegate=true"} 290 } 291 292 func k8sFlavor(plug interfaces.Attrer) string { 293 var flavor string 294 _ = plug.Attr("flavor", &flavor) 295 return flavor 296 } 297 298 func (iface *kubernetesSupportInterface) AppArmorConnectedPlug(spec *apparmor.Specification, plug *interfaces.ConnectedPlug, slot *interfaces.ConnectedSlot) error { 299 snippet := kubernetesSupportConnectedPlugAppArmorCommon 300 systemd_run_extra := "" 301 302 // All flavors should include the autobind-unix rules, but we break it 303 // out so other k8s daemons can use this flavor without getting the 304 // privileged rules. 305 switch k8sFlavor(plug) { 306 case "kubelet": 307 systemd_run_extra = kubernetesSupportConnectedPlugAppArmorKubeletSystemdRun 308 snippet += kubernetesSupportConnectedPlugAppArmorKubelet 309 snippet += kubernetesSupportConnectedPlugAppArmorAutobindUnix 310 spec.SetUsesPtraceTrace() 311 case "kubeproxy": 312 snippet += kubernetesSupportConnectedPlugAppArmorKubeproxy 313 snippet += kubernetesSupportConnectedPlugAppArmorAutobindUnix 314 case "autobind-unix": 315 snippet = kubernetesSupportConnectedPlugAppArmorAutobindUnix 316 default: 317 systemd_run_extra = kubernetesSupportConnectedPlugAppArmorKubeletSystemdRun 318 snippet += kubernetesSupportConnectedPlugAppArmorKubelet 319 snippet += kubernetesSupportConnectedPlugAppArmorKubeproxy 320 snippet += kubernetesSupportConnectedPlugAppArmorAutobindUnix 321 spec.SetUsesPtraceTrace() 322 } 323 324 old := "###KUBERNETES_SUPPORT_SYSTEMD_RUN###" 325 spec.AddSnippet(strings.Replace(snippet, old, systemd_run_extra, -1)) 326 return nil 327 } 328 329 func (iface *kubernetesSupportInterface) SecCompConnectedPlug(spec *seccomp.Specification, plug *interfaces.ConnectedPlug, slot *interfaces.ConnectedSlot) error { 330 // All flavors should include the autobind-unix rules, but we add the 331 // privileged kubelet rules conditionally. 332 snippet := kubernetesSupportConnectedPlugSeccompAutobindUnix 333 flavor := k8sFlavor(plug) 334 if flavor == "kubelet" || flavor == "" { 335 snippet += kubernetesSupportConnectedPlugSeccompKubelet 336 } 337 spec.AddSnippet(snippet) 338 return nil 339 } 340 341 func (iface *kubernetesSupportInterface) UDevConnectedPlug(spec *udev.Specification, plug *interfaces.ConnectedPlug, slot *interfaces.ConnectedSlot) error { 342 flavor := k8sFlavor(plug) 343 if flavor == "kubelet" || flavor == "" { 344 for _, rule := range kubernetesSupportConnectedPlugUDevKubelet { 345 spec.TagDevice(rule) 346 } 347 } 348 return nil 349 } 350 351 func (iface *kubernetesSupportInterface) KModConnectedPlug(spec *kmod.Specification, plug *interfaces.ConnectedPlug, slot *interfaces.ConnectedSlot) error { 352 flavor := k8sFlavor(plug) 353 if flavor == "kubeproxy" || flavor == "" { 354 for _, m := range kubernetesSupportConnectedPlugKmodKubeProxy { 355 if err := spec.AddModule(m); err != nil { 356 return err 357 } 358 } 359 } 360 return nil 361 } 362 363 func (iface *kubernetesSupportInterface) BeforePreparePlug(plug *snap.PlugInfo) error { 364 // It's fine if flavor isn't specified, but if it is, it needs to be 365 // either "kubelet", "kubeproxy" or "autobind-unix" 366 if t, ok := plug.Attrs["flavor"]; ok && t != "kubelet" && t != "kubeproxy" && t != "autobind-unix" { 367 return fmt.Errorf(`kubernetes-support plug requires "flavor" to be either "kubelet", "kubeproxy" or "autobind-unix"`) 368 } 369 370 return nil 371 } 372 373 func init() { 374 registerIface(&kubernetesSupportInterface{commonInterface{ 375 name: "kubernetes-support", 376 summary: kubernetesSupportSummary, 377 implicitOnClassic: true, 378 implicitOnCore: true, 379 baseDeclarationPlugs: kubernetesSupportBaseDeclarationPlugs, 380 baseDeclarationSlots: kubernetesSupportBaseDeclarationSlots, 381 }}) 382 }