github.com/jingruilea/kubeedge@v1.2.0-beta.0.0.20200410162146-4bb8902b3879/edge/pkg/edged/edged_pods.go (about) 1 /* 2 Copyright 2016 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 16 @CHANGELOG 17 KubeEdge Authors: To create mini-kubelet for edge deployment scenario, 18 This file is derived from K8S Kubelet code with reduced set of methods 19 Changes done are 20 1. Package edged got some functions from "k8s.io/kubernetes/pkg/kubelet/kubelet_pods.go" 21 and made some variant 22 2. Simplify the function of makeEnvironmentVariables, 23 which is used to inject environment variables into containers. 24 Currently the users can set the environment variables directly or 25 by reading from the attributes of Spec.nodeName and spec.serviceAccountName. 26 */ 27 28 package edged 29 30 import ( 31 "bytes" 32 "fmt" 33 "io/ioutil" 34 "net" 35 "os" 36 "path" 37 "path/filepath" 38 "runtime" 39 "sort" 40 "strings" 41 42 v1 "k8s.io/api/core/v1" 43 apierrors "k8s.io/apimachinery/pkg/api/errors" 44 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 45 "k8s.io/apimachinery/pkg/types" 46 utilfeature "k8s.io/apiserver/pkg/util/feature" 47 runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" 48 "k8s.io/klog" 49 podutil "k8s.io/kubernetes/pkg/api/v1/pod" 50 podshelper "k8s.io/kubernetes/pkg/apis/core/pods" 51 v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" 52 "k8s.io/kubernetes/pkg/features" 53 "k8s.io/kubernetes/pkg/fieldpath" 54 kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" 55 "k8s.io/kubernetes/pkg/kubelet/status" 56 kubetypes "k8s.io/kubernetes/pkg/kubelet/types" 57 "k8s.io/kubernetes/pkg/kubelet/util/format" 58 "k8s.io/kubernetes/pkg/volume/util" 59 "k8s.io/kubernetes/pkg/volume/util/volumepathhandler" 60 "k8s.io/kubernetes/pkg/volume/validation" 61 "k8s.io/kubernetes/third_party/forked/golang/expansion" 62 utilfile "k8s.io/utils/path" 63 64 edgedconfig "github.com/kubeedge/kubeedge/edge/pkg/edged/config" 65 ) 66 67 const ( 68 etcHostsPath = "/etc/hosts" 69 systemdSuffix = ".slice" 70 ) 71 72 // GetActivePods returns non-terminal pods 73 func (e *edged) GetActivePods() []*v1.Pod { 74 allPods := e.podManager.GetPods() 75 activePods := e.filterOutTerminatedPods(allPods) 76 return activePods 77 } 78 79 // filterOutTerminatedPods returns the given pods which the status manager 80 // does not consider failed or succeeded. 81 func (e *edged) filterOutTerminatedPods(pods []*v1.Pod) []*v1.Pod { 82 var filteredPods []*v1.Pod 83 for _, p := range pods { 84 if e.podIsTerminated(p) { 85 continue 86 } 87 filteredPods = append(filteredPods, p) 88 } 89 return filteredPods 90 } 91 92 // truncatePodHostnameIfNeeded truncates the pod hostname if it's longer than 63 chars. 93 func truncatePodHostnameIfNeeded(podName, hostname string) (string, error) { 94 // Cap hostname at 63 chars (specification is 64bytes which is 63 chars and the null terminating char). 95 const hostnameMaxLen = 63 96 if len(hostname) <= hostnameMaxLen { 97 return hostname, nil 98 } 99 truncated := hostname[:hostnameMaxLen] 100 klog.Errorf("hostname for pod:%q was longer than %d. Truncated hostname to :%q", podName, hostnameMaxLen, truncated) 101 // hostname should not end with '-' or '.' 102 truncated = strings.TrimRight(truncated, "-.") 103 if len(truncated) == 0 { 104 // This should never happen. 105 return "", fmt.Errorf("hostname for pod %q was invalid: %q", podName, hostname) 106 } 107 return truncated, nil 108 } 109 110 // GeneratePodHostNameAndDomain creates a hostname and domain name for a pod, 111 // given that pod's spec and annotations or returns an error. 112 func (e *edged) GeneratePodHostNameAndDomain(pod *v1.Pod) (string, string, error) { 113 // TODO(vmarmol): Handle better. 114 clusterDomain := "cluster" 115 116 hostname := pod.Name 117 if len(pod.Spec.Hostname) > 0 { 118 hostname = pod.Spec.Hostname 119 } 120 121 hostname, err := truncatePodHostnameIfNeeded(pod.Name, hostname) 122 if err != nil { 123 return "", "", err 124 } 125 126 hostDomain := "" 127 if len(pod.Spec.Subdomain) > 0 { 128 hostDomain = fmt.Sprintf("%s.%s.svc.%s", pod.Spec.Subdomain, pod.Namespace, clusterDomain) 129 } 130 131 return hostname, hostDomain, nil 132 } 133 134 // Get a list of pods that have data directories. 135 func (e *edged) listPodsFromDisk() ([]types.UID, error) { 136 podInfos, err := ioutil.ReadDir(e.getPodsDir()) 137 if err != nil { 138 return nil, err 139 } 140 pods := []types.UID{} 141 for i := range podInfos { 142 if podInfos[i].IsDir() { 143 pods = append(pods, types.UID(podInfos[i].Name())) 144 } 145 } 146 return pods, nil 147 } 148 149 // hasHostNamespace returns true if hostIPC, hostNetwork, or hostPID are set to true. 150 func hasHostNamespace(pod *v1.Pod) bool { 151 if pod.Spec.SecurityContext == nil { 152 return false 153 } 154 return pod.Spec.HostIPC || pod.Spec.HostNetwork || pod.Spec.HostPID 155 } 156 157 // hasHostVolume returns true if the pod spec has a HostPath volume. 158 func hasHostVolume(pod *v1.Pod) bool { 159 for _, v := range pod.Spec.Volumes { 160 if v.HostPath != nil { 161 return true 162 } 163 } 164 return false 165 } 166 167 // hasNonNamespacedCapability returns true if MKNOD, SYS_TIME, or SYS_MODULE is requested for any container. 168 func hasNonNamespacedCapability(pod *v1.Pod) bool { 169 for _, c := range pod.Spec.Containers { 170 if c.SecurityContext != nil && c.SecurityContext.Capabilities != nil { 171 for _, cap := range c.SecurityContext.Capabilities.Add { 172 if cap == "MKNOD" || cap == "SYS_TIME" || cap == "SYS_MODULE" { 173 return true 174 } 175 } 176 } 177 } 178 179 return false 180 } 181 182 // HasPrivilegedContainer returns true if any of the containers in the pod are privileged. 183 func hasPrivilegedContainer(pod *v1.Pod) bool { 184 for _, c := range pod.Spec.Containers { 185 if c.SecurityContext != nil && 186 c.SecurityContext.Privileged != nil && 187 *c.SecurityContext.Privileged { 188 return true 189 } 190 } 191 return false 192 } 193 194 // enableHostUserNamespace determines if the host user namespace should be used by the container runtime. 195 // Returns true if the pod is using a host pid, pic, or network namespace, the pod is using a non-namespaced 196 // capability, the pod contains a privileged container, or the pod has a host path volume. 197 // 198 // NOTE: when if a container shares any namespace with another container it must also share the user namespace 199 // or it will not have the correct capabilities in the namespace. This means that host user namespace 200 // is enabled per pod, not per container. 201 func (e *edged) enableHostUserNamespace(pod *v1.Pod) bool { 202 if hasPrivilegedContainer(pod) || hasHostNamespace(pod) || 203 hasHostVolume(pod) || hasNonNamespacedCapability(pod) { 204 return true 205 } 206 return false 207 } 208 209 // podIsTerminated returns true if pod is in the terminated state ("Failed" or "Succeeded"). 210 func (e *edged) podIsTerminated(pod *v1.Pod) bool { 211 // Check the cached pod status which was set after the last sync. 212 status, ok := e.statusManager.GetPodStatus(pod.UID) 213 if !ok { 214 // If there is no cached status, use the status from the 215 // apiserver. This is useful if kubelet has recently been 216 // restarted. 217 status = pod.Status 218 } 219 220 return status.Phase == v1.PodFailed || status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses)) 221 } 222 223 // makePodDataDirs creates the dirs for the pod datas. 224 func (e *edged) makePodDataDirs(pod *v1.Pod) error { 225 uid := pod.UID 226 if err := os.MkdirAll(e.getPodDir(uid), 0750); err != nil && !os.IsExist(err) { 227 return err 228 } 229 if err := os.MkdirAll(e.getPodVolumesDir(uid), 0750); err != nil && !os.IsExist(err) { 230 return err 231 } 232 if err := os.MkdirAll(e.getPodPluginsDir(uid), 0750); err != nil && !os.IsExist(err) { 233 return err 234 } 235 return nil 236 } 237 238 func (e *edged) makePodDir() error { 239 if err := os.MkdirAll(e.getPodsDir(), 0750); err != nil && !os.IsExist(err) { 240 return err 241 } 242 return nil 243 } 244 245 // notRunning returns true if every status is terminated or waiting, or the status list 246 // is empty. 247 func notRunning(statuses []v1.ContainerStatus) bool { 248 for _, status := range statuses { 249 if status.State.Terminated == nil && status.State.Waiting == nil { 250 return false 251 } 252 } 253 return true 254 } 255 256 func (e *edged) GenerateContainerOptions(pod *v1.Pod) (*kubecontainer.RunContainerOptions, error) { 257 opts := kubecontainer.RunContainerOptions{} 258 hostname, hostDomainName, err := e.GeneratePodHostNameAndDomain(pod) 259 if err != nil { 260 return nil, err 261 } 262 podName := util.GetUniquePodName(pod) 263 volumes := e.volumeManager.GetMountedVolumesForPod(podName) 264 for _, container := range pod.Spec.Containers { 265 mounts, err := makeMounts(pod, e.getPodDir(pod.UID), &container, hostname, hostDomainName, pod.Status.PodIP, volumes) 266 if err != nil { 267 return nil, err 268 } 269 opts.Mounts = append(opts.Mounts, mounts...) 270 } 271 272 return &opts, nil 273 } 274 275 // makeMounts determines the mount points for the given container. 276 func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain, podIP string, podVolumes kubecontainer.VolumeMap) ([]kubecontainer.Mount, error) { 277 // Kubernetes only mounts on /etc/hosts if: 278 // - container is not an infrastructure (pause) container 279 // - container is not already mounting on /etc/hosts 280 // - OS is not Windows 281 // Kubernetes will not mount /etc/hosts if: 282 // - when the Pod sandbox is being created, its IP is still unknown. Hence, PodIP will not have been set. 283 mountEtcHostsFile := len(podIP) > 0 && runtime.GOOS != "windows" 284 klog.Infof("container: %v/%v/%v podIP: %q creating hosts mount: %v", pod.Namespace, pod.Name, container.Name, podIP, mountEtcHostsFile) 285 mounts := []kubecontainer.Mount{} 286 for _, mount := range container.VolumeMounts { 287 // do not mount /etc/hosts if container is already mounting on the path 288 mountEtcHostsFile = mountEtcHostsFile && (mount.MountPath != etcHostsPath) 289 vol, ok := podVolumes[mount.Name] 290 if !ok || vol.Mounter == nil { 291 klog.Errorf("Mount cannot be satisfied for container %q, because the volume is missing or the volume mounter is nil: %+v", container.Name, mount) 292 return nil, fmt.Errorf("cannot find volume %q to mount into container %q", mount.Name, container.Name) 293 } 294 295 relabelVolume := false 296 // If the volume supports SELinux and it has not been 297 // relabeled already and it is not a read-only volume, 298 // relabel it and mark it as labeled 299 if vol.Mounter.GetAttributes().Managed && vol.Mounter.GetAttributes().SupportsSELinux && !vol.SELinuxLabeled { 300 vol.SELinuxLabeled = true 301 relabelVolume = true 302 } 303 hostPath, err := util.GetPath(vol.Mounter) 304 if err != nil { 305 return nil, err 306 } 307 if mount.SubPath != "" { 308 if filepath.IsAbs(mount.SubPath) { 309 return nil, fmt.Errorf("error SubPath `%s` must not be an absolute path", mount.SubPath) 310 } 311 312 err = validation.ValidatePathNoBacksteps(mount.SubPath) 313 if err != nil { 314 return nil, fmt.Errorf("unable to provision SubPath `%s`: %v", mount.SubPath, err) 315 } 316 317 fileinfo, err := os.Lstat(hostPath) 318 if err != nil { 319 return nil, err 320 } 321 perm := fileinfo.Mode() 322 323 hostPath = filepath.Join(hostPath, mount.SubPath) 324 325 if subPathExists, err := utilfile.Exists(utilfile.CheckSymlinkOnly, hostPath); err != nil { 326 klog.Errorf("Could not determine if subPath %s exists; will not attempt to change its permissions", hostPath) 327 } else if !subPathExists { 328 // Create the sub path now because if it's auto-created later when referenced, it may have an 329 // incorrect ownership and mode. For example, the sub path directory must have at least g+rwx 330 // when the pod specifies an fsGroup, and if the directory is not created here, Docker will 331 // later auto-create it with the incorrect mode 0750 332 if err := os.MkdirAll(hostPath, perm); err != nil { 333 klog.Errorf("failed to mkdir:%s", hostPath) 334 return nil, err 335 } 336 337 // chmod the sub path because umask may have prevented us from making the sub path with the same 338 // permissions as the mounter path 339 if err := os.Chmod(hostPath, perm); err != nil { 340 return nil, err 341 } 342 } 343 } 344 345 // Docker Volume Mounts fail on Windows if it is not of the form C:/ 346 containerPath := mount.MountPath 347 if runtime.GOOS == "windows" { 348 if (strings.HasPrefix(hostPath, "/") || strings.HasPrefix(hostPath, "\\")) && !strings.Contains(hostPath, ":") { 349 hostPath = "c:" + hostPath 350 } 351 } 352 if !filepath.IsAbs(containerPath) { 353 containerPath = makeAbsolutePath(runtime.GOOS, containerPath) 354 } 355 356 // Extend the path according to extend type of mount volume, by appending the pod metadata to the path. 357 // TODO: this logic is added by Huawei, make sure what this for and remove it 358 // extendVolumePath := volumehelper.GetExtendVolumePath(pod, container, mount.ExtendPathMode) 359 // if extendVolumePath != "" { 360 // hostPath = filepath.Join(hostPath, extendVolumePath) 361 // } 362 propagation, err := translateMountPropagation(mount.MountPropagation) 363 if err != nil { 364 return nil, err 365 } 366 klog.Infof("Pod %q container %q mount %q has propagation %q", format.Pod(pod), container.Name, mount.Name, propagation) 367 368 mounts = append(mounts, kubecontainer.Mount{ 369 Name: mount.Name, 370 ContainerPath: containerPath, 371 HostPath: hostPath, 372 ReadOnly: mount.ReadOnly, 373 SELinuxRelabel: relabelVolume, 374 Propagation: propagation, 375 }) 376 } 377 if mountEtcHostsFile { 378 hostAliases := pod.Spec.HostAliases 379 hostsMount, err := makeHostsMount(podDir, podIP, hostName, hostDomain, hostAliases, pod.Spec.HostNetwork) 380 if err != nil { 381 return nil, err 382 } 383 mounts = append(mounts, *hostsMount) 384 } 385 return mounts, nil 386 } 387 388 func makeAbsolutePath(goos, path string) string { 389 if goos != "windows" { 390 return "/" + path 391 } 392 // These are all for windows 393 // If there is a colon, give up. 394 if strings.Contains(path, ":") { 395 return path 396 } 397 // If there is a slash, but no drive, add 'c:' 398 if strings.HasPrefix(path, "/") || strings.HasPrefix(path, "\\") { 399 return "c:" + path 400 } 401 // Otherwise, add 'c:\' 402 return "c:\\" + path 403 } 404 405 // translateMountPropagation transforms v1.MountPropagationMode to 406 // runtimeapi.MountPropagation. 407 func translateMountPropagation(mountMode *v1.MountPropagationMode) (runtimeapi.MountPropagation, error) { 408 switch { 409 case mountMode == nil: 410 // HostToContainer is the default 411 return runtimeapi.MountPropagation_PROPAGATION_HOST_TO_CONTAINER, nil 412 case *mountMode == v1.MountPropagationHostToContainer: 413 return runtimeapi.MountPropagation_PROPAGATION_HOST_TO_CONTAINER, nil 414 case *mountMode == v1.MountPropagationBidirectional: 415 return runtimeapi.MountPropagation_PROPAGATION_BIDIRECTIONAL, nil 416 default: 417 return 0, fmt.Errorf("invalid MountPropagation mode: %v", mountMode) 418 } 419 } 420 421 // makeHostsMount makes the mountpoint for the hosts file that the containers 422 // in a pod are injected with. 423 func makeHostsMount(podDir, podIP, hostName, hostDomainName string, hostAliases []v1.HostAlias, useHostNetwork bool) (*kubecontainer.Mount, error) { 424 hostsFilePath := path.Join(podDir, "etc-hosts") 425 if err := ensureHostsFile(hostsFilePath, podIP, hostName, hostDomainName, hostAliases, useHostNetwork); err != nil { 426 return nil, err 427 } 428 return &kubecontainer.Mount{ 429 Name: "k8s-managed-etc-hosts", 430 ContainerPath: etcHostsPath, 431 HostPath: hostsFilePath, 432 ReadOnly: false, 433 SELinuxRelabel: true, 434 }, nil 435 } 436 437 // ensureHostsFile ensures that the given host file has an up-to-date ip, host 438 // name, and domain name. 439 func ensureHostsFile(fileName, hostIP, hostName, hostDomainName string, hostAliases []v1.HostAlias, useHostNetwork bool) error { 440 var hostsFileContent []byte 441 var err error 442 443 if useHostNetwork { 444 // if Pod is using host network, read hosts file from the node's filesystem. 445 // `etcHostsPath` references the location of the hosts file on the node. 446 // `/etc/hosts` for *nix systems. 447 hostsFileContent, err = nodeHostsFileContent(etcHostsPath, hostAliases) 448 if err != nil { 449 return err 450 } 451 } else { 452 // if Pod is not using host network, create a managed hosts file with Pod IP and other information. 453 hostsFileContent = managedHostsFileContent(hostIP, hostName, hostDomainName, hostAliases) 454 } 455 456 return ioutil.WriteFile(fileName, hostsFileContent, 0644) 457 } 458 459 // nodeHostsFileContent reads the content of node's hosts file. 460 func nodeHostsFileContent(hostsFilePath string, hostAliases []v1.HostAlias) ([]byte, error) { 461 hostsFileContent, err := ioutil.ReadFile(hostsFilePath) 462 if err != nil { 463 return nil, err 464 } 465 hostsFileContent = append(hostsFileContent, hostsEntriesFromHostAliases(hostAliases)...) 466 return hostsFileContent, nil 467 } 468 469 func hostsEntriesFromHostAliases(hostAliases []v1.HostAlias) []byte { 470 if len(hostAliases) == 0 { 471 return []byte{} 472 } 473 474 var buffer bytes.Buffer 475 buffer.WriteString("\n") 476 buffer.WriteString("# Entries added by HostAliases.\n") 477 // write each IP/hostname pair as an entry into hosts file 478 for _, hostAlias := range hostAliases { 479 for _, hostname := range hostAlias.Hostnames { 480 buffer.WriteString(fmt.Sprintf("%s\t%s\n", hostAlias.IP, hostname)) 481 } 482 } 483 return buffer.Bytes() 484 } 485 486 // managedHostsFileContent generates the content of the managed etc hosts based on Pod IP and other 487 // information. 488 func managedHostsFileContent(hostIP, hostName, hostDomainName string, hostAliases []v1.HostAlias) []byte { 489 var buffer bytes.Buffer 490 buffer.WriteString("# Kubernetes-managed hosts file.\n") 491 buffer.WriteString("127.0.0.1\tlocalhost\n") // ipv4 localhost 492 buffer.WriteString("::1\tlocalhost ip6-localhost ip6-loopback\n") // ipv6 localhost 493 buffer.WriteString("fe00::0\tip6-localnet\n") 494 buffer.WriteString("fe00::0\tip6-mcastprefix\n") 495 buffer.WriteString("fe00::1\tip6-allnodes\n") 496 buffer.WriteString("fe00::2\tip6-allrouters\n") 497 if len(hostDomainName) > 0 { 498 buffer.WriteString(fmt.Sprintf("%s\t%s.%s\t%s\n", hostIP, hostName, hostDomainName, hostName)) 499 } else { 500 buffer.WriteString(fmt.Sprintf("%s\t%s\n", hostIP, hostName)) 501 } 502 hostsFileContent := buffer.Bytes() 503 hostsFileContent = append(hostsFileContent, hostsEntriesFromHostAliases(hostAliases)...) 504 return hostsFileContent 505 } 506 507 // IsPodTerminated returns trus if the pod with the provided UID is in a terminated state ("Failed" or "Succeeded") 508 // or if the pod has been deleted or removed 509 func (e *edged) IsPodTerminated(uid types.UID) bool { 510 pod, podFound := e.podManager.GetPodByUID(uid) 511 if !podFound { 512 return true 513 } 514 return e.podIsTerminated(pod) 515 } 516 517 func podIsEvicted(podStatus v1.PodStatus) bool { 518 return podStatus.Phase == v1.PodFailed && podStatus.Reason == "Evicted" 519 } 520 521 // IsPodDeleted returns true if the pod is deleted. For the pod to be deleted, either: 522 // 1. The pod object is deleted 523 // 2. The pod's status is evicted 524 // 3. The pod's deletion timestamp is set, and containers are not running 525 func (e *edged) IsPodDeleted(uid types.UID) bool { 526 pod, podFound := e.podManager.GetPodByUID(uid) 527 if !podFound { 528 return true 529 } 530 status, statusFound := e.statusManager.GetPodStatus(pod.UID) 531 if !statusFound { 532 status = pod.Status 533 } 534 return podIsEvicted(status) || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses)) 535 } 536 537 // removeOrphanedPodStatuses removes obsolete entries in podStatus where 538 // the pod is no longer considered bound to this node. 539 func (e *edged) removeOrphanedPodStatuses(pods []*v1.Pod) { 540 podUIDs := make(map[types.UID]bool) 541 for _, pod := range pods { 542 podUIDs[pod.UID] = true 543 } 544 545 e.statusManager.RemoveOrphanedStatuses(podUIDs) 546 } 547 548 // GetPodCgroupParent gets pod cgroup parent from container manager. 549 func (e *edged) GetPodCgroupParent(pod *v1.Pod) string { 550 ret := e.cgroupDriver 551 if edgedconfig.Config.RemoteRuntimeEndpoint == DockerShimEndpoint || 552 edgedconfig.Config.RemoteRuntimeEndpoint == DockerShimEndpointDeprecated { 553 //always have a ".slice" suffix 554 ret = ret + systemdSuffix 555 } 556 return ret 557 } 558 559 // GenerateRunContainerOptions generates the RunContainerOptions, which can be used by 560 // the container runtime to set parameters for launching a container. 561 func (e *edged) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string, podIPs []string) (*kubecontainer.RunContainerOptions, func(), error) { 562 /*opts, err := e.GenerateContainerOptions(pod) 563 if err != nil { 564 return nil, nil, err 565 }*/ 566 opts := kubecontainer.RunContainerOptions{} 567 568 hostname, hostDomainName, err := e.GeneratePodHostNameAndDomain(pod) 569 if err != nil { 570 return nil, nil, err 571 } 572 opts.Hostname = hostname 573 podName := util.GetUniquePodName(pod) 574 volumes := e.volumeManager.GetMountedVolumesForPod(podName) 575 opts.PortMappings = kubecontainer.MakePortMappings(container) 576 577 // TODO: remove feature gate check after no longer needed 578 if utilfeature.DefaultFeatureGate.Enabled(features.BlockVolume) { 579 blkutil := volumepathhandler.NewBlockVolumePathHandler() 580 blkVolumes, err := e.makeBlockVolumes(pod, container, volumes, blkutil) 581 if err != nil { 582 return nil, nil, err 583 } 584 opts.Devices = append(opts.Devices, blkVolumes...) 585 } 586 587 envs, err := e.makeEnvironmentVariables(pod, container, podIP, podIPs) 588 if err != nil { 589 return nil, nil, err 590 } 591 opts.Envs = append(opts.Envs, envs...) 592 593 mounts, err := makeMounts(pod, e.getPodDir(pod.UID), container, hostname, hostDomainName, podIP, volumes) 594 if err != nil { 595 return nil, nil, err 596 } 597 opts.Mounts = append(opts.Mounts, mounts...) 598 599 // Disabling adding TerminationMessagePath on Windows as these files would be mounted as docker volume and 600 // Docker for Windows has a bug where only directories can be mounted 601 if len(container.TerminationMessagePath) != 0 && runtime.GOOS != "windows" { 602 p := e.getPodContainerDir(pod.UID, container.Name) 603 if err := os.MkdirAll(p, 0750); err != nil { 604 klog.Errorf("Error on creating %q: %v", p, err) 605 } else { 606 opts.PodContainerDir = p 607 } 608 } 609 610 return &opts, nil, nil 611 } 612 613 // GetPodDNS returns DNS settings for the pod. 614 // This function is defined in kubecontainer.RuntimeHelper interface so we 615 // have to implement it. 616 func (e *edged) GetPodDNS(pod *v1.Pod) (*runtimeapi.DNSConfig, error) { 617 dnsConfig, err := e.dnsConfigurer.GetPodDNS(pod) 618 return dnsConfig, err 619 } 620 621 // Make the environment variables for a pod in the given namespace. 622 func (e *edged) makeEnvironmentVariables(pod *v1.Pod, container *v1.Container, podIP string, podIPs []string) ([]kubecontainer.EnvVar, error) { 623 624 var result []kubecontainer.EnvVar 625 626 var err error 627 628 // Determine the final values of variables: 629 // 630 // 1. Determine the final value of each variable: 631 // a. If the variable's Value is set, expand the `$(var)` references to other 632 // variables in the .Value field; the sources of variables are the declared 633 // variables of the container and the service environment variables 634 // b. If a source is defined for an environment variable, resolve the source 635 // 2. Create the container's environment in the order variables are declared 636 // 3. Add remaining service environment vars 637 var ( 638 configMaps = make(map[string]*v1.ConfigMap) 639 tmpEnv = make(map[string]string) 640 mappingFunc = expansion.MappingFuncFor(tmpEnv) 641 ) 642 643 for _, envVar := range container.Env { 644 runtimeVal := envVar.Value 645 if runtimeVal != "" { 646 // Step 1a: expand variable references 647 runtimeVal = expansion.Expand(runtimeVal, mappingFunc) 648 } else if envVar.ValueFrom != nil { 649 // Step 1b: resolve alternate env var sources 650 switch { 651 case envVar.ValueFrom.FieldRef != nil: 652 runtimeVal, err = e.podFieldSelectorRuntimeValue(envVar.ValueFrom.FieldRef, pod, podIP, podIPs) 653 if err != nil { 654 return result, err 655 } 656 case envVar.ValueFrom.ConfigMapKeyRef != nil: 657 cm := envVar.ValueFrom.ConfigMapKeyRef 658 name := cm.Name 659 key := cm.Key 660 optional := cm.Optional != nil && *cm.Optional 661 configMap, ok := configMaps[name] 662 if !ok { 663 if e.kubeClient == nil { 664 return result, fmt.Errorf("Couldn't get configMap %v/%v, no kubeClient defined", pod.Namespace, name) 665 } 666 configMap, err = e.configMapManager.GetConfigMap(pod.Namespace, name) 667 if err != nil { 668 if apierrors.IsNotFound(err) && optional { 669 // ignore error when marked optional 670 continue 671 } 672 return result, err 673 } 674 configMaps[name] = configMap 675 } 676 runtimeVal, ok = configMap.Data[key] 677 if !ok { 678 if optional { 679 continue 680 } 681 return result, fmt.Errorf("Couldn't find key %v in ConfigMap %v/%v", key, pod.Namespace, name) 682 } 683 } 684 } 685 686 tmpEnv[envVar.Name] = runtimeVal 687 } 688 689 // Append the env vars 690 for k, v := range tmpEnv { 691 result = append(result, kubecontainer.EnvVar{Name: k, Value: v}) 692 } 693 return result, nil 694 } 695 696 // podFieldSelectorRuntimeValue returns the runtime value of the given 697 // selector for a pod. 698 func (e *edged) podFieldSelectorRuntimeValue(fs *v1.ObjectFieldSelector, pod *v1.Pod, podIP string, podIPs []string) (string, error) { 699 internalFieldPath, _, err := podshelper.ConvertDownwardAPIFieldLabel(fs.APIVersion, fs.FieldPath, "") 700 if err != nil { 701 return "", err 702 } 703 switch internalFieldPath { 704 case "spec.nodeName": 705 return pod.Spec.NodeName, nil 706 case "spec.serviceAccountName": 707 return pod.Spec.ServiceAccountName, nil 708 // TODO: Add status.hostIP here 709 case "status.podIP": 710 return podIP, nil 711 case "status.podIPs": 712 return strings.Join(podIPs, ","), nil 713 } 714 return fieldpath.ExtractFieldPathAsString(pod, internalFieldPath) 715 } 716 717 // makeBlockVolumes maps the raw block devices specified in the path of the container 718 // Experimental 719 func (e *edged) makeBlockVolumes(pod *v1.Pod, container *v1.Container, podVolumes kubecontainer.VolumeMap, blkutil volumepathhandler.BlockVolumePathHandler) ([]kubecontainer.DeviceInfo, error) { 720 var devices []kubecontainer.DeviceInfo 721 for _, device := range container.VolumeDevices { 722 // check path is absolute 723 if !filepath.IsAbs(device.DevicePath) { 724 return nil, fmt.Errorf("error DevicePath `%s` must be an absolute path", device.DevicePath) 725 } 726 vol, ok := podVolumes[device.Name] 727 if !ok || vol.BlockVolumeMapper == nil { 728 klog.Errorf("Block volume cannot be satisfied for container %q, because the volume is missing or the volume mapper is nil: %+v", container.Name, device) 729 return nil, fmt.Errorf("cannot find volume %q to pass into container %q", device.Name, container.Name) 730 } 731 // Get a symbolic link associated to a block device under pod device path 732 dirPath, volName := vol.BlockVolumeMapper.GetPodDeviceMapPath() 733 symlinkPath := path.Join(dirPath, volName) 734 if islinkExist, checkErr := blkutil.IsSymlinkExist(symlinkPath); checkErr != nil { 735 return nil, checkErr 736 } else if islinkExist { 737 // Check readOnly in PVCVolumeSource and set read only permission if it's true. 738 permission := "mrw" 739 if vol.ReadOnly { 740 permission = "r" 741 } 742 klog.V(4).Infof("Device will be attached to container %q. Path on host: %v", container.Name, symlinkPath) 743 devices = append(devices, kubecontainer.DeviceInfo{PathOnHost: symlinkPath, PathInContainer: device.DevicePath, Permissions: permission}) 744 } 745 } 746 747 return devices, nil 748 } 749 750 // convertStatusToAPIStatus creates an api PodStatus for the given pod from 751 // the given internal pod status. It is purely transformative and does not 752 // alter the kubelet state at all. 753 func (e *edged) convertStatusToAPIStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *v1.PodStatus { 754 var apiPodStatus v1.PodStatus 755 756 hostIP, err := e.getHostIPByInterface() 757 if err != nil { 758 klog.Errorf("Failed to get host IP: %v", err) 759 } else { 760 apiPodStatus.HostIP = hostIP 761 762 apiPodStatus.PodIPs = make([]v1.PodIP, 0, len(podStatus.IPs)) 763 for _, ip := range podStatus.IPs { 764 apiPodStatus.PodIPs = append(apiPodStatus.PodIPs, v1.PodIP{ 765 IP: ip, 766 }) 767 } 768 769 if len(apiPodStatus.PodIPs) > 0 { 770 apiPodStatus.PodIP = apiPodStatus.PodIPs[0].IP 771 } 772 773 if pod.Spec.HostNetwork && apiPodStatus.PodIP == "" { 774 apiPodStatus.PodIP = hostIP 775 } 776 } 777 // set status for Pods created on versions of kube older than 1.6 778 apiPodStatus.QOSClass = v1qos.GetPodQOS(pod) 779 780 oldPodStatus, found := e.statusManager.GetPodStatus(pod.UID) 781 if !found { 782 oldPodStatus = pod.Status 783 } 784 785 apiPodStatus.ContainerStatuses = e.convertToAPIContainerStatuses( 786 pod, podStatus, 787 oldPodStatus.ContainerStatuses, 788 pod.Spec.Containers, 789 len(pod.Spec.InitContainers) > 0, 790 false, 791 ) 792 apiPodStatus.InitContainerStatuses = e.convertToAPIContainerStatuses( 793 pod, podStatus, 794 oldPodStatus.InitContainerStatuses, 795 pod.Spec.InitContainers, 796 len(pod.Spec.InitContainers) > 0, 797 true, 798 ) 799 800 return &apiPodStatus 801 } 802 803 // convertToAPIContainerStatuses converts the given internal container 804 // statuses into API container statuses. 805 func (e *edged) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecontainer.PodStatus, previousStatus []v1.ContainerStatus, containers []v1.Container, hasInitContainers, isInitContainer bool) []v1.ContainerStatus { 806 convertContainerStatus := func(cs *kubecontainer.ContainerStatus) *v1.ContainerStatus { 807 cid := cs.ID.String() 808 cstatus := &v1.ContainerStatus{ 809 Name: cs.Name, 810 RestartCount: int32(cs.RestartCount), 811 Image: cs.Image, 812 ImageID: cs.ImageID, 813 ContainerID: cid, 814 } 815 switch cs.State { 816 case kubecontainer.ContainerStateRunning: 817 cstatus.State.Running = &v1.ContainerStateRunning{StartedAt: metav1.NewTime(cs.StartedAt)} 818 cstatus.Ready = true 819 case kubecontainer.ContainerStateCreated: 820 // Treat containers in the "created" state as if they are exited. 821 // The pod workers are supposed start all containers it creates in 822 // one sync (syncPod) iteration. There should not be any normal 823 // "created" containers when the pod worker generates the status at 824 // the beginning of a sync iteration. 825 fallthrough 826 case kubecontainer.ContainerStateExited: 827 cstatus.State.Terminated = &v1.ContainerStateTerminated{ 828 ExitCode: int32(cs.ExitCode), 829 Reason: cs.Reason, 830 Message: cs.Message, 831 StartedAt: metav1.NewTime(cs.StartedAt), 832 FinishedAt: metav1.NewTime(cs.FinishedAt), 833 ContainerID: cid, 834 } 835 default: 836 cstatus.State.Waiting = &v1.ContainerStateWaiting{} 837 } 838 return cstatus 839 } 840 841 // Fetch old containers statuses from old pod status. 842 oldStatuses := make(map[string]v1.ContainerStatus, len(containers)) 843 for _, cstatus := range previousStatus { 844 oldStatuses[cstatus.Name] = cstatus 845 } 846 847 // Set all container statuses to default waiting state 848 statuses := make(map[string]*v1.ContainerStatus, len(containers)) 849 defaultWaitingState := v1.ContainerState{Waiting: &v1.ContainerStateWaiting{Reason: "ContainerCreating"}} 850 if hasInitContainers { 851 defaultWaitingState = v1.ContainerState{Waiting: &v1.ContainerStateWaiting{Reason: "PodInitializing"}} 852 } 853 854 for _, container := range containers { 855 cstatus := &v1.ContainerStatus{ 856 Name: container.Name, 857 Image: container.Image, 858 State: defaultWaitingState, 859 } 860 oldStatus, found := oldStatuses[container.Name] 861 if found { 862 if oldStatus.State.Terminated != nil { 863 // Do not update status on terminated init containers as 864 // they be removed at any time. 865 cstatus = &oldStatus 866 } else { 867 // Apply some values from the old statuses as the default values. 868 cstatus.RestartCount = oldStatus.RestartCount 869 cstatus.LastTerminationState = oldStatus.LastTerminationState 870 } 871 } 872 statuses[container.Name] = cstatus 873 } 874 875 // Make the latest container status comes first. 876 sort.Sort(sort.Reverse(kubecontainer.SortContainerStatusesByCreationTime(podStatus.ContainerStatuses))) 877 // Set container statuses according to the statuses seen in pod status 878 containerSeen := map[string]int{} 879 for _, cStatus := range podStatus.ContainerStatuses { 880 cName := cStatus.Name 881 if _, ok := statuses[cName]; !ok { 882 // This would also ignore the infra container. 883 continue 884 } 885 if containerSeen[cName] >= 2 { 886 continue 887 } 888 cstatus := convertContainerStatus(cStatus) 889 if containerSeen[cName] == 0 { 890 statuses[cName] = cstatus 891 } else { 892 statuses[cName].LastTerminationState = cstatus.State 893 } 894 containerSeen[cName] = containerSeen[cName] + 1 895 } 896 897 // Handle the containers failed to be started, which should be in Waiting state. 898 for _, container := range containers { 899 if isInitContainer { 900 // If the init container is terminated with exit code 0, it won't be restarted. 901 // TODO(random-liu): Handle this in a cleaner way. 902 s := podStatus.FindContainerStatusByName(container.Name) 903 if s != nil && s.State == kubecontainer.ContainerStateExited && s.ExitCode == 0 { 904 continue 905 } 906 } 907 // If a container should be restarted in next syncpod, it is *Waiting*. 908 if !kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) { 909 continue 910 } 911 cstatus := statuses[container.Name] 912 if cstatus.State.Terminated != nil { 913 cstatus.LastTerminationState = cstatus.State 914 } 915 statuses[container.Name] = cstatus 916 } 917 918 var containerStatuses []v1.ContainerStatus 919 for _, cstatus := range statuses { 920 containerStatuses = append(containerStatuses, *cstatus) 921 } 922 923 // Sort the container statuses since clients of this interface expect the list 924 // of containers in a pod has a deterministic order. 925 if isInitContainer { 926 kubetypes.SortInitContainerStatuses(pod, containerStatuses) 927 } else { 928 sort.Sort(kubetypes.SortedContainerStatuses(containerStatuses)) 929 } 930 return containerStatuses 931 } 932 933 func (e *edged) updatePodStatus(pod *v1.Pod) error { 934 var podStatus *v1.PodStatus 935 var newStatus v1.PodStatus 936 var podStatusRemote *kubecontainer.PodStatus 937 var err error 938 if e.containerRuntime != nil { 939 podStatusRemote, err = e.containerRuntime.GetPodStatus(pod.UID, pod.Name, pod.Namespace) 940 if err != nil { 941 containerStatus := &kubecontainer.ContainerStatus{} 942 kubeStatus := toKubeContainerStatus(v1.PodUnknown, containerStatus) 943 podStatus = &v1.PodStatus{Phase: v1.PodUnknown, ContainerStatuses: []v1.ContainerStatus{kubeStatus}} 944 } else { 945 if pod.DeletionTimestamp != nil { 946 containerStatus := &kubecontainer.ContainerStatus{State: kubecontainer.ContainerStateExited, 947 Reason: "Completed"} 948 kubeStatus := toKubeContainerStatus(v1.PodSucceeded, containerStatus) 949 podStatus = &v1.PodStatus{Phase: v1.PodSucceeded, ContainerStatuses: []v1.ContainerStatus{kubeStatus}} 950 951 } else { 952 podStatus = e.convertStatusToAPIStatus(pod, podStatusRemote) 953 // Assume info is ready to process 954 spec := &pod.Spec 955 allStatus := append(append([]v1.ContainerStatus{}, podStatus.ContainerStatuses...), podStatus.InitContainerStatuses...) 956 podStatus.Phase = getPhase(spec, allStatus) 957 // Check for illegal phase transition 958 if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded { 959 // API server shows terminal phase; transitions are not allowed 960 if podStatus.Phase != pod.Status.Phase { 961 klog.Errorf("Pod attempted illegal phase transition from %s to %s: %v", pod.Status.Phase, podStatus.Phase, podStatus) 962 // Force back to phase from the API server 963 podStatus.Phase = pod.Status.Phase 964 } 965 } 966 } 967 } 968 } 969 970 newStatus = *podStatus.DeepCopy() 971 972 e.probeManager.UpdatePodStatus(pod.UID, &newStatus) 973 if e.containerRuntime != nil { 974 spec := &pod.Spec 975 newStatus.Conditions = append(newStatus.Conditions, status.GeneratePodInitializedCondition(spec, newStatus.InitContainerStatuses, newStatus.Phase)) 976 newStatus.Conditions = append(newStatus.Conditions, status.GeneratePodReadyCondition(spec, newStatus.Conditions, newStatus.ContainerStatuses, newStatus.Phase)) 977 //newStatus.Conditions = append(newStatus.Conditions, status.GenerateContainersReadyCondition(spec, newStatus.ContainerStatuses, newStatus.Phase)) 978 newStatus.Conditions = append(newStatus.Conditions, v1.PodCondition{ 979 Type: v1.PodScheduled, 980 Status: v1.ConditionTrue, 981 }) 982 } 983 pod.Status = newStatus 984 e.statusManager.SetPodStatus(pod, newStatus) 985 return err 986 } 987 988 func toKubeContainerStatus(phase v1.PodPhase, status *kubecontainer.ContainerStatus) v1.ContainerStatus { 989 kubeStatus := v1.ContainerStatus{ 990 Name: status.Name, 991 RestartCount: int32(status.RestartCount), 992 ImageID: status.ImageID, 993 Image: status.Image, 994 ContainerID: status.ID.ID, 995 } 996 997 switch phase { 998 case v1.PodRunning: 999 kubeStatus.State.Running = &v1.ContainerStateRunning{StartedAt: metav1.Time{Time: status.StartedAt}} 1000 kubeStatus.Ready = true 1001 case v1.PodFailed, v1.PodSucceeded: 1002 kubeStatus.State.Terminated = &v1.ContainerStateTerminated{ 1003 ExitCode: int32(status.ExitCode), 1004 Reason: status.Reason, 1005 Message: status.Message, 1006 StartedAt: metav1.Time{Time: status.StartedAt}, 1007 FinishedAt: metav1.Time{Time: status.FinishedAt}, 1008 ContainerID: status.ID.ID, 1009 } 1010 default: 1011 kubeStatus.State.Waiting = &v1.ContainerStateWaiting{ 1012 Reason: status.Reason, 1013 Message: status.Message, 1014 } 1015 } 1016 return kubeStatus 1017 } 1018 1019 // getPhase returns the phase of a pod given its container info. 1020 func getPhase(spec *v1.PodSpec, info []v1.ContainerStatus) v1.PodPhase { 1021 initialized := 0 1022 pendingInitialization := 0 1023 failedInitialization := 0 1024 for _, container := range spec.InitContainers { 1025 containerStatus, ok := podutil.GetContainerStatus(info, container.Name) 1026 if !ok { 1027 pendingInitialization++ 1028 continue 1029 } 1030 1031 switch { 1032 case containerStatus.State.Running != nil: 1033 pendingInitialization++ 1034 case containerStatus.State.Terminated != nil: 1035 if containerStatus.State.Terminated.ExitCode == 0 { 1036 initialized++ 1037 } else { 1038 failedInitialization++ 1039 } 1040 case containerStatus.State.Waiting != nil: 1041 if containerStatus.LastTerminationState.Terminated != nil { 1042 if containerStatus.LastTerminationState.Terminated.ExitCode == 0 { 1043 initialized++ 1044 } else { 1045 failedInitialization++ 1046 } 1047 } else { 1048 pendingInitialization++ 1049 } 1050 default: 1051 pendingInitialization++ 1052 } 1053 } 1054 1055 unknown := 0 1056 running := 0 1057 waiting := 0 1058 stopped := 0 1059 failed := 0 1060 succeeded := 0 1061 for _, container := range spec.Containers { 1062 containerStatus, ok := podutil.GetContainerStatus(info, container.Name) 1063 if !ok { 1064 unknown++ 1065 continue 1066 } 1067 1068 switch { 1069 case containerStatus.State.Running != nil: 1070 running++ 1071 case containerStatus.State.Terminated != nil: 1072 stopped++ 1073 if containerStatus.State.Terminated.ExitCode == 0 { 1074 succeeded++ 1075 } else { 1076 failed++ 1077 } 1078 case containerStatus.State.Waiting != nil: 1079 if containerStatus.LastTerminationState.Terminated != nil { 1080 stopped++ 1081 } else { 1082 waiting++ 1083 } 1084 default: 1085 unknown++ 1086 } 1087 } 1088 1089 if failedInitialization > 0 && spec.RestartPolicy == v1.RestartPolicyNever { 1090 return v1.PodFailed 1091 } 1092 1093 switch { 1094 case pendingInitialization > 0: 1095 fallthrough 1096 case waiting > 0: 1097 klog.Info("pod waiting > 0, pending") 1098 // One or more containers has not been started 1099 return v1.PodPending 1100 case running > 0 && unknown == 0: 1101 // All containers have been started, and at least 1102 // one container is running 1103 return v1.PodRunning 1104 case running == 0 && stopped > 0 && unknown == 0: 1105 // All containers are terminated 1106 if spec.RestartPolicy == v1.RestartPolicyAlways { 1107 // All containers are in the process of restarting 1108 return v1.PodRunning 1109 } 1110 if stopped == succeeded { 1111 // RestartPolicy is not Always, and all 1112 // containers are terminated in success 1113 return v1.PodSucceeded 1114 } 1115 if spec.RestartPolicy == v1.RestartPolicyNever { 1116 // RestartPolicy is Never, and all containers are 1117 // terminated with at least one in failure 1118 return v1.PodFailed 1119 } 1120 // RestartPolicy is OnFailure, and at least one in failure 1121 // and in the process of restarting 1122 return v1.PodRunning 1123 default: 1124 klog.Info("pod default case, pending") 1125 return v1.PodPending 1126 } 1127 } 1128 1129 func (e *edged) getHostIPByInterface() (string, error) { 1130 iface, err := net.InterfaceByName(e.interfaceName) 1131 if err != nil { 1132 return "", fmt.Errorf("failed to get network interface: %v err:%v", e.interfaceName, err) 1133 } 1134 if iface == nil { 1135 return "", fmt.Errorf("input iface is nil") 1136 } 1137 1138 addrs, err := iface.Addrs() 1139 if err != nil { 1140 return "", err 1141 } 1142 for _, addr := range addrs { 1143 ip, _, err := net.ParseCIDR(addr.String()) 1144 if err != nil { 1145 continue 1146 } 1147 if ip.To4() != nil { 1148 return ip.String(), nil 1149 } 1150 } 1151 return "", fmt.Errorf("no ip and mask in this network card") 1152 }