k8s.io/kubernetes@v1.29.3/pkg/kubelet/kuberuntime/kuberuntime_container.go

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kuberuntime

import (
    "context"
    "errors"
    "fmt"
    "io"
    "math/rand"
    "net/url"
    "os"
    "path/filepath"
    "regexp"
    goruntime "runtime"
    "sort"
    "strconv"
    "strings"
    "sync"
    "time"

    crierror "k8s.io/cri-api/pkg/errors"

    "github.com/opencontainers/selinux/go-selinux"
    grpcstatus "google.golang.org/grpc/status"

    "github.com/armon/circbuf"
    "k8s.io/klog/v2"

    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    kubetypes "k8s.io/apimachinery/pkg/types"
    utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    "k8s.io/apimachinery/pkg/util/sets"
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
    kubelettypes "k8s.io/kubelet/pkg/types"
    "k8s.io/kubernetes/pkg/features"
    kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
    "k8s.io/kubernetes/pkg/kubelet/cri/remote"
    "k8s.io/kubernetes/pkg/kubelet/events"
    proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
    "k8s.io/kubernetes/pkg/kubelet/types"
    "k8s.io/kubernetes/pkg/kubelet/util/format"
    "k8s.io/kubernetes/pkg/util/tail"
    volumeutil "k8s.io/kubernetes/pkg/volume/util"
)

var (
    // ErrCreateContainerConfig - failed to create container config
    ErrCreateContainerConfig = errors.New("CreateContainerConfigError")
    // ErrPreCreateHook - failed to execute PreCreateHook
    ErrPreCreateHook = errors.New("PreCreateHookError")
    // ErrCreateContainer - failed to create container
    ErrCreateContainer = errors.New("CreateContainerError")
    // ErrPreStartHook - failed to execute PreStartHook
    ErrPreStartHook = errors.New("PreStartHookError")
    // ErrPostStartHook - failed to execute PostStartHook
    ErrPostStartHook = errors.New("PostStartHookError")
)

// recordContainerEvent should be used by the runtime manager for all container-related events.
// It has sanity checks to ensure that we do not write events that can abuse our masters.
// In particular, it ensures that a containerID never appears in an event message, as that
// is prone to causing a lot of distinct events that do not aggregate well.
// It replaces any reference to a containerID with the containerName, which is stable and is what users know.
func (m *kubeGenericRuntimeManager) recordContainerEvent(pod *v1.Pod, container *v1.Container, containerID, eventType, reason, message string, args ...interface{}) {
    ref, err := kubecontainer.GenerateContainerRef(pod, container)
    if err != nil {
        klog.ErrorS(err, "Can't make a container ref", "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name)
        return
    }
    eventMessage := message
    if len(args) > 0 {
        eventMessage = fmt.Sprintf(message, args...)
    }
    // this is a hack, but often the error from the runtime includes the containerID,
    // which kills our ability to deduplicate events. this protection makes a huge
    // difference in the number of unique events
    if containerID != "" {
        eventMessage = strings.Replace(eventMessage, containerID, container.Name, -1)
    }
    m.recorder.Event(ref, eventType, reason, eventMessage)
}

// startSpec wraps the spec required to start a container, either a regular/init container
// or an ephemeral container. Ephemeral containers contain all the fields of regular/init
// containers, plus some additional fields. In both cases startSpec.container will be set.
type startSpec struct {
    container          *v1.Container
    ephemeralContainer *v1.EphemeralContainer
}

func containerStartSpec(c *v1.Container) *startSpec {
    return &startSpec{container: c}
}

func ephemeralContainerStartSpec(ec *v1.EphemeralContainer) *startSpec {
    return &startSpec{
        container:          (*v1.Container)(&ec.EphemeralContainerCommon),
        ephemeralContainer: ec,
    }
}

// getTargetID returns the kubecontainer.ContainerID for ephemeral container namespace
// targeting. The target is stored as EphemeralContainer.TargetContainerName, which must be
// resolved to a ContainerID using podStatus. The target container must already exist, which
// usually isn't a problem since ephemeral containers aren't allowed at pod creation time.
func (s *startSpec) getTargetID(podStatus *kubecontainer.PodStatus) (*kubecontainer.ContainerID, error) {
    if s.ephemeralContainer == nil || s.ephemeralContainer.TargetContainerName == "" {
        return nil, nil
    }

    targetStatus := podStatus.FindContainerStatusByName(s.ephemeralContainer.TargetContainerName)
    if targetStatus == nil {
        return nil, fmt.Errorf("unable to find target container %v", s.ephemeralContainer.TargetContainerName)
    }

    return &targetStatus.ID, nil
}

func calcRestartCountByLogDir(path string) (int, error) {
    // if the path doesn't exist then it's not an error
    if _, err := os.Stat(path); err != nil {
        return 0, nil
    }
    files, err := os.ReadDir(path)
    if err != nil {
        return 0, err
    }
    if len(files) == 0 {
        return 0, nil
    }
    restartCount := 0
    restartCountLogFileRegex := regexp.MustCompile(`^(\d+)\.log(\..*)?`)
    for _, file := range files {
        if file.IsDir() {
            continue
        }
        matches := restartCountLogFileRegex.FindStringSubmatch(file.Name())
        if len(matches) == 0 {
            continue
        }
        count, err := strconv.Atoi(matches[1])
        if err != nil {
            // it is unlikely that the kubelet created this file;
            // more likely it is a custom file with a number as its name
            continue
        }
        count++
        if count > restartCount {
            restartCount = count
        }
    }
    return restartCount, nil
}

// startContainer starts a container and, on error, returns a message describing why it failed.
// It starts the container through the following steps:
// * pull the image
// * create the container
// * start the container
// * run the post start lifecycle hooks (if applicable)
func (m *kubeGenericRuntimeManager) startContainer(ctx context.Context, podSandboxID string, podSandboxConfig *runtimeapi.PodSandboxConfig, spec *startSpec, pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, podIP string, podIPs []string) (string, error) {
    container := spec.container

    // Step 1: pull the image.

    // If the RuntimeClassInImageCriAPI feature gate is enabled, pass the runtime
    // handler information for the runtime class specified. If no runtime class is
    // specified, pass "".
    podRuntimeHandler := ""
    var err error
    if utilfeature.DefaultFeatureGate.Enabled(features.RuntimeClassInImageCriAPI) {
        if pod.Spec.RuntimeClassName != nil && *pod.Spec.RuntimeClassName != "" {
            podRuntimeHandler, err = m.runtimeClassManager.LookupRuntimeHandler(pod.Spec.RuntimeClassName)
            if err != nil {
                msg := fmt.Sprintf("Failed to lookup runtimeHandler for runtimeClassName %v", pod.Spec.RuntimeClassName)
                return msg, err
            }
        }
    }

    imageRef, msg, err := m.imagePuller.EnsureImageExists(ctx, pod, container, pullSecrets, podSandboxConfig, podRuntimeHandler)
    if err != nil {
        s, _ := grpcstatus.FromError(err)
        m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", s.Message())
        return msg, err
    }

    // Step 2: create the container.
    // For a new container, the RestartCount should be 0
    restartCount := 0
    containerStatus := podStatus.FindContainerStatusByName(container.Name)
    if containerStatus != nil {
        restartCount = containerStatus.RestartCount + 1
    } else {
        // The container runtime keeps state on container statuses and
        // what the container restart count is. When nodes are rebooted,
        // some container runtimes clear their state, which causes the
        // restartCount to be reset to 0. This causes the logfile to
        // start at 0.log, which either overwrites or appends to the
        // already existing log.
        //
        // We are checking to see if the log directory exists, and find
        // the latest restartCount by checking the log name -
        // {restartCount}.log - and adding 1 to it.
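        // For example (illustrative): a log directory containing
        //   0.log  1.log  1.log.20230101-120000.gz
        // yields a restartCount of 2 (the highest parsed index, 1, plus one),
        // so this attempt's log becomes 2.log.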
        logDir := BuildContainerLogsDirectory(pod.Namespace, pod.Name, pod.UID, container.Name)
        restartCount, err = calcRestartCountByLogDir(logDir)
        if err != nil {
            klog.InfoS("Cannot calculate restartCount from the log directory", "logDir", logDir, "err", err)
            restartCount = 0
        }
    }

    target, err := spec.getTargetID(podStatus)
    if err != nil {
        s, _ := grpcstatus.FromError(err)
        m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", s.Message())
        return s.Message(), ErrCreateContainerConfig
    }

    containerConfig, cleanupAction, err := m.generateContainerConfig(ctx, container, pod, restartCount, podIP, imageRef, podIPs, target)
    if cleanupAction != nil {
        defer cleanupAction()
    }
    if err != nil {
        s, _ := grpcstatus.FromError(err)
        m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", s.Message())
        return s.Message(), ErrCreateContainerConfig
    }

    err = m.internalLifecycle.PreCreateContainer(pod, container, containerConfig)
    if err != nil {
        s, _ := grpcstatus.FromError(err)
        m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Internal PreCreateContainer hook failed: %v", s.Message())
        return s.Message(), ErrPreCreateHook
    }

    containerID, err := m.runtimeService.CreateContainer(ctx, podSandboxID, containerConfig, podSandboxConfig)
    if err != nil {
        s, _ := grpcstatus.FromError(err)
        m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", s.Message())
        return s.Message(), ErrCreateContainer
    }
    err = m.internalLifecycle.PreStartContainer(pod, container, containerID)
    if err != nil {
        s, _ := grpcstatus.FromError(err)
        m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToStartContainer, "Internal PreStartContainer hook failed: %v", s.Message())
        return s.Message(), ErrPreStartHook
    }
    m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.CreatedContainer, fmt.Sprintf("Created container %s", container.Name))

    // Step 3: start the container.
    err = m.runtimeService.StartContainer(ctx, containerID)
    if err != nil {
        s, _ := grpcstatus.FromError(err)
        m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToStartContainer, "Error: %v", s.Message())
        return s.Message(), kubecontainer.ErrRunContainer
    }
    m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.StartedContainer, fmt.Sprintf("Started container %s", container.Name))

    // Symlink container logs to the legacy container log location for cluster logging
    // support.
    // TODO(random-liu): Remove this after cluster logging supports CRI container log path.
    containerMeta := containerConfig.GetMetadata()
    sandboxMeta := podSandboxConfig.GetMetadata()
    legacySymlink := legacyLogSymlink(containerID, containerMeta.Name, sandboxMeta.Name,
        sandboxMeta.Namespace)
    containerLog := filepath.Join(podSandboxConfig.LogDirectory, containerConfig.LogPath)
    // Only create the legacy symlink if the containerLog path exists (or the stat error
    // is something other than IsNotExist), because otherwise only a dangling legacySymlink
    // would be created.
    // A dangling legacySymlink would later be removed by container GC anyway, so it makes
    // no sense to create it in the first place. This happens when the journald logging
    // driver is used with docker.
    if _, err := m.osInterface.Stat(containerLog); !os.IsNotExist(err) {
        if err := m.osInterface.Symlink(containerLog, legacySymlink); err != nil {
            klog.ErrorS(err, "Failed to create legacy symbolic link", "path", legacySymlink,
                "containerID", containerID, "containerLogPath", containerLog)
        }
    }

    // Step 4: execute the post start hook.
    if container.Lifecycle != nil && container.Lifecycle.PostStart != nil {
        kubeContainerID := kubecontainer.ContainerID{
            Type: m.runtimeName,
            ID:   containerID,
        }
        msg, handlerErr := m.runner.Run(ctx, kubeContainerID, pod, container, container.Lifecycle.PostStart)
        if handlerErr != nil {
            klog.ErrorS(handlerErr, "Failed to execute PostStartHook", "pod", klog.KObj(pod),
                "podUID", pod.UID, "containerName", container.Name, "containerID", kubeContainerID.String())
            // do not record the message in the event so that secrets won't leak from the server.
            m.recordContainerEvent(pod, container, kubeContainerID.ID, v1.EventTypeWarning, events.FailedPostStartHook, "PostStartHook failed")
            if err := m.killContainer(ctx, pod, kubeContainerID, container.Name, "FailedPostStartHook", reasonFailedPostStartHook, nil, nil); err != nil {
                klog.ErrorS(err, "Failed to kill container", "pod", klog.KObj(pod),
                    "podUID", pod.UID, "containerName", container.Name, "containerID", kubeContainerID.String())
            }
            return msg, ErrPostStartHook
        }
    }

    return "", nil
}

// generateContainerConfig generates container config for kubelet runtime v1.
func (m *kubeGenericRuntimeManager) generateContainerConfig(ctx context.Context, container *v1.Container, pod *v1.Pod, restartCount int, podIP, imageRef string, podIPs []string, nsTarget *kubecontainer.ContainerID) (*runtimeapi.ContainerConfig, func(), error) {
    opts, cleanupAction, err := m.runtimeHelper.GenerateRunContainerOptions(ctx, pod, container, podIP, podIPs)
    if err != nil {
        return nil, nil, err
    }

    uid, username, err := m.getImageUser(ctx, container.Image)
    if err != nil {
        return nil, cleanupAction, err
    }

    // Verify RunAsNonRoot. Non-root verification only supports numeric users.
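    // For example (illustrative): with runAsNonRoot set, an image whose USER is
    // the symbolic name "nobody" cannot be verified and is rejected, while a
    // numeric, non-zero USER such as "65534" passes the check below.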
    if err := verifyRunAsNonRoot(pod, container, uid, username); err != nil {
        return nil, cleanupAction, err
    }

    command, args := kubecontainer.ExpandContainerCommandAndArgs(container, opts.Envs)
    logDir := BuildContainerLogsDirectory(pod.Namespace, pod.Name, pod.UID, container.Name)
    err = m.osInterface.MkdirAll(logDir, 0755)
    if err != nil {
        return nil, cleanupAction, fmt.Errorf("create container log directory for container %s failed: %v", container.Name, err)
    }
    containerLogsPath := buildContainerLogsPath(container.Name, restartCount)
    restartCountUint32 := uint32(restartCount)
    config := &runtimeapi.ContainerConfig{
        Metadata: &runtimeapi.ContainerMetadata{
            Name:    container.Name,
            Attempt: restartCountUint32,
        },
        Image:       &runtimeapi.ImageSpec{Image: imageRef, UserSpecifiedImage: container.Image},
        Command:     command,
        Args:        args,
        WorkingDir:  container.WorkingDir,
        Labels:      newContainerLabels(container, pod),
        Annotations: newContainerAnnotations(container, pod, restartCount, opts),
        Devices:     makeDevices(opts),
        CDIDevices:  makeCDIDevices(opts),
        Mounts:      m.makeMounts(opts, container),
        LogPath:     containerLogsPath,
        Stdin:       container.Stdin,
        StdinOnce:   container.StdinOnce,
        Tty:         container.TTY,
    }

    // set platform specific configurations.
    if err := m.applyPlatformSpecificContainerConfig(config, container, pod, uid, username, nsTarget); err != nil {
        return nil, cleanupAction, err
    }

    // set environment variables
    envs := make([]*runtimeapi.KeyValue, len(opts.Envs))
    for idx := range opts.Envs {
        e := opts.Envs[idx]
        envs[idx] = &runtimeapi.KeyValue{
            Key:   e.Name,
            Value: e.Value,
        }
    }
    config.Envs = envs

    return config, cleanupAction, nil
}

func (m *kubeGenericRuntimeManager) updateContainerResources(pod *v1.Pod, container *v1.Container, containerID kubecontainer.ContainerID) error {
    containerResources := m.generateContainerResources(pod, container)
    if containerResources == nil {
        return fmt.Errorf("container %q updateContainerResources failed: cannot generate resources config", containerID.String())
    }
    ctx := context.Background()
    err := m.runtimeService.UpdateContainerResources(ctx, containerID.ID, containerResources)
    if err != nil {
        klog.ErrorS(err, "UpdateContainerResources failed", "container", containerID.String())
    }
    return err
}

// makeDevices generates container devices for kubelet runtime v1.
func makeDevices(opts *kubecontainer.RunContainerOptions) []*runtimeapi.Device {
    devices := make([]*runtimeapi.Device, len(opts.Devices))

    for idx := range opts.Devices {
        device := opts.Devices[idx]
        devices[idx] = &runtimeapi.Device{
            HostPath:      device.PathOnHost,
            ContainerPath: device.PathInContainer,
            Permissions:   device.Permissions,
        }
    }

    return devices
}

// makeCDIDevices generates container CDIDevices for kubelet runtime v1.
func makeCDIDevices(opts *kubecontainer.RunContainerOptions) []*runtimeapi.CDIDevice {
    devices := make([]*runtimeapi.CDIDevice, len(opts.CDIDevices))

    for i, device := range opts.CDIDevices {
        devices[i] = &runtimeapi.CDIDevice{
            Name: device.Name,
        }
    }

    return devices
}
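
// A CDI device is referenced by a fully qualified name of the form
// "vendor/class=device", e.g. "vendor.example.com/gpu=gpu0" (illustrative);
// the container runtime resolves the name against its CDI spec files.
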
// makeMounts generates container volume mounts for kubelet runtime v1.
func (m *kubeGenericRuntimeManager) makeMounts(opts *kubecontainer.RunContainerOptions, container *v1.Container) []*runtimeapi.Mount {
    volumeMounts := []*runtimeapi.Mount{}

    for idx := range opts.Mounts {
        v := opts.Mounts[idx]
        selinuxRelabel := v.SELinuxRelabel && selinux.GetEnabled()
        mount := &runtimeapi.Mount{
            HostPath:       v.HostPath,
            ContainerPath:  v.ContainerPath,
            Readonly:       v.ReadOnly,
            SelinuxRelabel: selinuxRelabel,
            Propagation:    v.Propagation,
        }

        volumeMounts = append(volumeMounts, mount)
    }

    // The reason we create and mount the log file here (not in kubelet) is that
    // the file's location depends on the ID of the container, and we need to create and
    // mount the file before actually starting the container.
    if opts.PodContainerDir != "" && len(container.TerminationMessagePath) != 0 {
        // Because the PodContainerDir contains the pod UID and container name, which is
        // unique enough, here we just add a random id to make the path unique for
        // different instances of the same container.
        cid := makeUID()
        containerLogPath := filepath.Join(opts.PodContainerDir, cid)
        fs, err := m.osInterface.Create(containerLogPath)
        if err != nil {
            utilruntime.HandleError(fmt.Errorf("error on creating termination-log file %q: %v", containerLogPath, err))
        } else {
            fs.Close()

            // Chmod is needed because os.Create() ends up calling
            // open(2) to create the file, so the final mode used is "mode &
            // ~umask". But we want to make sure the specified mode is used
            // in the file no matter what the umask is.
            if err := m.osInterface.Chmod(containerLogPath, 0666); err != nil {
                utilruntime.HandleError(fmt.Errorf("unable to set termination-log file permissions %q: %v", containerLogPath, err))
            }

            // Volume mounts fail on Windows if the path is not of the form C:/
            containerLogPath = volumeutil.MakeAbsolutePath(goruntime.GOOS, containerLogPath)
            terminationMessagePath := volumeutil.MakeAbsolutePath(goruntime.GOOS, container.TerminationMessagePath)
            selinuxRelabel := selinux.GetEnabled()
            volumeMounts = append(volumeMounts, &runtimeapi.Mount{
                HostPath:       containerLogPath,
                ContainerPath:  terminationMessagePath,
                SelinuxRelabel: selinuxRelabel,
            })
        }
    }

    return volumeMounts
}

// getKubeletContainers lists containers managed by kubelet.
// The boolean parameter specifies whether to return all containers, including
// those that have already exited and dead containers (used for garbage collection).
func (m *kubeGenericRuntimeManager) getKubeletContainers(ctx context.Context, allContainers bool) ([]*runtimeapi.Container, error) {
    filter := &runtimeapi.ContainerFilter{}
    if !allContainers {
        filter.State = &runtimeapi.ContainerStateValue{
            State: runtimeapi.ContainerState_CONTAINER_RUNNING,
        }
    }

    containers, err := m.runtimeService.ListContainers(ctx, filter)
    if err != nil {
        klog.ErrorS(err, "ListContainers failed")
        return nil, err
    }

    return containers, nil
}

// makeUID returns a randomly generated string.
func makeUID() string {
    return fmt.Sprintf("%08x", rand.Uint32())
}

// getTerminationMessage looks on the filesystem for the provided termination message path,
// returning a limited amount of those bytes, or returns true if the logs should be checked.
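// For example (illustrative): for the default terminationMessagePath
// "/dev/termination-log", the function finds the container mount whose
// ContainerPath matches that path and tails at most
// kubecontainer.MaxContainerTerminationMessageLength bytes from the
// corresponding HostPath on the node.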
func getTerminationMessage(status *runtimeapi.ContainerStatus, terminationMessagePath string, fallbackToLogs bool) (string, bool) {
    if len(terminationMessagePath) == 0 {
        return "", fallbackToLogs
    }
    // Volume mounts fail on Windows if the path is not of the form C:/
    terminationMessagePath = volumeutil.MakeAbsolutePath(goruntime.GOOS, terminationMessagePath)
    for _, mount := range status.Mounts {
        if mount.ContainerPath != terminationMessagePath {
            continue
        }
        path := mount.HostPath
        data, _, err := tail.ReadAtMost(path, kubecontainer.MaxContainerTerminationMessageLength)
        if err != nil {
            if os.IsNotExist(err) {
                return "", fallbackToLogs
            }
            return fmt.Sprintf("Error on reading termination log %s: %v", path, err), false
        }
        return string(data), (fallbackToLogs && len(data) == 0)
    }
    return "", fallbackToLogs
}

// readLastStringFromContainerLogs attempts to read up to the max log length from the end of the CRI log represented
// by path. It reads up to max log lines.
func (m *kubeGenericRuntimeManager) readLastStringFromContainerLogs(path string) string {
    value := int64(kubecontainer.MaxContainerTerminationMessageLogLines)
    buf, _ := circbuf.NewBuffer(kubecontainer.MaxContainerTerminationMessageLogLength)
    if err := m.ReadLogs(context.Background(), path, "", &v1.PodLogOptions{TailLines: &value}, buf, buf); err != nil {
        return fmt.Sprintf("Error on reading termination message from logs: %v", err)
    }
    return buf.String()
}

func (m *kubeGenericRuntimeManager) convertToKubeContainerStatus(status *runtimeapi.ContainerStatus) (cStatus *kubecontainer.Status) {
    cStatus = toKubeContainerStatus(status, m.runtimeName)
    if status.State == runtimeapi.ContainerState_CONTAINER_EXITED {
        // Populate the termination message if needed.
        annotatedInfo := getContainerInfoFromAnnotations(status.Annotations)
        // If a container cannot even be started, it certainly does not have logs, so there is no need to fallbackToLogs.
        fallbackToLogs := annotatedInfo.TerminationMessagePolicy == v1.TerminationMessageFallbackToLogsOnError &&
            cStatus.ExitCode != 0 && cStatus.Reason != "ContainerCannotRun"
        tMessage, checkLogs := getTerminationMessage(status, annotatedInfo.TerminationMessagePath, fallbackToLogs)
        if checkLogs {
            tMessage = m.readLastStringFromContainerLogs(status.GetLogPath())
        }
        // Enrich the status message with the termination message written by the application, if it is not empty.
        if len(tMessage) != 0 {
            if len(cStatus.Message) != 0 {
                cStatus.Message += ": "
            }
            cStatus.Message += tMessage
        }
    }
    return cStatus
}

// getPodContainerStatuses gets all containers' statuses for the pod.
func (m *kubeGenericRuntimeManager) getPodContainerStatuses(ctx context.Context, uid kubetypes.UID, name, namespace string) ([]*kubecontainer.Status, error) {
    // Select all containers of the given pod.
    containers, err := m.runtimeService.ListContainers(ctx, &runtimeapi.ContainerFilter{
        LabelSelector: map[string]string{kubelettypes.KubernetesPodUIDLabel: string(uid)},
    })
    if err != nil {
        klog.ErrorS(err, "ListContainers error")
        return nil, err
    }

    statuses := []*kubecontainer.Status{}
    // TODO: optimization: set maximum number of containers per container name to examine.
    for _, c := range containers {
        resp, err := m.runtimeService.ContainerStatus(ctx, c.Id, false)
        // Between List (ListContainers) and check (ContainerStatus) another thread might remove a container, and that is normal.
        // The previous call (ListContainers) never fails due to a pod container not existing.
        // Therefore, this method should not either, but instead act as if the previous call failed,
        // which means the error should be ignored.
        if crierror.IsNotFound(err) {
            continue
        }
        if err != nil {
            // Merely log this here; GetPodStatus will actually report the error out.
            klog.V(4).InfoS("ContainerStatus return error", "containerID", c.Id, "err", err)
            return nil, err
        }
        status := resp.GetStatus()
        if status == nil {
            return nil, remote.ErrContainerStatusNil
        }
        cStatus := m.convertToKubeContainerStatus(status)
        statuses = append(statuses, cStatus)
    }

    sort.Sort(containerStatusByCreated(statuses))
    return statuses, nil
}

func toKubeContainerStatus(status *runtimeapi.ContainerStatus, runtimeName string) *kubecontainer.Status {
    annotatedInfo := getContainerInfoFromAnnotations(status.Annotations)
    labeledInfo := getContainerInfoFromLabels(status.Labels)
    var cStatusResources *kubecontainer.ContainerResources
    if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
        // If the runtime reports cpu & memory resources info, add it to the container status.
        cStatusResources = toKubeContainerResources(status.Resources)
    }
    cStatus := &kubecontainer.Status{
        ID: kubecontainer.ContainerID{
            Type: runtimeName,
            ID:   status.Id,
        },
        Name:                 labeledInfo.ContainerName,
        Image:                status.Image.Image,
        ImageID:              status.ImageRef,
        ImageRuntimeHandler:  status.Image.RuntimeHandler,
        Hash:                 annotatedInfo.Hash,
        HashWithoutResources: annotatedInfo.HashWithoutResources,
        RestartCount:         annotatedInfo.RestartCount,
        State:                toKubeContainerState(status.State),
        CreatedAt:            time.Unix(0, status.CreatedAt),
        Resources:            cStatusResources,
    }

    if status.State != runtimeapi.ContainerState_CONTAINER_CREATED {
        // If the container is not in the created state, we have tried and
        // started the container. Set the StartedAt time.
        cStatus.StartedAt = time.Unix(0, status.StartedAt)
    }
    if status.State == runtimeapi.ContainerState_CONTAINER_EXITED {
        cStatus.Reason = status.Reason
        cStatus.Message = status.Message
        cStatus.ExitCode = int(status.ExitCode)
        cStatus.FinishedAt = time.Unix(0, status.FinishedAt)
    }
    return cStatus
}

// executePreStopHook runs the pre-stop lifecycle hooks if applicable and returns the duration it takes.
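// The hook runs in a separate goroutine so that a hung handler cannot block
// shutdown indefinitely: if the hook has not finished within gracePeriod
// seconds, the function returns with roughly the full grace period elapsed
// and the container is stopped anyway (illustrative summary of the select below).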
func (m *kubeGenericRuntimeManager) executePreStopHook(ctx context.Context, pod *v1.Pod, containerID kubecontainer.ContainerID, containerSpec *v1.Container, gracePeriod int64) int64 {
    klog.V(3).InfoS("Running preStop hook", "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", containerSpec.Name, "containerID", containerID.String())

    start := metav1.Now()
    done := make(chan struct{})
    go func() {
        defer close(done)
        defer utilruntime.HandleCrash()
        if _, err := m.runner.Run(ctx, containerID, pod, containerSpec, containerSpec.Lifecycle.PreStop); err != nil {
            klog.ErrorS(err, "PreStop hook failed", "pod", klog.KObj(pod), "podUID", pod.UID,
                "containerName", containerSpec.Name, "containerID", containerID.String())
            // do not record the message in the event so that secrets won't leak from the server.
            m.recordContainerEvent(pod, containerSpec, containerID.ID, v1.EventTypeWarning, events.FailedPreStopHook, "PreStopHook failed")
        }
    }()

    select {
    case <-time.After(time.Duration(gracePeriod) * time.Second):
        klog.V(2).InfoS("PreStop hook not completed in grace period", "pod", klog.KObj(pod), "podUID", pod.UID,
            "containerName", containerSpec.Name, "containerID", containerID.String(), "gracePeriod", gracePeriod)
    case <-done:
        klog.V(3).InfoS("PreStop hook completed", "pod", klog.KObj(pod), "podUID", pod.UID,
            "containerName", containerSpec.Name, "containerID", containerID.String())
    }

    return int64(metav1.Now().Sub(start.Time).Seconds())
}

// restoreSpecsFromContainerLabels restores all information needed for killing a container. In some
// cases we may not have the pod and container specs when killing a container, e.g. the pod is
// deleted during a kubelet restart.
// To solve this problem, we've already written the necessary information into container labels.
// Here we just need to retrieve it from the container labels and restore the specs.
// TODO(random-liu): Add a node e2e test to test this behaviour.
// TODO(random-liu): Change the lifecycle handler to just accept the information needed, so that
// we can just pass the needed function and not create the fake object.
func (m *kubeGenericRuntimeManager) restoreSpecsFromContainerLabels(ctx context.Context, containerID kubecontainer.ContainerID) (*v1.Pod, *v1.Container, error) {
    var pod *v1.Pod
    var container *v1.Container
    resp, err := m.runtimeService.ContainerStatus(ctx, containerID.ID, false)
    if err != nil {
        return nil, nil, err
    }
    s := resp.GetStatus()
    if s == nil {
        return nil, nil, remote.ErrContainerStatusNil
    }

    l := getContainerInfoFromLabels(s.Labels)
    a := getContainerInfoFromAnnotations(s.Annotations)
    // Note that the following are not full specs; the container killing code should not use
    // un-restored fields.
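    // The labels read above include, for example, the well-known keys
    // "io.kubernetes.pod.name", "io.kubernetes.pod.namespace",
    // "io.kubernetes.pod.uid" and "io.kubernetes.container.name"
    // (illustrative; see getContainerInfoFromLabels for the exact set).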
    pod = &v1.Pod{
        ObjectMeta: metav1.ObjectMeta{
            UID:                        l.PodUID,
            Name:                       l.PodName,
            Namespace:                  l.PodNamespace,
            DeletionGracePeriodSeconds: a.PodDeletionGracePeriod,
        },
        Spec: v1.PodSpec{
            TerminationGracePeriodSeconds: a.PodTerminationGracePeriod,
        },
    }
    container = &v1.Container{
        Name:                   l.ContainerName,
        Ports:                  a.ContainerPorts,
        TerminationMessagePath: a.TerminationMessagePath,
    }
    if a.PreStopHandler != nil {
        container.Lifecycle = &v1.Lifecycle{
            PreStop: a.PreStopHandler,
        }
    }
    return pod, container, nil
}

// killContainer kills a container through the following steps:
// * Run the pre-stop lifecycle hooks (if applicable).
// * Stop the container.
func (m *kubeGenericRuntimeManager) killContainer(ctx context.Context, pod *v1.Pod, containerID kubecontainer.ContainerID, containerName string, message string, reason containerKillReason, gracePeriodOverride *int64, ordering *terminationOrdering) error {
    var containerSpec *v1.Container
    if pod != nil {
        if containerSpec = kubecontainer.GetContainerSpec(pod, containerName); containerSpec == nil {
            return fmt.Errorf("failed to get containerSpec %q (id=%q) in pod %q when killing container for reason %q",
                containerName, containerID.String(), format.Pod(pod), message)
        }
    } else {
        // Restore the necessary information if one of the specs is nil.
        restoredPod, restoredContainer, err := m.restoreSpecsFromContainerLabels(ctx, containerID)
        if err != nil {
            return err
        }
        pod, containerSpec = restoredPod, restoredContainer
    }

    // From this point, pod and container must be non-nil.
    gracePeriod := setTerminationGracePeriod(pod, containerSpec, containerName, containerID, reason)

    if len(message) == 0 {
        message = fmt.Sprintf("Stopping container %s", containerSpec.Name)
    }
    m.recordContainerEvent(pod, containerSpec, containerID.ID, v1.EventTypeNormal, events.KillingContainer, message)

    if gracePeriodOverride != nil {
        gracePeriod = *gracePeriodOverride
        klog.V(3).InfoS("Killing container with a grace period override", "pod", klog.KObj(pod), "podUID", pod.UID,
            "containerName", containerName, "containerID", containerID.String(), "gracePeriod", gracePeriod)
    }

    // Run the pre-stop lifecycle hooks if applicable and if there is enough time to run them.
    if containerSpec.Lifecycle != nil && containerSpec.Lifecycle.PreStop != nil && gracePeriod > 0 {
        gracePeriod = gracePeriod - m.executePreStopHook(ctx, pod, containerID, containerSpec, gracePeriod)
    }

    // if we care about termination ordering, then wait for this container's turn to exit if there is
    // time remaining
    if ordering != nil && gracePeriod > 0 {
        // grace period is only in seconds, so the time we've waited gets truncated downward
        gracePeriod -= int64(ordering.waitForTurn(containerName, gracePeriod))
    }

    // always give containers a minimal shutdown window to avoid unnecessary SIGKILLs
    if gracePeriod < minimumGracePeriodInSeconds {
        gracePeriod = minimumGracePeriodInSeconds
    }

    klog.V(2).InfoS("Killing container with a grace period", "pod", klog.KObj(pod), "podUID", pod.UID,
        "containerName", containerName, "containerID", containerID.String(), "gracePeriod", gracePeriod)

    err := m.runtimeService.StopContainer(ctx, containerID.ID, gracePeriod)
    if err != nil && !crierror.IsNotFound(err) {
        klog.ErrorS(err, "Container termination failed with gracePeriod", "pod", klog.KObj(pod), "podUID", pod.UID,
gracePeriod", "pod", klog.KObj(pod), "podUID", pod.UID, 776 "containerName", containerName, "containerID", containerID.String(), "gracePeriod", gracePeriod) 777 return err 778 } 779 klog.V(3).InfoS("Container exited normally", "pod", klog.KObj(pod), "podUID", pod.UID, 780 "containerName", containerName, "containerID", containerID.String()) 781 782 if ordering != nil { 783 ordering.containerTerminated(containerName) 784 } 785 786 return nil 787 } 788 789 // killContainersWithSyncResult kills all pod's containers with sync results. 790 func (m *kubeGenericRuntimeManager) killContainersWithSyncResult(ctx context.Context, pod *v1.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) (syncResults []*kubecontainer.SyncResult) { 791 containerResults := make(chan *kubecontainer.SyncResult, len(runningPod.Containers)) 792 wg := sync.WaitGroup{} 793 794 wg.Add(len(runningPod.Containers)) 795 var termOrdering *terminationOrdering 796 // we only care about container termination ordering if the sidecars feature is enabled 797 if utilfeature.DefaultFeatureGate.Enabled(features.SidecarContainers) { 798 var runningContainerNames []string 799 for _, container := range runningPod.Containers { 800 runningContainerNames = append(runningContainerNames, container.Name) 801 } 802 termOrdering = newTerminationOrdering(pod, runningContainerNames) 803 } 804 for _, container := range runningPod.Containers { 805 go func(container *kubecontainer.Container) { 806 defer utilruntime.HandleCrash() 807 defer wg.Done() 808 809 killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, container.Name) 810 if err := m.killContainer(ctx, pod, container.ID, container.Name, "", reasonUnknown, gracePeriodOverride, termOrdering); err != nil { 811 killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error()) 812 // Use runningPod for logging as the pod passed in could be *nil*. 813 klog.ErrorS(err, "Kill container failed", "pod", klog.KRef(runningPod.Namespace, runningPod.Name), "podUID", runningPod.ID, 814 "containerName", container.Name, "containerID", container.ID) 815 } 816 containerResults <- killContainerResult 817 }(container) 818 } 819 wg.Wait() 820 close(containerResults) 821 822 for containerResult := range containerResults { 823 syncResults = append(syncResults, containerResult) 824 } 825 return 826 } 827 828 // pruneInitContainersBeforeStart ensures that before we begin creating init 829 // containers, we have reduced the number of outstanding init containers still 830 // present. This reduces load on the container garbage collector by only 831 // preserving the most recent terminated init container. 832 func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) { 833 // only the last execution of each init container should be preserved, and only preserve it if it is in the 834 // list of init containers to keep. 835 initContainerNames := sets.NewString() 836 for _, container := range pod.Spec.InitContainers { 837 initContainerNames.Insert(container.Name) 838 } 839 for name := range initContainerNames { 840 count := 0 841 for _, status := range podStatus.ContainerStatuses { 842 if status.Name != name || 843 (status.State != kubecontainer.ContainerStateExited && 844 status.State != kubecontainer.ContainerStateUnknown) { 845 continue 846 } 847 // Remove init containers in unknown state. It should have 848 // been stopped before pruneInitContainersBeforeStart is 849 // called. 
            count++
            // keep the first init container for this name
            if count == 1 {
                continue
            }
            // prune all other init containers that match this container name
            klog.V(4).InfoS("Removing init container", "containerName", status.Name, "containerID", status.ID.ID, "count", count)
            if err := m.removeContainer(ctx, status.ID.ID); err != nil {
                utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", status.Name, err, format.Pod(pod)))
                continue
            }
        }
    }
}

// purgeInitContainers removes all init containers. Note that this function does not check the
// state of the container because it assumes all init containers have been stopped before the
// call happens.
func (m *kubeGenericRuntimeManager) purgeInitContainers(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) {
    initContainerNames := sets.NewString()
    for _, container := range pod.Spec.InitContainers {
        initContainerNames.Insert(container.Name)
    }
    for name := range initContainerNames {
        count := 0
        for _, status := range podStatus.ContainerStatuses {
            if status.Name != name {
                continue
            }
            count++
            // Purge all init containers that match this container name
            klog.V(4).InfoS("Removing init container", "containerName", status.Name, "containerID", status.ID.ID, "count", count)
            if err := m.removeContainer(ctx, status.ID.ID); err != nil {
                utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", status.Name, err, format.Pod(pod)))
                continue
            }
        }
    }
}

// findNextInitContainerToRun returns the status of the last failed container, the
// next init container to start, or done if there are no further init containers.
// Status is only returned if an init container failed, in which case next will
// point to the current container.
func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (status *kubecontainer.Status, next *v1.Container, done bool) {
    if len(pod.Spec.InitContainers) == 0 {
        return nil, nil, true
    }

    // If any of the main containers have status and are Running, then all init containers must
    // have been executed at some point in the past. However, they could have been removed
    // from the container runtime by now, and if we proceed, it would appear as if they
    // never ran and would re-execute improperly.
    for i := range pod.Spec.Containers {
        container := &pod.Spec.Containers[i]
        status := podStatus.FindContainerStatusByName(container.Name)
        if status != nil && status.State == kubecontainer.ContainerStateRunning {
            return nil, nil, true
        }
    }

    // If there are failed containers, return the status of the last failed one.
    for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
        container := &pod.Spec.InitContainers[i]
        status := podStatus.FindContainerStatusByName(container.Name)
        if status != nil && isInitContainerFailed(status) {
            return status, container, false
        }
    }

    // There are no failed containers now.
    for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
        container := &pod.Spec.InitContainers[i]
        status := podStatus.FindContainerStatusByName(container.Name)
        if status == nil {
            continue
        }

        // container is still running, return not done.
        if status.State == kubecontainer.ContainerStateRunning {
            return nil, nil, false
        }

        if status.State == kubecontainer.ContainerStateExited {
            // all init containers successful
            if i == (len(pod.Spec.InitContainers) - 1) {
                return nil, nil, true
            }

            // all containers up to i successful, go to i+1
            return nil, &pod.Spec.InitContainers[i+1], false
        }
    }

    return nil, &pod.Spec.InitContainers[0], false
}

// hasAnyRegularContainerCreated returns true if any regular container has been
// created, which indicates all init containers have been initialized.
func hasAnyRegularContainerCreated(pod *v1.Pod, podStatus *kubecontainer.PodStatus) bool {
    for _, container := range pod.Spec.Containers {
        status := podStatus.FindContainerStatusByName(container.Name)
        if status == nil {
            continue
        }
        switch status.State {
        case kubecontainer.ContainerStateCreated,
            kubecontainer.ContainerStateRunning,
            kubecontainer.ContainerStateExited:
            return true
        default:
            // Ignore other states
        }
    }
    return false
}

// computeInitContainerActions sets the actions on the given changes that need
// to be taken for the init containers. This includes actions to initialize the
// init containers and actions to keep restartable init containers running.
// computeInitContainerActions returns true if the pod has been initialized.
//
// The actions include:
// - Start the first init container that has not been started.
// - Restart all restartable init containers that have started but are not running.
// - Kill the restartable init containers that are not alive or started.
//
// Note that this is a function for the SidecarContainers feature.
// Please sync with the findNextInitContainerToRun function if any changes are
// made, as either this or that function will be called.
func (m *kubeGenericRuntimeManager) computeInitContainerActions(pod *v1.Pod, podStatus *kubecontainer.PodStatus, changes *podActions) bool {
    if len(pod.Spec.InitContainers) == 0 {
        return true
    }

    // If any of the main containers have status and are Running, then all init containers must
    // have been executed at some point in the past. However, they could have been removed
    // from the container runtime by now, and if we proceed, it would appear as if they
    // never ran and would re-execute improperly, except for the restartable init containers.
    podHasInitialized := false
    for _, container := range pod.Spec.Containers {
        status := podStatus.FindContainerStatusByName(container.Name)
        if status == nil {
            continue
        }
        switch status.State {
        case kubecontainer.ContainerStateCreated,
            kubecontainer.ContainerStateRunning:
            podHasInitialized = true
        case kubecontainer.ContainerStateExited:
            // This is a workaround for the issue that the kubelet cannot
            // differentiate the container statuses of the previous podSandbox
            // from the current one.
            // If the node is rebooted, all containers will be in the exited
            // state and the kubelet will try to recreate a new podSandbox.
            // In this case, the kubelet should not mistakenly think that
            // the newly created podSandbox has been initialized.
        default:
            // Ignore other states
        }
        if podHasInitialized {
            break
        }
    }

    // isPreviouslyInitialized indicates whether the current init container has
    // previously been initialized.
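    // For example (illustrative): with init containers [restartable-a, b] and a
    // running main container, podHasInitialized is true; the loop below then
    // skips b (a completed, non-restartable init container) and restarts
    // restartable-a if it is no longer running.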
    isPreviouslyInitialized := podHasInitialized
    restartOnFailure := shouldRestartOnFailure(pod)

    // Note that we iterate through the init containers in reverse order to find
    // the next init container to run, as the completed init containers may get
    // removed from the container runtime for various reasons. Therefore the
    // kubelet should rely on the minimal number of init containers - the last one.
    //
    // Once we find the next init container to run, iterate through the rest to
    // find the restartable init containers to restart.
    for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
        container := &pod.Spec.InitContainers[i]
        status := podStatus.FindContainerStatusByName(container.Name)
        klog.V(4).InfoS("Computing init container action", "pod", klog.KObj(pod), "container", container.Name, "status", status)
        if status == nil {
            // If the container is previously initialized but its status is not
            // found, it means its last status was removed for some reason.
            // Restart it if it is a restartable init container.
            if isPreviouslyInitialized && types.IsRestartableInitContainer(container) {
                changes.InitContainersToStart = append(changes.InitContainersToStart, i)
            }
            continue
        }

        if isPreviouslyInitialized && !types.IsRestartableInitContainer(container) {
            // after initialization, only restartable init containers need to be kept
            // running
            continue
        }

        switch status.State {
        case kubecontainer.ContainerStateCreated:
            // nothing to do but wait for it to start

        case kubecontainer.ContainerStateRunning:
            if !types.IsRestartableInitContainer(container) {
                break
            }

            if types.IsRestartableInitContainer(container) {
                if container.StartupProbe != nil {
                    startup, found := m.startupManager.Get(status.ID)
                    if !found {
                        // If the startup probe has not been run, wait for it.
                        break
                    }
                    if startup != proberesults.Success {
                        if startup == proberesults.Failure {
                            // If the restartable init container failed the startup probe,
                            // restart it.
                            changes.ContainersToKill[status.ID] = containerToKillInfo{
                                name:      container.Name,
                                container: container,
                                message:   fmt.Sprintf("Init container %s failed startup probe", container.Name),
                                reason:    reasonStartupProbe,
                            }
                            changes.InitContainersToStart = append(changes.InitContainersToStart, i)
                        }
                        break
                    }
                }

                klog.V(4).InfoS("Init container has been initialized", "pod", klog.KObj(pod), "container", container.Name)
                if i == (len(pod.Spec.InitContainers) - 1) {
                    podHasInitialized = true
                } else if !isPreviouslyInitialized {
                    // this init container is initialized for the first time, start the next one
                    changes.InitContainersToStart = append(changes.InitContainersToStart, i+1)
                }

                // A restartable init container does not have to take into account its
                // liveness probe when it determines whether to start the next init container.
                if container.LivenessProbe != nil {
                    liveness, found := m.livenessManager.Get(status.ID)
                    if !found {
                        // If the liveness probe has not been run, wait for it.
                        break
                    }
                    if liveness == proberesults.Failure {
                        // If the restartable init container failed the liveness probe,
                        // restart it.
                        changes.ContainersToKill[status.ID] = containerToKillInfo{
                            name:      container.Name,
                            container: container,
                            message:   fmt.Sprintf("Init container %s failed liveness probe", container.Name),
                            reason:    reasonLivenessProbe,
                        }
                        changes.InitContainersToStart = append(changes.InitContainersToStart, i)
                    }
                }
            } else { // init container
                // nothing to do but wait for it to finish
                break
            }

        // If the init container failed and the restart policy is Never, the pod is terminal.
        // Otherwise, restart the init container.
        case kubecontainer.ContainerStateExited:
            if types.IsRestartableInitContainer(container) {
                changes.InitContainersToStart = append(changes.InitContainersToStart, i)
            } else { // init container
                if isInitContainerFailed(status) {
                    if !restartOnFailure {
                        changes.KillPod = true
                        changes.InitContainersToStart = nil
                        return false
                    }
                    changes.InitContainersToStart = append(changes.InitContainersToStart, i)
                    break
                }

                klog.V(4).InfoS("Init container has been initialized", "pod", klog.KObj(pod), "container", container.Name)
                if i == (len(pod.Spec.InitContainers) - 1) {
                    podHasInitialized = true
                } else {
                    // this init container is initialized for the first time, start the next one
                    changes.InitContainersToStart = append(changes.InitContainersToStart, i+1)
                }
            }

        default: // kubecontainer.ContainerStatusUnknown or other unknown states
            if types.IsRestartableInitContainer(container) {
                // If the restartable init container is in an unknown state, restart it.
                changes.ContainersToKill[status.ID] = containerToKillInfo{
                    name:      container.Name,
                    container: container,
                    message: fmt.Sprintf("Init container is in %q state, try killing it before restart",
                        status.State),
                    reason: reasonUnknown,
                }
                changes.InitContainersToStart = append(changes.InitContainersToStart, i)
            } else { // init container
                if !isInitContainerFailed(status) {
                    klog.V(4).InfoS("This should not happen, init container is in unknown state but not failed", "pod", klog.KObj(pod), "containerStatus", status)
                }

                if !restartOnFailure {
                    changes.KillPod = true
                    changes.InitContainersToStart = nil
                    return false
                }

                // If the init container is in an unknown state, restart it.
                changes.ContainersToKill[status.ID] = containerToKillInfo{
                    name:      container.Name,
                    container: container,
                    message: fmt.Sprintf("Init container is in %q state, try killing it before restart",
                        status.State),
                    reason: reasonUnknown,
                }
                changes.InitContainersToStart = append(changes.InitContainersToStart, i)
            }
        }

        if !isPreviouslyInitialized {
            // the one before this init container has been initialized
            isPreviouslyInitialized = true
        }
    }

    // this means no init containers have been started,
    // start the first one
    if !isPreviouslyInitialized {
        changes.InitContainersToStart = append(changes.InitContainersToStart, 0)
    }

    // Reverse InitContainersToStart, as the loop above iterated through the
    // init containers backwards, but we want to start them in the order given
    // in the pod spec.
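    // For example (illustrative): indices appended as [2, 1, 0] become [0, 1, 2].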
    l := len(changes.InitContainersToStart)
    for i := 0; i < l/2; i++ {
        changes.InitContainersToStart[i], changes.InitContainersToStart[l-1-i] =
            changes.InitContainersToStart[l-1-i], changes.InitContainersToStart[i]
    }

    return podHasInitialized
}

// GetContainerLogs returns logs of a specific container.
func (m *kubeGenericRuntimeManager) GetContainerLogs(ctx context.Context, pod *v1.Pod, containerID kubecontainer.ContainerID, logOptions *v1.PodLogOptions, stdout, stderr io.Writer) (err error) {
    resp, err := m.runtimeService.ContainerStatus(ctx, containerID.ID, false)
    if err != nil {
        klog.V(4).InfoS("Failed to get container status", "containerID", containerID.String(), "err", err)
        return fmt.Errorf("unable to retrieve container logs for %v", containerID.String())
    }
    status := resp.GetStatus()
    if status == nil {
        return remote.ErrContainerStatusNil
    }
    return m.ReadLogs(ctx, status.GetLogPath(), containerID.ID, logOptions, stdout, stderr)
}

// GetExec gets the endpoint the runtime will serve the exec request from.
func (m *kubeGenericRuntimeManager) GetExec(ctx context.Context, id kubecontainer.ContainerID, cmd []string, stdin, stdout, stderr, tty bool) (*url.URL, error) {
    req := &runtimeapi.ExecRequest{
        ContainerId: id.ID,
        Cmd:         cmd,
        Tty:         tty,
        Stdin:       stdin,
        Stdout:      stdout,
        Stderr:      stderr,
    }
    resp, err := m.runtimeService.Exec(ctx, req)
    if err != nil {
        return nil, err
    }

    return url.Parse(resp.Url)
}

// GetAttach gets the endpoint the runtime will serve the attach request from.
func (m *kubeGenericRuntimeManager) GetAttach(ctx context.Context, id kubecontainer.ContainerID, stdin, stdout, stderr, tty bool) (*url.URL, error) {
    req := &runtimeapi.AttachRequest{
        ContainerId: id.ID,
        Stdin:       stdin,
        Stdout:      stdout,
        Stderr:      stderr,
        Tty:         tty,
    }
    resp, err := m.runtimeService.Attach(ctx, req)
    if err != nil {
        return nil, err
    }
    return url.Parse(resp.Url)
}

// RunInContainer synchronously executes the command in the container, and returns the output.
func (m *kubeGenericRuntimeManager) RunInContainer(ctx context.Context, id kubecontainer.ContainerID, cmd []string, timeout time.Duration) ([]byte, error) {
    stdout, stderr, err := m.runtimeService.ExecSync(ctx, id.ID, cmd, timeout)
    // NOTE(tallclair): This does not correctly interleave stdout & stderr, but should be sufficient
    // for logging purposes. A combined output option will need to be added to the ExecSyncRequest
    // if more precise output ordering is ever required.
    return append(stdout, stderr...), err
}

// removeContainer removes the container and the container logs.
// Note that we remove the container logs first, so that the container is not removed if
// the container logs fail to be removed, and the kubelet will retry this later. This
// guarantees that the container logs are removed along with the container.
// Note that we assume the container is only removed in a non-running state, and that it
// no longer writes container logs in that state.
func (m *kubeGenericRuntimeManager) removeContainer(ctx context.Context, containerID string) error {
    klog.V(4).InfoS("Removing container", "containerID", containerID)
    // Call internal container post-stop lifecycle hook.
    if err := m.internalLifecycle.PostStopContainer(containerID); err != nil {
        return err
    }

    // Remove the container log.
    // TODO: Separate log and container lifecycle management.
    if err := m.removeContainerLog(ctx, containerID); err != nil {
        return err
    }
    // Remove the container.
    return m.runtimeService.RemoveContainer(ctx, containerID)
}

// removeContainerLog removes the container log.
func (m *kubeGenericRuntimeManager) removeContainerLog(ctx context.Context, containerID string) error {
    // Use log manager to remove rotated logs.
    err := m.logManager.Clean(ctx, containerID)
    if err != nil {
        return err
    }

    resp, err := m.runtimeService.ContainerStatus(ctx, containerID, false)
    if err != nil {
        return fmt.Errorf("failed to get container status %q: %v", containerID, err)
    }
    status := resp.GetStatus()
    if status == nil {
        return remote.ErrContainerStatusNil
    }
    // Remove the legacy container log symlink.
    // TODO(random-liu): Remove this after cluster logging supports CRI container log path.
    labeledInfo := getContainerInfoFromLabels(status.Labels)
    legacySymlink := legacyLogSymlink(containerID, labeledInfo.ContainerName, labeledInfo.PodName,
        labeledInfo.PodNamespace)
    if err := m.osInterface.Remove(legacySymlink); err != nil && !os.IsNotExist(err) {
        return fmt.Errorf("failed to remove container %q log legacy symbolic link %q: %v",
            containerID, legacySymlink, err)
    }
    return nil
}

// DeleteContainer removes a container.
func (m *kubeGenericRuntimeManager) DeleteContainer(ctx context.Context, containerID kubecontainer.ContainerID) error {
    return m.removeContainer(ctx, containerID.ID)
}

// setTerminationGracePeriod determines the grace period to use when killing a container.
func setTerminationGracePeriod(pod *v1.Pod, containerSpec *v1.Container, containerName string, containerID kubecontainer.ContainerID, reason containerKillReason) int64 {
    gracePeriod := int64(minimumGracePeriodInSeconds)
    switch {
    case pod.DeletionGracePeriodSeconds != nil:
        return *pod.DeletionGracePeriodSeconds
    case pod.Spec.TerminationGracePeriodSeconds != nil:
        switch reason {
        case reasonStartupProbe:
            if isProbeTerminationGracePeriodSecondsSet(pod, containerSpec, containerSpec.StartupProbe, containerName, containerID, "StartupProbe") {
                return *containerSpec.StartupProbe.TerminationGracePeriodSeconds
            }
        case reasonLivenessProbe:
            if isProbeTerminationGracePeriodSecondsSet(pod, containerSpec, containerSpec.LivenessProbe, containerName, containerID, "LivenessProbe") {
                return *containerSpec.LivenessProbe.TerminationGracePeriodSeconds
            }
        }
        return *pod.Spec.TerminationGracePeriodSeconds
    }
    return gracePeriod
}

func isProbeTerminationGracePeriodSecondsSet(pod *v1.Pod, containerSpec *v1.Container, probe *v1.Probe, containerName string, containerID kubecontainer.ContainerID, probeType string) bool {
    if probe != nil && probe.TerminationGracePeriodSeconds != nil {
        if *probe.TerminationGracePeriodSeconds > *pod.Spec.TerminationGracePeriodSeconds {
            klog.V(4).InfoS("Using probe-level grace period that is greater than the pod-level grace period", "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", containerName, "containerID", containerID.String(), "probeType", probeType, "probeGracePeriod",
                *probe.TerminationGracePeriodSeconds, "podGracePeriod", *pod.Spec.TerminationGracePeriodSeconds)
        }
        return true
    }
    return false
}
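
// Worked example (illustrative) of the grace period precedence implemented by
// setTerminationGracePeriod above:
//
//   - pod.DeletionGracePeriodSeconds = 5 -> returns 5, regardless of reason
//   - otherwise, reason = reasonLivenessProbe and the liveness probe sets
//     TerminationGracePeriodSeconds = 10 -> returns 10
//   - otherwise, pod.Spec.TerminationGracePeriodSeconds = 30 -> returns 30
//   - with none of the above set -> returns minimumGracePeriodInSeconds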