k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e/node/pods.go

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package node

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"math/rand"
	"net/http"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apimachinery/pkg/watch"
	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
	"k8s.io/client-go/util/retry"
	"k8s.io/kubernetes/pkg/kubelet/events"
	"k8s.io/kubernetes/test/e2e/framework"
	e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	imageutils "k8s.io/kubernetes/test/utils/image"
	admissionapi "k8s.io/pod-security-admission/api"
	utilpointer "k8s.io/utils/pointer"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/expfmt"
)

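// The "Pods Extended" suite below exercises pod lifecycle behavior beyond the basic
// conformance tests: graceful deletion, QOS class assignment, container status reporting
// for pods deleted shortly after creation, pod sandbox recreation, eviction, and handling
// of a negative TerminationGracePeriodSeconds.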
var _ = SIGDescribe("Pods Extended", func() {
	f := framework.NewDefaultFramework("pods")
	f.NamespacePodSecurityLevel = admissionapi.LevelBaseline

	ginkgo.Describe("Delete Grace Period", func() {
		var podClient *e2epod.PodClient
		ginkgo.BeforeEach(func() {
			podClient = e2epod.NewPodClient(f)
		})

		/*
			Release: v1.15
			Testname: Pods, delete grace period
			Description: Create a pod, make sure it is running. Using the http client send a 'delete' with gracePeriodSeconds=30. Pod SHOULD get terminated within gracePeriodSeconds and removed from API server within a window.
		*/
		ginkgo.It("should be submitted and removed", func(ctx context.Context) {
			ginkgo.By("creating the pod")
			name := "pod-submit-remove-" + string(uuid.NewUUID())
			value := strconv.Itoa(time.Now().Nanosecond())
			pod := e2epod.NewAgnhostPod(f.Namespace.Name, name, nil, nil, nil)
			pod.ObjectMeta.Labels = map[string]string{
				"name": "foo",
				"time": value,
			}

			ginkgo.By("setting up selector")
			selector := labels.SelectorFromSet(labels.Set(map[string]string{"time": value}))
			options := metav1.ListOptions{LabelSelector: selector.String()}
			pods, err := podClient.List(ctx, options)
			framework.ExpectNoError(err, "failed to query for pod")
			gomega.Expect(pods.Items).To(gomega.BeEmpty())

			ginkgo.By("submitting the pod to kubernetes")
			podClient.Create(ctx, pod)

			ginkgo.By("verifying the pod is in kubernetes")
			selector = labels.SelectorFromSet(labels.Set(map[string]string{"time": value}))
			options = metav1.ListOptions{LabelSelector: selector.String()}
			pods, err = podClient.List(ctx, options)
			framework.ExpectNoError(err, "failed to query for pod")
			gomega.Expect(pods.Items).To(gomega.HaveLen(1))

			// We need to wait for the pod to be running, otherwise the deletion
			// may be carried out immediately rather than gracefully.
			framework.ExpectNoError(e2epod.WaitForPodNameRunningInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name))
			// save the running pod
			pod, err = podClient.Get(ctx, pod.Name, metav1.GetOptions{})
			framework.ExpectNoError(err, "failed to GET scheduled pod")

			ginkgo.By("deleting the pod gracefully")
			var lastPod v1.Pod
			var statusCode int
			err = f.ClientSet.CoreV1().RESTClient().Delete().AbsPath("/api/v1/namespaces", pod.Namespace, "pods", pod.Name).Param("gracePeriodSeconds", "30").Do(ctx).StatusCode(&statusCode).Into(&lastPod)
			framework.ExpectNoError(err, "failed to use http client to send delete")
			gomega.Expect(statusCode).To(gomega.Equal(http.StatusOK), "failed to delete gracefully by client request")

			ginkgo.By("verifying the kubelet observed the termination notice")

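			// The check below polls the kubelet's pod list via e2ekubelet.GetKubeletPods and
			// only succeeds once the pod is no longer reported at all; a pod that is still
			// listed, even with a DeletionTimestamp set, keeps the poll going.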
			// Allow up to 3x the grace period (which allows process termination)
			// for the kubelet to remove the pod from the API. Need to follow up on whether this
			// latency between termination and reporting can be isolated further.
			start := time.Now()
			err = wait.Poll(time.Second*5, time.Second*30*3, func() (bool, error) {
				podList, err := e2ekubelet.GetKubeletPods(ctx, f.ClientSet, pod.Spec.NodeName)
				if err != nil {
					framework.Logf("Unable to retrieve kubelet pods for node %v: %v", pod.Spec.NodeName, err)
					return false, nil
				}
				for _, kubeletPod := range podList.Items {
					if pod.Name != kubeletPod.Name || pod.Namespace != kubeletPod.Namespace {
						continue
					}
					if kubeletPod.ObjectMeta.DeletionTimestamp == nil {
						framework.Logf("deletion has not yet been observed")
						return false, nil
					}
					data, _ := json.Marshal(kubeletPod)
					framework.Logf("start=%s, now=%s, kubelet pod: %s", start, time.Now(), string(data))
					return false, nil
				}
				framework.Logf("no pod exists with the name we were looking for, assuming the termination request was observed and completed")
				return true, nil
			})
			framework.ExpectNoError(err, "kubelet never observed the termination notice")

			gomega.Expect(lastPod.DeletionTimestamp).ToNot(gomega.BeNil())
			gomega.Expect(lastPod.Spec.TerminationGracePeriodSeconds).ToNot(gomega.BeZero())

			selector = labels.SelectorFromSet(labels.Set(map[string]string{"time": value}))
			options = metav1.ListOptions{LabelSelector: selector.String()}
			pods, err = podClient.List(ctx, options)
			framework.ExpectNoError(err, "failed to query for pods")
			gomega.Expect(pods.Items).To(gomega.BeEmpty())
		})
	})

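	// A pod is assigned the Guaranteed QOS class only when every container's CPU and memory
	// limits equal its requests; the pod below requests and limits 100m CPU / 100Mi memory,
	// so its status should report PodQOSGuaranteed.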
	ginkgo.Describe("Pods Set QOS Class", func() {
		var podClient *e2epod.PodClient
		ginkgo.BeforeEach(func() {
			podClient = e2epod.NewPodClient(f)
		})

		/*
			Release: v1.9
			Testname: Pods, QOS
			Description: Create a Pod with CPU and Memory request and limits. Pod status MUST have QOSClass set to PodQOSGuaranteed.
		*/
		framework.ConformanceIt("should be set on Pods with matching resource requests and limits for memory and cpu", func(ctx context.Context) {
			ginkgo.By("creating the pod")
			name := "pod-qos-class-" + string(uuid.NewUUID())
			pod := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: name,
					Labels: map[string]string{
						"name": name,
					},
				},
				Spec: v1.PodSpec{
					Containers: []v1.Container{
						{
							Name:  "agnhost",
							Image: imageutils.GetE2EImage(imageutils.Agnhost),
							Args:  []string{"pause"},
							Resources: v1.ResourceRequirements{
								Limits: v1.ResourceList{
									v1.ResourceCPU:    resource.MustParse("100m"),
									v1.ResourceMemory: resource.MustParse("100Mi"),
								},
								Requests: v1.ResourceList{
									v1.ResourceCPU:    resource.MustParse("100m"),
									v1.ResourceMemory: resource.MustParse("100Mi"),
								},
							},
						},
					},
				},
			}

			ginkgo.By("submitting the pod to kubernetes")
			podClient.Create(ctx, pod)

			ginkgo.By("verifying QOS class is set on the pod")
			pod, err := podClient.Get(ctx, name, metav1.GetOptions{})
			framework.ExpectNoError(err, "failed to query for pod")
			gomega.Expect(pod.Status.QOSClass).To(gomega.Equal(v1.PodQOSGuaranteed))
		})
	})

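	// The two tests below repeatedly create pods whose (init) container exits with a non-zero
	// code and delete them after a short random delay, then inspect the resulting watch events
	// for status transitions that should never happen: a pending container reported as
	// successful, or a regular container starting after its init container failed.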
	ginkgo.Describe("Pod Container Status", func() {
		var podClient *e2epod.PodClient
		ginkgo.BeforeEach(func() {
			podClient = e2epod.NewPodClient(f)
		})

		ginkgo.It("should never report success for a pending container", func(ctx context.Context) {
			ginkgo.By("creating pods that should always exit 1 and terminating the pod after a random delay")
			createAndTestPodRepeatedly(ctx,
				3, 15,
				podFastDeleteScenario{client: podClient.PodInterface, delayMs: 2000},
				podClient.PodInterface,
			)
		})
		ginkgo.It("should never report container start when an init container fails", func(ctx context.Context) {
			ginkgo.By("creating pods with an init container that always exits 1 and terminating the pod after a random delay")
			createAndTestPodRepeatedly(ctx,
				3, 15,
				podFastDeleteScenario{client: podClient.PodInterface, delayMs: 2000, initContainer: true},
				podClient.PodInterface,
			)
		})
	})

	ginkgo.Describe("Pod Container lifecycle", func() {
		var podClient *e2epod.PodClient
		ginkgo.BeforeEach(func() {
			podClient = e2epod.NewPodClient(f)
		})

		ginkgo.It("should not create extra sandbox if all containers are done", func(ctx context.Context) {
			ginkgo.By("creating the pod that should always exit 0")

			name := "pod-always-succeed" + string(uuid.NewUUID())
			image := imageutils.GetE2EImage(imageutils.BusyBox)
			pod := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: name,
				},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyOnFailure,
					InitContainers: []v1.Container{
						{
							Name:  "foo",
							Image: image,
							Command: []string{
								"/bin/true",
							},
						},
					},
					Containers: []v1.Container{
						{
							Name:  "bar",
							Image: image,
							Command: []string{
								"/bin/true",
							},
						},
					},
				},
			}

			ginkgo.By("submitting the pod to kubernetes")
			createdPod := podClient.Create(ctx, pod)
			ginkgo.DeferCleanup(func(ctx context.Context) error {
				ginkgo.By("deleting the pod")
				return podClient.Delete(ctx, pod.Name, metav1.DeleteOptions{})
			})

			framework.ExpectNoError(e2epod.WaitForPodSuccessInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name))

			var eventList *v1.EventList
			var err error
			ginkgo.By("Getting events about the pod")
			framework.ExpectNoError(wait.Poll(time.Second*2, time.Second*60, func() (bool, error) {
				selector := fields.Set{
					"involvedObject.kind":      "Pod",
					"involvedObject.uid":       string(createdPod.UID),
					"involvedObject.namespace": f.Namespace.Name,
					"source":                   "kubelet",
				}.AsSelector().String()
				options := metav1.ListOptions{FieldSelector: selector}
				eventList, err = f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, options)
				if err != nil {
					return false, err
				}
				if len(eventList.Items) > 0 {
					return true, nil
				}
				return false, nil
			}))

			ginkgo.By("Checking events about the pod")
			for _, event := range eventList.Items {
				if event.Reason == events.SandboxChanged {
					framework.Fail("Unexpected SandboxChanged event")
				}
			}
		})

		ginkgo.It("evicted pods should be terminal", func(ctx context.Context) {
			ginkgo.By("creating the pod that should be evicted")

			name := "pod-should-be-evicted" + string(uuid.NewUUID())
			image := imageutils.GetE2EImage(imageutils.BusyBox)
			pod := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: name,
				},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyOnFailure,
					Containers: []v1.Container{
						{
							Name:  "bar",
							Image: image,
							Command: []string{
								"/bin/sh", "-c", "sleep 10; dd if=/dev/zero of=file bs=1M count=10; sleep 10000",
							},
							Resources: v1.ResourceRequirements{
								Limits: v1.ResourceList{
									"ephemeral-storage": resource.MustParse("5Mi"),
								},
							},
						},
					},
				},
			}

			ginkgo.By("submitting the pod to kubernetes")
			podClient.Create(ctx, pod)
			ginkgo.DeferCleanup(func(ctx context.Context) error {
				ginkgo.By("deleting the pod")
				return podClient.Delete(ctx, pod.Name, metav1.DeleteOptions{})
			})

			err := e2epod.WaitForPodTerminatedInNamespace(ctx, f.ClientSet, pod.Name, "Evicted", f.Namespace.Name)
			if err != nil {
				framework.Failf("error waiting for pod to be evicted: %v", err)
			}
		})
	})

	ginkgo.Describe("Pod TerminationGracePeriodSeconds is negative", func() {
		var podClient *e2epod.PodClient
		ginkgo.BeforeEach(func() {
			podClient = e2epod.NewPodClient(f)
		})

		ginkgo.It("pod with negative grace period", func(ctx context.Context) {
			name := "pod-negative-grace-period" + string(uuid.NewUUID())
			image := imageutils.GetE2EImage(imageutils.BusyBox)
			pod := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: name,
				},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyOnFailure,
					Containers: []v1.Container{
						{
							Name:  "foo",
							Image: image,
							Command: []string{
								"/bin/sh", "-c", "sleep 10000",
							},
						},
					},
					TerminationGracePeriodSeconds: utilpointer.Int64(-1),
				},
			}

			ginkgo.By("submitting the pod to kubernetes")
			podClient.Create(ctx, pod)

			pod, err := podClient.Get(ctx, pod.Name, metav1.GetOptions{})
			framework.ExpectNoError(err, "failed to query for pod")

			if pod.Spec.TerminationGracePeriodSeconds == nil {
				framework.Failf("pod spec TerminationGracePeriodSeconds is nil")
			}

			if *pod.Spec.TerminationGracePeriodSeconds != 1 {
				framework.Failf("pod spec TerminationGracePeriodSeconds is not 1: %d", *pod.Spec.TerminationGracePeriodSeconds)
			}

			// Retry if the TerminationGracePeriodSeconds is overridden;
			// see https://github.com/kubernetes/kubernetes/pull/115606 for more.
			err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
				pod, err := podClient.Get(ctx, pod.Name, metav1.GetOptions{})
				framework.ExpectNoError(err, "failed to query for pod")
				ginkgo.By("updating the pod to have a negative TerminationGracePeriodSeconds")
				pod.Spec.TerminationGracePeriodSeconds = utilpointer.Int64(-1)
				_, err = podClient.PodInterface.Update(ctx, pod, metav1.UpdateOptions{})
				return err
			})
			framework.ExpectNoError(err, "failed to update pod")

			pod, err = podClient.Get(ctx, pod.Name, metav1.GetOptions{})
			framework.ExpectNoError(err, "failed to query for pod")

			if pod.Spec.TerminationGracePeriodSeconds == nil {
				framework.Failf("pod spec TerminationGracePeriodSeconds is nil")
			}

			if *pod.Spec.TerminationGracePeriodSeconds != 1 {
				framework.Failf("pod spec TerminationGracePeriodSeconds is not 1: %d", *pod.Spec.TerminationGracePeriodSeconds)
			}

			ginkgo.DeferCleanup(func(ctx context.Context) error {
				ginkgo.By("deleting the pod")
				return podClient.Delete(ctx, pod.Name, metav1.DeleteOptions{})
			})
		})
	})

})

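// createAndTestPodRepeatedly runs the given scenario iterations times on each of workers
// parallel goroutines: it creates the scenario's pod, watches it until the scenario's last
// event, applies the scenario's action (e.g. a delayed delete), verifies every observed
// watch event, and records per-node pod lifetime latencies in a Prometheus summary that is
// logged once all workers finish. Any verification errors fail the test at the end.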
func createAndTestPodRepeatedly(ctx context.Context, workers, iterations int, scenario podScenario, podClient v1core.PodInterface) {
	var (
		lock sync.Mutex
		errs []error

		wg sync.WaitGroup
	)

	r := prometheus.NewRegistry()
	h := prometheus.NewSummaryVec(prometheus.SummaryOpts{
		Name: "latency",
		Objectives: map[float64]float64{
			0.5:  0.05,
			0.75: 0.025,
			0.9:  0.01,
			0.99: 0.001,
		},
	}, []string{"node"})
	r.MustRegister(h)

	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func(i int) {
			defer ginkgo.GinkgoRecover()
			defer wg.Done()
			for retries := 0; retries < iterations; retries++ {
				pod := scenario.Pod(i, retries)

				// create the pod, capture the change events, then delete the pod
				start := time.Now()
				created, err := podClient.Create(ctx, pod, metav1.CreateOptions{})
				framework.ExpectNoError(err, "failed to create pod")

				ch := make(chan []watch.Event)
				waitForWatch := make(chan struct{})
				go func() {
					defer ginkgo.GinkgoRecover()
					defer close(ch)
					w, err := podClient.Watch(ctx, metav1.ListOptions{
						ResourceVersion: created.ResourceVersion,
						FieldSelector:   fmt.Sprintf("metadata.name=%s", pod.Name),
					})
					if err != nil {
						framework.Logf("Unable to watch pod %s: %v", pod.Name, err)
						return
					}
					defer w.Stop()
					close(waitForWatch)
					events := []watch.Event{
						{Type: watch.Added, Object: created},
					}
					for event := range w.ResultChan() {
						events = append(events, event)
						if event.Type == watch.Error {
							framework.Logf("watch error seen for %s: %#v", pod.Name, event.Object)
						}
						if scenario.IsLastEvent(event) {
							framework.Logf("watch last event seen for %s", pod.Name)
							break
						}
					}
					ch <- events
				}()

				select {
				case <-ch: // in case the goroutine above exits before establishing the watch
				case <-waitForWatch: // when the watch is established
				}

				verifier, scenario, err := scenario.Action(ctx, pod)
				framework.ExpectNoError(err, "failed to take action")

				var (
					events []watch.Event
					ok     bool
				)
				select {
				case events, ok = <-ch:
					if !ok {
						continue
					}
					if len(events) < 2 {
						framework.Fail("only got a single event")
					}
				case <-time.After(5 * time.Minute):
					framework.Failf("timed out waiting for watch events for %s", pod.Name)
				}

				end := time.Now()

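				// events[0] is the synthetic Added event built from the create response above;
				// only the events actually delivered by the watch are run through the verifier.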
				var eventErr error
				for _, event := range events[1:] {
					if err := verifier.Verify(event); err != nil {
						eventErr = err
						break
					}
				}

				total := end.Sub(start)

				var lastPod *v1.Pod = pod
				func() {
					lock.Lock()
					defer lock.Unlock()

					if eventErr != nil {
						errs = append(errs, eventErr)
						return
					}
					pod, verifyErrs := verifier.VerifyFinal(scenario, total)
					if pod != nil {
						lastPod = pod
					}
					errs = append(errs, verifyErrs...)
				}()

				h.WithLabelValues(lastPod.Spec.NodeName).Observe(total.Seconds())
			}
		}(i)
	}

	wg.Wait()

	if len(errs) > 0 {
		var messages []string
		for _, err := range errs {
			messages = append(messages, err.Error())
		}
		framework.Failf("%d errors:\n%v", len(errs), strings.Join(messages, "\n"))
	}
	values, _ := r.Gather()
	var buf bytes.Buffer
	for _, m := range values {
		expfmt.MetricFamilyToText(&buf, m)
	}
	framework.Logf("Summary of latencies:\n%s", buf.String())
}

// podScenario drives one iteration of createAndTestPodRepeatedly: it supplies the pod to
// create, performs the action under test, and decides which watch event ends the iteration.
type podScenario interface {
	Pod(worker, attempt int) *v1.Pod
	Action(context.Context, *v1.Pod) (podScenarioVerifier, string, error)
	IsLastEvent(event watch.Event) bool
}

// podScenarioVerifier checks each observed watch event and performs a final check once the
// scenario is complete.
type podScenarioVerifier interface {
	Verify(event watch.Event) error
	VerifyFinal(scenario string, duration time.Duration) (*v1.Pod, []error)
}

// podFastDeleteScenario creates pods that exit (or fail to initialize) immediately and
// deletes them after a random delay of up to delayMs milliseconds.
type podFastDeleteScenario struct {
	client  v1core.PodInterface
	delayMs int

	initContainer bool
}

func (s podFastDeleteScenario) Verifier(pod *v1.Pod) podScenarioVerifier {
	return &podStartVerifier{}
}

func (s podFastDeleteScenario) IsLastEvent(event watch.Event) bool {
	if event.Type == watch.Deleted {
		return true
	}
	return false
}

func (s podFastDeleteScenario) Action(ctx context.Context, pod *v1.Pod) (podScenarioVerifier, string, error) {
	t := time.Duration(rand.Intn(s.delayMs)) * time.Millisecond
	scenario := fmt.Sprintf("t=%s", t)
	time.Sleep(t)
	return &podStartVerifier{pod: pod}, scenario, s.client.Delete(ctx, pod.Name, metav1.DeleteOptions{})
}

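// Pod returns the scenario's test pod. With initContainer set it is a pod whose init
// container ("fail") exits non-zero, so the regular container ("blocked") must never start;
// otherwise it is a single-container pod whose container ("fail") exits non-zero immediately.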
func (s podFastDeleteScenario) Pod(worker, attempt int) *v1.Pod {
	name := fmt.Sprintf("pod-terminate-status-%d-%d", worker, attempt)
	value := strconv.Itoa(time.Now().Nanosecond())
	one := int64(1)
	if s.initContainer {
		return &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name: name,
				Labels: map[string]string{
					"name": "foo",
					"time": value,
				},
			},
			Spec: v1.PodSpec{
				RestartPolicy:                 v1.RestartPolicyNever,
				TerminationGracePeriodSeconds: &one,
				InitContainers: []v1.Container{
					{
						Name:  "fail",
						Image: imageutils.GetE2EImage(imageutils.BusyBox),
						Command: []string{
							"/bin/false",
						},
						Resources: v1.ResourceRequirements{
							Requests: v1.ResourceList{
								v1.ResourceCPU:    resource.MustParse("5m"),
								v1.ResourceMemory: resource.MustParse("10Mi"),
							},
						},
					},
				},
				Containers: []v1.Container{
					{
						Name:  "blocked",
						Image: imageutils.GetE2EImage(imageutils.BusyBox),
						Command: []string{
							"/bin/true",
						},
						Resources: v1.ResourceRequirements{
							Requests: v1.ResourceList{
								v1.ResourceCPU:    resource.MustParse("5m"),
								v1.ResourceMemory: resource.MustParse("10Mi"),
							},
						},
					},
				},
			},
		}
	}
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: name,
			Labels: map[string]string{
				"name": "foo",
				"time": value,
			},
		},
		Spec: v1.PodSpec{
			RestartPolicy:                 v1.RestartPolicyNever,
			TerminationGracePeriodSeconds: &one,
			Containers: []v1.Container{
				{
					Name:  "fail",
					Image: imageutils.GetE2EImage(imageutils.BusyBox),
					Command: []string{
						"/bin/false",
					},
					Resources: v1.ResourceRequirements{
						Requests: v1.ResourceList{
							v1.ResourceCPU:    resource.MustParse("5m"),
							v1.ResourceMemory: resource.MustParse("10Mi"),
						},
					},
				},
			},
		},
	}
}

// podStartVerifier checks events for a given pod and looks for unexpected
// transitions. It assumes one container running to completion.
type podStartVerifier struct {
	pod                  *v1.Pod
	hasInitContainers    bool
	hasContainers        bool
	hasTerminated        bool
	hasRunningContainers bool
	hasTerminalPhase     bool
	duration             time.Duration
	completeDuration     time.Duration
}

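// reBug88766 matches the container runtime error produced when pod volume teardown races with
// container start (https://github.com/kubernetes/kubernetes/issues/88766); Verify tolerates
// StartError / ContainerCannotRun exits whose message matches it.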
var reBug88766 = regexp.MustCompile(`rootfs_linux.*kubernetes\.io~(secret|projected).*no such file or directory`)

// Verify takes successive watch events for a given pod and returns an error if the status is unexpected.
// This verifier works for pods with at most one init container and exactly one regular container.
func (v *podStartVerifier) Verify(event watch.Event) error {
	var ok bool
	pod, ok := event.Object.(*v1.Pod)
	if !ok {
		framework.Logf("Unexpected event object: %s %#v", event.Type, event.Object)
		return nil
	}
	v.pod = pod

	if len(pod.Spec.InitContainers) > 0 {
		if len(pod.Status.InitContainerStatuses) == 0 {
			if v.hasInitContainers {
				return fmt.Errorf("pod %s on node %s had incorrect init containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.InitContainerStatuses)
			}
			return nil
		}
		v.hasInitContainers = true
		if len(pod.Status.InitContainerStatuses) != 1 {
			return fmt.Errorf("pod %s on node %s had incorrect init containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.InitContainerStatuses)
		}

	} else {
		if len(pod.Status.InitContainerStatuses) != 0 {
			return fmt.Errorf("pod %s on node %s had incorrect init containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.InitContainerStatuses)
		}
	}

	if len(pod.Status.ContainerStatuses) == 0 {
		if v.hasContainers {
			return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.ContainerStatuses)
		}
		return nil
	}
	v.hasContainers = true
	if len(pod.Status.ContainerStatuses) != 1 {
		return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.ContainerStatuses)
	}

	if status := e2epod.FindContainerStatusInPod(pod, "blocked"); status != nil {
		if (status.Started != nil && *status.Started) || status.LastTerminationState.Terminated != nil || status.State.Waiting == nil {
			return fmt.Errorf("pod %s on node %s should not have started the blocked container: %#v", pod.Name, pod.Spec.NodeName, status)
		}
	}

	status := e2epod.FindContainerStatusInPod(pod, "fail")
	if status == nil {
		return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status)
	}

	t := status.State.Terminated
	if v.hasTerminated {
		if status.State.Waiting != nil || status.State.Running != nil {
			return fmt.Errorf("pod %s on node %s was terminated and then changed state: %#v", pod.Name, pod.Spec.NodeName, status)
		}
		if t == nil {
			return fmt.Errorf("pod %s on node %s was terminated and then had termination cleared: %#v", pod.Name, pod.Spec.NodeName, status)
		}
	}
	var hasNoStartTime bool
	// hasRunningContainers tracks whether the most recently observed status for the "fail"
	// container was neither Waiting nor Terminated (i.e. apparently running); VerifyFinal
	// reports an error if that is still true when the pod is deleted.
	v.hasRunningContainers = status.State.Waiting == nil && status.State.Terminated == nil
	if t != nil {
		if !t.FinishedAt.Time.IsZero() {
			if t.StartedAt.IsZero() {
				hasNoStartTime = true
			} else {
				v.duration = t.FinishedAt.Sub(t.StartedAt.Time)
			}
			v.completeDuration = t.FinishedAt.Sub(pod.CreationTimestamp.Time)
		}

		defer func() { v.hasTerminated = true }()
		switch {
		case t.ExitCode == 1:
			// expected
		case t.ExitCode == 137 && (t.Reason == "ContainerStatusUnknown" || t.Reason == "Error"):
			// expected, pod was force-killed after grace period
		case t.ExitCode == 128 && (t.Reason == "StartError" || t.Reason == "ContainerCannotRun") && reBug88766.MatchString(t.Message):
			// pod volume teardown races with container start in CRI, which reports a failure
			framework.Logf("pod %s on node %s failed with the symptoms of https://github.com/kubernetes/kubernetes/issues/88766", pod.Name, pod.Spec.NodeName)
		default:
			data, _ := json.MarshalIndent(pod.Status, "", " ")
			framework.Logf("pod %s on node %s had incorrect final status:\n%s", pod.Name, pod.Spec.NodeName, string(data))
			return fmt.Errorf("pod %s on node %s container unexpected exit code %d: start=%s end=%s reason=%s message=%s", pod.Name, pod.Spec.NodeName, t.ExitCode, t.StartedAt, t.FinishedAt, t.Reason, t.Message)
		}
		switch {
		case v.duration > time.Hour:
			// problem with status reporting
			return fmt.Errorf("pod %s container %s on node %s had very long duration %s: start=%s end=%s", pod.Name, status.Name, pod.Spec.NodeName, v.duration, t.StartedAt, t.FinishedAt)
		case hasNoStartTime:
			// should never happen
			return fmt.Errorf("pod %s container %s on node %s had finish time but not start time: end=%s", pod.Name, status.Name, pod.Spec.NodeName, t.FinishedAt)
		}
	}
	if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded {
		v.hasTerminalPhase = true
	} else {
		if v.hasTerminalPhase {
			return fmt.Errorf("pod %s on node %s was in a terminal phase and then reverted: %#v", pod.Name, pod.Spec.NodeName, pod.Status)
		}
	}
	return nil
}

// VerifyFinal is called once the scenario is complete; it reports errors if the pod never
// reached a terminal phase or if its container still looked running when it was deleted,
// and logs the measured durations for the iteration.
func (v *podStartVerifier) VerifyFinal(scenario string, total time.Duration) (*v1.Pod, []error) {
	var errs []error
	pod := v.pod
	if !v.hasTerminalPhase {
		var names []string
		for _, status := range pod.Status.ContainerStatuses {
			if status.State.Running != nil {
				names = append(names, status.Name)
			}
		}
		switch {
		case len(names) > 0:
			errs = append(errs, fmt.Errorf("pod %s on node %s did not reach a terminal phase before being deleted but had running containers: phase=%s, running-containers=%s", pod.Name, pod.Spec.NodeName, pod.Status.Phase, strings.Join(names, ",")))
		case pod.Status.Phase != v1.PodPending:
			errs = append(errs, fmt.Errorf("pod %s on node %s was not Pending but has no running containers: phase=%s", pod.Name, pod.Spec.NodeName, pod.Status.Phase))
		}
	}
	if v.hasRunningContainers {
		data, _ := json.MarshalIndent(pod.Status.ContainerStatuses, "", " ")
		errs = append(errs, fmt.Errorf("pod %s on node %s had running or unknown container status before being deleted:\n%s", pod.Name, pod.Spec.NodeName, string(data)))
	}

	framework.Logf("Pod %s on node %s %s total=%s run=%s execute=%s", pod.Name, pod.Spec.NodeName, scenario, total, v.completeDuration, v.duration)
	return pod, errs
}