// k8s.io/kubernetes@v1.29.3/pkg/kubelet/pleg/generic_test.go

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package pleg

import (
	"context"
	"errors"
	"fmt"
	"reflect"
	"sort"
	"strings"
	"testing"
	"time"

	"github.com/golang/mock/gomock"
	"github.com/google/go-cmp/cmp"
	"github.com/stretchr/testify/assert"

	"k8s.io/apimachinery/pkg/types"
	"k8s.io/component-base/metrics/testutil"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
	"k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/utils/clock"
	testingclock "k8s.io/utils/clock/testing"
)

const (
	testContainerRuntimeType = "fooRuntime"
	// largeChannelCap is a large enough capacity to hold all events in a
	// single test.
	largeChannelCap = 100
)

// TestGenericPLEG bundles a GenericPLEG with the fake runtime and fake clock
// it is wired to, so tests can manipulate all three.
type TestGenericPLEG struct {
	pleg    *GenericPLEG
	runtime *containertest.FakeRuntime
	clock   *testingclock.FakeClock
}
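// A typical flow in the tests below (a sketch of the shared pattern; each
// test varies the pod lists and assertions):
//
//	testPleg := newTestGenericPLEG()
//	ch := testPleg.pleg.Watch()
//	testPleg.runtime.AllPodList = ... // seed the fake runtime
//	testPleg.pleg.Relist()            // one synchronous relist
//	events := getEventsFromChannel(ch)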
func newTestGenericPLEG() *TestGenericPLEG {
	return newTestGenericPLEGWithChannelSize(largeChannelCap)
}

func newTestGenericPLEGWithChannelSize(eventChannelCap int) *TestGenericPLEG {
	fakeRuntime := &containertest.FakeRuntime{}
	clock := testingclock.NewFakeClock(time.Time{})
	// The channel capacity should be large enough to hold all events in a
	// single test.
	pleg := &GenericPLEG{
		relistDuration: &RelistDuration{RelistPeriod: time.Hour, RelistThreshold: 3 * time.Minute},
		runtime:        fakeRuntime,
		eventChannel:   make(chan *PodLifecycleEvent, eventChannelCap),
		podRecords:     make(podRecords),
		clock:          clock,
	}
	return &TestGenericPLEG{pleg: pleg, runtime: fakeRuntime, clock: clock}
}

// getEventsFromChannel drains all buffered events from the channel.
func getEventsFromChannel(ch <-chan *PodLifecycleEvent) []*PodLifecycleEvent {
	events := []*PodLifecycleEvent{}
	for len(ch) > 0 {
		e := <-ch
		events = append(events, e)
	}
	return events
}

func createTestContainer(ID string, state kubecontainer.State) *kubecontainer.Container {
	return &kubecontainer.Container{
		ID:    kubecontainer.ContainerID{Type: testContainerRuntimeType, ID: ID},
		State: state,
	}
}

// sortableEvents orders events by pod ID, then by container ID, so expected
// and actual slices can be compared independently of emission order.
type sortableEvents []*PodLifecycleEvent

func (a sortableEvents) Len() int      { return len(a) }
func (a sortableEvents) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a sortableEvents) Less(i, j int) bool {
	if a[i].ID != a[j].ID {
		return a[i].ID < a[j].ID
	}
	return a[i].Data.(string) < a[j].Data.(string)
}

func verifyEvents(t *testing.T, expected, actual []*PodLifecycleEvent) {
	sort.Sort(sortableEvents(expected))
	sort.Sort(sortableEvents(actual))
	if !reflect.DeepEqual(expected, actual) {
		t.Errorf("Actual events differ from the expected; diff:\n %v", cmp.Diff(expected, actual))
	}
}
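// getEventsFromChannel drains by polling len(ch), which is safe here only
// because every test invokes Relist synchronously, so nothing writes to the
// channel while it is being read. For reference, a concurrency-tolerant
// drain would use a non-blocking receive; a sketch (not used by these tests):
func drainEventsNonBlocking(ch <-chan *PodLifecycleEvent) []*PodLifecycleEvent {
	var events []*PodLifecycleEvent
	for {
		select {
		case e := <-ch:
			events = append(events, e)
		default:
			// Channel is momentarily empty; stop draining.
			return events
		}
	}
}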
func TestRelisting(t *testing.T) {
	testPleg := newTestGenericPLEG()
	pleg, runtime := testPleg.pleg, testPleg.runtime
	ch := pleg.Watch()
	// The first relist should send a PodSync event to each pod.
	runtime.AllPodList = []*containertest.FakePod{
		{Pod: &kubecontainer.Pod{
			ID: "1234",
			Containers: []*kubecontainer.Container{
				createTestContainer("c1", kubecontainer.ContainerStateExited),
				createTestContainer("c2", kubecontainer.ContainerStateRunning),
				createTestContainer("c3", kubecontainer.ContainerStateUnknown),
			},
		}},
		{Pod: &kubecontainer.Pod{
			ID: "4567",
			Containers: []*kubecontainer.Container{
				createTestContainer("c1", kubecontainer.ContainerStateExited),
			},
		}},
	}
	pleg.Relist()
	// Report every running/exited container if we see them for the first time.
	expected := []*PodLifecycleEvent{
		{ID: "1234", Type: ContainerStarted, Data: "c2"},
		{ID: "4567", Type: ContainerDied, Data: "c1"},
		{ID: "1234", Type: ContainerDied, Data: "c1"},
	}
	actual := getEventsFromChannel(ch)
	verifyEvents(t, expected, actual)

	// The second relist should not send out any event because no container
	// has changed.
	pleg.Relist()
	actual = getEventsFromChannel(ch)
	assert.True(t, len(actual) == 0, "no container has changed, event length should be 0")

	runtime.AllPodList = []*containertest.FakePod{
		{Pod: &kubecontainer.Pod{
			ID: "1234",
			Containers: []*kubecontainer.Container{
				createTestContainer("c2", kubecontainer.ContainerStateExited),
				createTestContainer("c3", kubecontainer.ContainerStateRunning),
			},
		}},
		{Pod: &kubecontainer.Pod{
			ID: "4567",
			Containers: []*kubecontainer.Container{
				createTestContainer("c4", kubecontainer.ContainerStateRunning),
			},
		}},
	}
	pleg.Relist()
	// Only report containers that transitioned to running or exited status.
	expected = []*PodLifecycleEvent{
		{ID: "1234", Type: ContainerRemoved, Data: "c1"},
		{ID: "1234", Type: ContainerDied, Data: "c2"},
		{ID: "1234", Type: ContainerStarted, Data: "c3"},
		{ID: "4567", Type: ContainerRemoved, Data: "c1"},
		{ID: "4567", Type: ContainerStarted, Data: "c4"},
	}

	actual = getEventsFromChannel(ch)
	verifyEvents(t, expected, actual)
}

// TestEventChannelFull tests that events are discarded when the event
// channel is full.
func TestEventChannelFull(t *testing.T) {
	testPleg := newTestGenericPLEGWithChannelSize(4)
	pleg, runtime := testPleg.pleg, testPleg.runtime
	ch := pleg.Watch()
	// The first relist should send a PodSync event to each pod.
	runtime.AllPodList = []*containertest.FakePod{
		{Pod: &kubecontainer.Pod{
			ID: "1234",
			Containers: []*kubecontainer.Container{
				createTestContainer("c1", kubecontainer.ContainerStateExited),
				createTestContainer("c2", kubecontainer.ContainerStateRunning),
				createTestContainer("c3", kubecontainer.ContainerStateUnknown),
			},
		}},
		{Pod: &kubecontainer.Pod{
			ID: "4567",
			Containers: []*kubecontainer.Container{
				createTestContainer("c1", kubecontainer.ContainerStateExited),
			},
		}},
	}
	pleg.Relist()
	// Report every running/exited container if we see them for the first time.
	expected := []*PodLifecycleEvent{
		{ID: "1234", Type: ContainerStarted, Data: "c2"},
		{ID: "4567", Type: ContainerDied, Data: "c1"},
		{ID: "1234", Type: ContainerDied, Data: "c1"},
	}
	actual := getEventsFromChannel(ch)
	verifyEvents(t, expected, actual)

	runtime.AllPodList = []*containertest.FakePod{
		{Pod: &kubecontainer.Pod{
			ID: "1234",
			Containers: []*kubecontainer.Container{
				createTestContainer("c2", kubecontainer.ContainerStateExited),
				createTestContainer("c3", kubecontainer.ContainerStateRunning),
			},
		}},
		{Pod: &kubecontainer.Pod{
			ID: "4567",
			Containers: []*kubecontainer.Container{
				createTestContainer("c4", kubecontainer.ContainerStateRunning),
			},
		}},
	}
	pleg.Relist()
	allEvents := []*PodLifecycleEvent{
		{ID: "1234", Type: ContainerRemoved, Data: "c1"},
		{ID: "1234", Type: ContainerDied, Data: "c2"},
		{ID: "1234", Type: ContainerStarted, Data: "c3"},
		{ID: "4567", Type: ContainerRemoved, Data: "c1"},
		{ID: "4567", Type: ContainerStarted, Data: "c4"},
	}
	// The event channel is full, so the extra event is discarded.
	actual = getEventsFromChannel(ch)
	assert.True(t, len(actual) == 4, "channel length should be 4")
	assert.Subsetf(t, allEvents, actual, "actual events should be a subset of all events")
}
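// The discard behavior asserted above comes from the relist loop emitting
// events with a non-blocking send, along these lines (a simplification of
// the generic.go pattern, which also logs and counts the dropped event):
//
//	select {
//	case g.eventChannel <- event:
//	default:
//		// Channel full: drop the event rather than block the relist.
//	}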
func TestDetectingContainerDeaths(t *testing.T) {
	// Vary the number of relists after the container started and before the
	// container died to account for the changes in pleg's internal states.
	testReportMissingContainers(t, 1)
	testReportMissingPods(t, 1)

	testReportMissingContainers(t, 3)
	testReportMissingPods(t, 3)
}

func testReportMissingContainers(t *testing.T, numRelists int) {
	testPleg := newTestGenericPLEG()
	pleg, runtime := testPleg.pleg, testPleg.runtime
	ch := pleg.Watch()
	runtime.AllPodList = []*containertest.FakePod{
		{Pod: &kubecontainer.Pod{
			ID: "1234",
			Containers: []*kubecontainer.Container{
				createTestContainer("c1", kubecontainer.ContainerStateRunning),
				createTestContainer("c2", kubecontainer.ContainerStateRunning),
				createTestContainer("c3", kubecontainer.ContainerStateExited),
			},
		}},
	}
	// Relist and drain the events from the channel.
	for i := 0; i < numRelists; i++ {
		pleg.Relist()
		getEventsFromChannel(ch)
	}

	// Container c2 was stopped and removed between relists. We should report
	// the event. The exited container c3 was garbage collected (i.e., removed)
	// between relists. We should ignore that event.
	runtime.AllPodList = []*containertest.FakePod{
		{Pod: &kubecontainer.Pod{
			ID: "1234",
			Containers: []*kubecontainer.Container{
				createTestContainer("c1", kubecontainer.ContainerStateRunning),
			},
		}},
	}
	pleg.Relist()
	expected := []*PodLifecycleEvent{
		{ID: "1234", Type: ContainerDied, Data: "c2"},
		{ID: "1234", Type: ContainerRemoved, Data: "c2"},
		{ID: "1234", Type: ContainerRemoved, Data: "c3"},
	}
	actual := getEventsFromChannel(ch)
	verifyEvents(t, expected, actual)
}
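// Why a vanished running container yields both ContainerDied and
// ContainerRemoved, while a vanished exited container yields only
// ContainerRemoved: the PLEG derives events from (old state, new state)
// pairs per container, roughly like this (a paraphrase of the transition
// logic, not the exact generic.go source):
//
//	non-existent -> running:      ContainerStarted
//	non-existent -> exited:       ContainerDied
//	running      -> non-existent: ContainerDied, ContainerRemoved
//	exited       -> non-existent: ContainerRemoved (death already reported)
//
// Transitions involving the unknown state generate ContainerChanged, which
// the relist loop filters out as unreliable; that is why the unknown
// containers in the tests above produce no events.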
func testReportMissingPods(t *testing.T, numRelists int) {
	testPleg := newTestGenericPLEG()
	pleg, runtime := testPleg.pleg, testPleg.runtime
	ch := pleg.Watch()
	runtime.AllPodList = []*containertest.FakePod{
		{Pod: &kubecontainer.Pod{
			ID: "1234",
			Containers: []*kubecontainer.Container{
				createTestContainer("c2", kubecontainer.ContainerStateRunning),
			},
		}},
	}
	// Relist and drain the events from the channel.
	for i := 0; i < numRelists; i++ {
		pleg.Relist()
		getEventsFromChannel(ch)
	}

	// Container c2 was stopped and removed between relists. We should report
	// the event.
	runtime.AllPodList = []*containertest.FakePod{}
	pleg.Relist()
	expected := []*PodLifecycleEvent{
		{ID: "1234", Type: ContainerDied, Data: "c2"},
		{ID: "1234", Type: ContainerRemoved, Data: "c2"},
	}
	actual := getEventsFromChannel(ch)
	verifyEvents(t, expected, actual)
}

// newTestGenericPLEGWithRuntimeMock builds a PLEG backed by a mock runtime
// and a real pod cache, using the real clock.
func newTestGenericPLEGWithRuntimeMock(runtimeMock kubecontainer.Runtime) *GenericPLEG {
	pleg := &GenericPLEG{
		relistDuration: &RelistDuration{RelistPeriod: time.Hour, RelistThreshold: 2 * time.Hour},
		runtime:        runtimeMock,
		eventChannel:   make(chan *PodLifecycleEvent, 1000),
		podRecords:     make(podRecords),
		cache:          kubecontainer.NewCache(),
		clock:          clock.RealClock{},
	}
	return pleg
}

func createTestPodsStatusesAndEvents(num int) ([]*kubecontainer.Pod, []*kubecontainer.PodStatus, []*PodLifecycleEvent) {
	var pods []*kubecontainer.Pod
	var statuses []*kubecontainer.PodStatus
	var events []*PodLifecycleEvent
	for i := 0; i < num; i++ {
		id := types.UID(fmt.Sprintf("test-pod-%d", i))
		cState := kubecontainer.ContainerStateRunning
		container := createTestContainer(fmt.Sprintf("c%d", i), cState)
		pod := &kubecontainer.Pod{
			ID:         id,
			Containers: []*kubecontainer.Container{container},
		}
		status := &kubecontainer.PodStatus{
			ID:                id,
			ContainerStatuses: []*kubecontainer.Status{{ID: container.ID, State: cState}},
		}
		event := &PodLifecycleEvent{ID: pod.ID, Type: ContainerStarted, Data: container.ID.ID}
		pods = append(pods, pod)
		statuses = append(statuses, status)
		events = append(events, event)
	}
	return pods, statuses, events
}
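// The mock-runtime tests below depend on the PLEG caching the result of
// every pod inspection, successes and failures alike: a failed inspection
// stores the partial status together with the inspection error, and
// cache.Get hands that same (status, err) pair back to consumers. The
// update path is, in outline (a summary, not a quote of generic.go):
//
//	status, err := g.runtime.GetPodStatus(ctx, pod.ID, pod.Name, pod.Namespace)
//	g.cache.Set(pod.ID, status, err, timestamp)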
func TestRelistWithCache(t *testing.T) {
	ctx := context.Background()
	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()
	runtimeMock := containertest.NewMockRuntime(mockCtrl)

	pleg := newTestGenericPLEGWithRuntimeMock(runtimeMock)
	ch := pleg.Watch()

	pods, statuses, events := createTestPodsStatusesAndEvents(2)
	runtimeMock.EXPECT().GetPods(ctx, true).Return(pods, nil).AnyTimes()
	runtimeMock.EXPECT().GetPodStatus(ctx, pods[0].ID, "", "").Return(statuses[0], nil).Times(1)
	// Inject an error when querying the runtime for the pod status of pods[1].
	statusErr := fmt.Errorf("unable to get status")
	runtimeMock.EXPECT().GetPodStatus(ctx, pods[1].ID, "", "").Return(&kubecontainer.PodStatus{}, statusErr).Times(1)

	pleg.Relist()
	actualEvents := getEventsFromChannel(ch)
	cases := []struct {
		pod    *kubecontainer.Pod
		status *kubecontainer.PodStatus
		error  error
	}{
		{pod: pods[0], status: statuses[0], error: nil},
		{pod: pods[1], status: &kubecontainer.PodStatus{}, error: statusErr},
	}
	for i, c := range cases {
		testStr := fmt.Sprintf("test[%d]", i)
		actualStatus, actualErr := pleg.cache.Get(c.pod.ID)
		assert.Equal(t, c.status, actualStatus, testStr)
		assert.Equal(t, c.error, actualErr, testStr)
	}
	// pleg should not generate any event for pods[1] because of the error.
	assert.Exactly(t, []*PodLifecycleEvent{events[0]}, actualEvents)

	// Return normal status for pods[1].
	runtimeMock.EXPECT().GetPodStatus(ctx, pods[1].ID, "", "").Return(statuses[1], nil).Times(1)
	pleg.Relist()
	actualEvents = getEventsFromChannel(ch)
	cases = []struct {
		pod    *kubecontainer.Pod
		status *kubecontainer.PodStatus
		error  error
	}{
		{pod: pods[0], status: statuses[0], error: nil},
		{pod: pods[1], status: statuses[1], error: nil},
	}
	for i, c := range cases {
		testStr := fmt.Sprintf("test[%d]", i)
		actualStatus, actualErr := pleg.cache.Get(c.pod.ID)
		assert.Equal(t, c.status, actualStatus, testStr)
		assert.Equal(t, c.error, actualErr, testStr)
	}
	// Now that we are able to query status for pods[1], pleg should generate an event.
	assert.Exactly(t, []*PodLifecycleEvent{events[1]}, actualEvents)
}

func TestRemoveCacheEntry(t *testing.T) {
	ctx := context.Background()
	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()
	runtimeMock := containertest.NewMockRuntime(mockCtrl)
	pleg := newTestGenericPLEGWithRuntimeMock(runtimeMock)

	pods, statuses, _ := createTestPodsStatusesAndEvents(1)
	runtimeMock.EXPECT().GetPods(ctx, true).Return(pods, nil).Times(1)
	runtimeMock.EXPECT().GetPodStatus(ctx, pods[0].ID, "", "").Return(statuses[0], nil).Times(1)
	// Relist once to populate the cache.
	pleg.Relist()
	// Delete the pod from the runtime. Verify that the cache entry has been
	// removed after relisting.
	runtimeMock.EXPECT().GetPods(ctx, true).Return([]*kubecontainer.Pod{}, nil).Times(1)
	pleg.Relist()
	actualStatus, actualErr := pleg.cache.Get(pods[0].ID)
	assert.Equal(t, &kubecontainer.PodStatus{ID: pods[0].ID}, actualStatus)
	assert.Equal(t, nil, actualErr)
}
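// TestHealthy below depends on the health check's two-step logic, roughly
// (a paraphrase of the production check, not the exact source): a PLEG that
// has never relisted is unhealthy, and one whose last relist is older than
// RelistThreshold is unhealthy.
//
//	if lastRelist.IsZero() {
//		return false, fmt.Errorf("pleg has yet to be successful")
//	}
//	if g.clock.Since(lastRelist) > g.relistDuration.RelistThreshold {
//		return false, fmt.Errorf("pleg was last seen active %v ago", elapsed)
//	}
//	return true, nil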
func TestHealthy(t *testing.T) {
	testPleg := newTestGenericPLEG()

	// pleg should initially be unhealthy.
	pleg, _, clock := testPleg.pleg, testPleg.runtime, testPleg.clock
	ok, _ := pleg.Healthy()
	assert.False(t, ok, "pleg should be unhealthy")

	// Advance the clock without any relisting.
	clock.Step(time.Minute * 10)
	ok, _ = pleg.Healthy()
	assert.False(t, ok, "pleg should be unhealthy")

	// Relist and then advance the time by 1 minute. pleg should be healthy
	// because this is within the allowed limit.
	pleg.Relist()
	clock.Step(time.Minute * 1)
	ok, _ = pleg.Healthy()
	assert.True(t, ok, "pleg should be healthy")

	// Advance by relistThreshold without any relisting. pleg should be
	// unhealthy because it has been longer than relistThreshold since a
	// relist occurred.
	clock.Step(pleg.relistDuration.RelistThreshold)
	ok, _ = pleg.Healthy()
	assert.False(t, ok, "pleg should be unhealthy")
}

func TestRelistWithReinspection(t *testing.T) {
	ctx := context.Background()
	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()
	runtimeMock := containertest.NewMockRuntime(mockCtrl)

	pleg := newTestGenericPLEGWithRuntimeMock(runtimeMock)
	ch := pleg.Watch()

	infraContainer := createTestContainer("infra", kubecontainer.ContainerStateRunning)

	podID := types.UID("test-pod")
	pods := []*kubecontainer.Pod{{
		ID:         podID,
		Containers: []*kubecontainer.Container{infraContainer},
	}}
	runtimeMock.EXPECT().GetPods(ctx, true).Return(pods, nil).Times(1)

	goodStatus := &kubecontainer.PodStatus{
		ID:                podID,
		ContainerStatuses: []*kubecontainer.Status{{ID: infraContainer.ID, State: infraContainer.State}},
	}
	runtimeMock.EXPECT().GetPodStatus(ctx, podID, "", "").Return(goodStatus, nil).Times(1)

	goodEvent := &PodLifecycleEvent{ID: podID, Type: ContainerStarted, Data: infraContainer.ID.ID}

	// listing 1 - everything ok, infra container set up for pod
	pleg.Relist()
	actualEvents := getEventsFromChannel(ch)
	actualStatus, actualErr := pleg.cache.Get(podID)
	assert.Equal(t, goodStatus, actualStatus)
	assert.Equal(t, nil, actualErr)
	assert.Exactly(t, []*PodLifecycleEvent{goodEvent}, actualEvents)

	// listing 2 - pretend the runtime was in the middle of creating the
	// non-infra container for the pod and return an error during inspection
	transientContainer := createTestContainer("transient", kubecontainer.ContainerStateUnknown)
	podsWithTransientContainer := []*kubecontainer.Pod{{
		ID:         podID,
		Containers: []*kubecontainer.Container{infraContainer, transientContainer},
	}}
	runtimeMock.EXPECT().GetPods(ctx, true).Return(podsWithTransientContainer, nil).Times(1)

	badStatus := &kubecontainer.PodStatus{
		ID:                podID,
		ContainerStatuses: []*kubecontainer.Status{},
	}
	runtimeMock.EXPECT().GetPodStatus(ctx, podID, "", "").Return(badStatus, errors.New("inspection error")).Times(1)

	pleg.Relist()
	actualEvents = getEventsFromChannel(ch)
	actualStatus, actualErr = pleg.cache.Get(podID)
	assert.Equal(t, badStatus, actualStatus)
	assert.Equal(t, errors.New("inspection error"), actualErr)
	assert.Exactly(t, []*PodLifecycleEvent{}, actualEvents)

	// listing 3 - pretend the transient container has now disappeared, leaving just the infra
	// container. Make sure the pod is reinspected for its status and the cache is updated.
	runtimeMock.EXPECT().GetPods(ctx, true).Return(pods, nil).Times(1)
	runtimeMock.EXPECT().GetPodStatus(ctx, podID, "", "").Return(goodStatus, nil).Times(1)

	pleg.Relist()
	actualEvents = getEventsFromChannel(ch)
	actualStatus, actualErr = pleg.cache.Get(podID)
	assert.Equal(t, goodStatus, actualStatus)
	assert.Equal(t, nil, actualErr)
	// No events are expected because relist #1 set the old pod record, which has the infra
	// container running. Relist #2 had the inspection error and therefore didn't modify either
	// old or new. Relist #3 forced the reinspection of the pod to retrieve its status, but
	// because the list of containers was the same as in relist #1, nothing "changed", so there
	// are no new events.
	assert.Exactly(t, []*PodLifecycleEvent{}, actualEvents)
}
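// A note on what TestRelistWithReinspection just verified: the PLEG
// remembers pods whose inspection failed (listing 2) and reinspects them on
// the next relist even when their container list is unchanged (listing 3).
// Without that retry, the cache would keep serving the stale inspection
// error for a pod that is otherwise stable.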
// Test detecting sandbox state changes.
func TestRelistingWithSandboxes(t *testing.T) {
	testPleg := newTestGenericPLEG()
	pleg, runtime := testPleg.pleg, testPleg.runtime
	ch := pleg.Watch()
	// The first relist should send a PodSync event to each pod.
	runtime.AllPodList = []*containertest.FakePod{
		{Pod: &kubecontainer.Pod{
			ID: "1234",
			Sandboxes: []*kubecontainer.Container{
				createTestContainer("c1", kubecontainer.ContainerStateExited),
				createTestContainer("c2", kubecontainer.ContainerStateRunning),
				createTestContainer("c3", kubecontainer.ContainerStateUnknown),
			},
		}},
		{Pod: &kubecontainer.Pod{
			ID: "4567",
			Sandboxes: []*kubecontainer.Container{
				createTestContainer("c1", kubecontainer.ContainerStateExited),
			},
		}},
	}
	pleg.Relist()
	// Report every running/exited container if we see them for the first time.
	expected := []*PodLifecycleEvent{
		{ID: "1234", Type: ContainerStarted, Data: "c2"},
		{ID: "4567", Type: ContainerDied, Data: "c1"},
		{ID: "1234", Type: ContainerDied, Data: "c1"},
	}
	actual := getEventsFromChannel(ch)
	verifyEvents(t, expected, actual)

	// The second relist should not send out any event because no container
	// has changed.
	pleg.Relist()
	actual = getEventsFromChannel(ch)
	assert.True(t, len(actual) == 0, "no container has changed, event length should be 0")

	runtime.AllPodList = []*containertest.FakePod{
		{Pod: &kubecontainer.Pod{
			ID: "1234",
			Sandboxes: []*kubecontainer.Container{
				createTestContainer("c2", kubecontainer.ContainerStateExited),
				createTestContainer("c3", kubecontainer.ContainerStateRunning),
			},
		}},
		{Pod: &kubecontainer.Pod{
			ID: "4567",
			Sandboxes: []*kubecontainer.Container{
				createTestContainer("c4", kubecontainer.ContainerStateRunning),
			},
		}},
	}
	pleg.Relist()
	// Only report containers that transitioned to running or exited status.
	expected = []*PodLifecycleEvent{
		{ID: "1234", Type: ContainerRemoved, Data: "c1"},
		{ID: "1234", Type: ContainerDied, Data: "c2"},
		{ID: "1234", Type: ContainerStarted, Data: "c3"},
		{ID: "4567", Type: ContainerRemoved, Data: "c1"},
		{ID: "4567", Type: ContainerStarted, Data: "c4"},
	}

	actual = getEventsFromChannel(ch)
	verifyEvents(t, expected, actual)
}
func TestRelistIPChange(t *testing.T) {
	ctx := context.Background()
	testCases := []struct {
		name   string
		podID  string
		podIPs []string
	}{
		{
			name:   "test-0",
			podID:  "test-pod-0",
			podIPs: []string{"192.168.1.5"},
		},
		{
			name:   "test-1",
			podID:  "test-pod-1",
			podIPs: []string{"192.168.1.5/24", "2000::"},
		},
	}

	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()

	for _, tc := range testCases {
		runtimeMock := containertest.NewMockRuntime(mockCtrl)

		pleg := newTestGenericPLEGWithRuntimeMock(runtimeMock)
		ch := pleg.Watch()

		id := types.UID(tc.podID)
		cState := kubecontainer.ContainerStateRunning
		container := createTestContainer("c0", cState)
		pod := &kubecontainer.Pod{
			ID:         id,
			Containers: []*kubecontainer.Container{container},
		}
		status := &kubecontainer.PodStatus{
			ID:                id,
			IPs:               tc.podIPs,
			ContainerStatuses: []*kubecontainer.Status{{ID: container.ID, State: cState}},
		}
		event := &PodLifecycleEvent{ID: pod.ID, Type: ContainerStarted, Data: container.ID.ID}

		runtimeMock.EXPECT().GetPods(ctx, true).Return([]*kubecontainer.Pod{pod}, nil).Times(1)
		runtimeMock.EXPECT().GetPodStatus(ctx, pod.ID, "", "").Return(status, nil).Times(1)

		pleg.Relist()
		actualEvents := getEventsFromChannel(ch)
		actualStatus, actualErr := pleg.cache.Get(pod.ID)
		assert.Equal(t, status, actualStatus, tc.name)
		assert.Nil(t, actualErr, tc.name)
		assert.Exactly(t, []*PodLifecycleEvent{event}, actualEvents)

		// Clear the IP address and mark the container terminated.
		container = createTestContainer("c0", kubecontainer.ContainerStateExited)
		pod = &kubecontainer.Pod{
			ID:         id,
			Containers: []*kubecontainer.Container{container},
		}
		status = &kubecontainer.PodStatus{
			ID:                id,
			ContainerStatuses: []*kubecontainer.Status{{ID: container.ID, State: kubecontainer.ContainerStateExited}},
		}
		event = &PodLifecycleEvent{ID: pod.ID, Type: ContainerDied, Data: container.ID.ID}
		runtimeMock.EXPECT().GetPods(ctx, true).Return([]*kubecontainer.Pod{pod}, nil).Times(1)
		runtimeMock.EXPECT().GetPodStatus(ctx, pod.ID, "", "").Return(status, nil).Times(1)

		pleg.Relist()
		actualEvents = getEventsFromChannel(ch)
		actualStatus, actualErr = pleg.cache.Get(pod.ID)
		// Must copy status to compare, since its pointer gets passed through all
		// the way to the event. The PLEG preserves the previously cached IPs for
		// a pod whose containers are no longer running, hence the copy carries
		// the original IPs.
		statusCopy := *status
		statusCopy.IPs = tc.podIPs
		assert.Equal(t, &statusCopy, actualStatus, tc.name)
		assert.Nil(t, actualErr, tc.name)
		assert.Exactly(t, []*PodLifecycleEvent{event}, actualEvents)
	}
}
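// A reading of the fixture below, matching the expected metrics: both pods
// have at least one sandbox in the running state, so kubelet_running_pods
// is 2; the container gauge is bucketed by container_state, and the four
// containers (c1-c3 of "1234" plus c1 of "4567") count as 1 running,
// 1 exited and 2 unknown.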
func TestRunningPodAndContainerCount(t *testing.T) {
	metrics.Register()
	testPleg := newTestGenericPLEG()
	pleg, runtime := testPleg.pleg, testPleg.runtime

	runtime.AllPodList = []*containertest.FakePod{
		{Pod: &kubecontainer.Pod{
			ID: "1234",
			Containers: []*kubecontainer.Container{
				createTestContainer("c1", kubecontainer.ContainerStateRunning),
				createTestContainer("c2", kubecontainer.ContainerStateUnknown),
				createTestContainer("c3", kubecontainer.ContainerStateUnknown),
			},
			Sandboxes: []*kubecontainer.Container{
				createTestContainer("s1", kubecontainer.ContainerStateRunning),
				createTestContainer("s2", kubecontainer.ContainerStateRunning),
				createTestContainer("s3", kubecontainer.ContainerStateUnknown),
			},
		}},
		{Pod: &kubecontainer.Pod{
			ID: "4567",
			Containers: []*kubecontainer.Container{
				createTestContainer("c1", kubecontainer.ContainerStateExited),
			},
			Sandboxes: []*kubecontainer.Container{
				createTestContainer("s1", kubecontainer.ContainerStateRunning),
				createTestContainer("s2", kubecontainer.ContainerStateExited),
			},
		}},
	}

	pleg.Relist()

	tests := []struct {
		name        string
		metricsName string
		wants       string
	}{
		{
			name:        "test container count",
			metricsName: "kubelet_running_containers",
			wants: `
# HELP kubelet_running_containers [ALPHA] Number of containers currently running
# TYPE kubelet_running_containers gauge
kubelet_running_containers{container_state="exited"} 1
kubelet_running_containers{container_state="running"} 1
kubelet_running_containers{container_state="unknown"} 2
`,
		},
		{
			name:        "test pod count",
			metricsName: "kubelet_running_pods",
			wants: `
# HELP kubelet_running_pods [ALPHA] Number of pods that have a running pod sandbox
# TYPE kubelet_running_pods gauge
kubelet_running_pods 2
`,
		},
	}

	for _, test := range tests {
		tc := test
		t.Run(tc.name, func(t *testing.T) {
			if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(tc.wants), tc.metricsName); err != nil {
				t.Fatal(err)
			}
		})
	}
}