github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/cmd/controller/state/imagescan/controller_test.go (about) 1 package imagescan 2 3 import ( 4 "context" 5 "errors" 6 "log/slog" 7 "sort" 8 "sync" 9 "testing" 10 "time" 11 12 castaipb "github.com/castai/kvisor/api/v1/runtime" 13 "github.com/castai/kvisor/cmd/controller/kube" 14 "github.com/castai/kvisor/pkg/logging" 15 "github.com/google/uuid" 16 "github.com/stretchr/testify/mock" 17 "github.com/stretchr/testify/require" 18 "google.golang.org/grpc" 19 corev1 "k8s.io/api/core/v1" 20 "k8s.io/apimachinery/pkg/api/resource" 21 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 22 "k8s.io/apimachinery/pkg/types" 23 24 imgcollectorconfig "github.com/castai/kvisor/cmd/imagescan/config" 25 ) 26 27 func TestSubscriber(t *testing.T) { 28 ctx := context.Background() 29 log := logging.New(&logging.Config{ 30 Level: slog.LevelDebug, 31 }) 32 33 createNode := func(name string) *corev1.Node { 34 return &corev1.Node{ 35 TypeMeta: metav1.TypeMeta{ 36 Kind: "Node", 37 APIVersion: "v1", 38 }, 39 ObjectMeta: metav1.ObjectMeta{ 40 Name: name, 41 Labels: map[string]string{ 42 "provisioner.cast.ai/managed-by": "cast.ai", 43 }, 44 }, 45 Status: corev1.NodeStatus{ 46 NodeInfo: corev1.NodeSystemInfo{ 47 Architecture: "amd64", 48 OperatingSystem: "linux", 49 }, 50 Allocatable: corev1.ResourceList{ 51 corev1.ResourceCPU: resource.MustParse("2"), 52 corev1.ResourceMemory: resource.MustParse("4Gi"), 53 }, 54 }, 55 } 56 } 57 58 assertLoop := func(errc chan error, assertFunc func() bool) { 59 timeout := time.After(2 * time.Second) 60 61 for { 62 select { 63 case err := <-errc: 64 t.Fatal(err) 65 case <-timeout: 66 t.Fatal("timeout waiting for image scan") 67 case <-time.After(10 * time.Millisecond): 68 if assertFunc() { 69 return 70 } 71 } 72 } 73 } 74 75 t.Run("schedule and finish scan", func(t *testing.T) { 76 r := require.New(t) 77 78 node1 := createNode("n1") 79 node2 := createNode("n2") 80 81 nginxPod1 := &corev1.Pod{ 82 TypeMeta: metav1.TypeMeta{Kind: "Pod", APIVersion: "v1"}, 83 ObjectMeta: metav1.ObjectMeta{ 84 UID: types.UID(uuid.New().String()), 85 Name: "nginx-1", 86 Namespace: "default", 87 }, 88 Spec: corev1.PodSpec{ 89 NodeName: node1.Name, 90 Containers: []corev1.Container{ 91 { 92 Name: "nginx", 93 Image: "nginx:1.23", 94 }, 95 }, 96 }, 97 Status: corev1.PodStatus{ 98 Phase: corev1.PodRunning, 99 ContainerStatuses: []corev1.ContainerStatus{ 100 {Name: "nginx", ImageID: "nginx:1.23@sha256", ContainerID: "containerd://sha256"}, 101 }, 102 }, 103 } 104 105 nginxPod2 := &corev1.Pod{ 106 TypeMeta: metav1.TypeMeta{Kind: "Pod", APIVersion: "v1"}, 107 ObjectMeta: metav1.ObjectMeta{ 108 UID: types.UID(uuid.New().String()), 109 Name: "nginx-2", 110 Namespace: "kube-system", 111 }, 112 Spec: corev1.PodSpec{ 113 NodeName: node2.Name, 114 Containers: []corev1.Container{ 115 { 116 Name: "nginx", 117 Image: "nginx:1.23", 118 }, 119 }, 120 }, 121 Status: corev1.PodStatus{ 122 Phase: corev1.PodRunning, 123 ContainerStatuses: []corev1.ContainerStatus{ 124 {Name: "nginx", ImageID: "nginx:1.23@sha256", ContainerID: "containerd://sha256"}, 125 }, 126 }, 127 } 128 129 createArgoPod := func(podName string) *corev1.Pod { 130 return &corev1.Pod{ 131 TypeMeta: metav1.TypeMeta{Kind: "Pod", APIVersion: "v1"}, 132 ObjectMeta: metav1.ObjectMeta{ 133 UID: types.UID(uuid.New().String()), 134 Name: podName, 135 Namespace: "argo", 136 }, 137 Spec: corev1.PodSpec{ 138 NodeName: node2.Name, 139 Containers: []corev1.Container{ 140 { 141 Name: "argocd", 142 Image: "argocd:0.0.1", 143 }, 144 }, 145 InitContainers: []corev1.Container{ 146 { 147 Name: "init-argo", 148 Image: "init-argo:0.0.1", 149 }, 150 }, 151 }, 152 Status: corev1.PodStatus{ 153 Phase: corev1.PodRunning, 154 ContainerStatuses: []corev1.ContainerStatus{ 155 {Name: "argocd", ImageID: "argocd:1.23@sha256", ContainerID: "containerd://sha256"}, 156 }, 157 InitContainerStatuses: []corev1.ContainerStatus{ 158 {Name: "init-argo", ImageID: "init-argo:1.23@sha256", ContainerID: "containerd://sha256"}, 159 }, 160 }, 161 } 162 } 163 164 argoPod1 := createArgoPod("argo1") 165 argoPod2 := createArgoPod("argo2") 166 167 cfg := Config{ 168 ScanInterval: 1 * time.Millisecond, 169 ScanTimeout: time.Minute, 170 Mode: "hostfs", 171 MaxConcurrentScans: 5, 172 CPURequest: "500m", 173 CPULimit: "2", 174 MemoryRequest: "100Mi", 175 MemoryLimit: "2Gi", 176 } 177 178 client := &mockCastaiClient{} 179 scanner := &mockImageScanner{} 180 scanner.On("ScanImage", mock.Anything, mock.Anything).Return(nil) 181 sub := newTestController(log, cfg) 182 sub.client = client 183 sub.imageScanner = scanner 184 sub.initialScansDelay = 1 * time.Millisecond 185 ctx, cancel := context.WithCancel(ctx) 186 defer cancel() 187 188 // Simulate concurrent deltas update. 189 go func() { 190 for { 191 sub.OnUpdate(node1) 192 sub.OnUpdate(node2) 193 sub.OnUpdate(argoPod1) 194 sub.OnUpdate(argoPod2) 195 sub.OnUpdate(nginxPod1) 196 sub.OnUpdate(nginxPod2) 197 time.Sleep(1 * time.Millisecond) 198 } 199 }() 200 201 errc := make(chan error, 1) 202 go func() { 203 errc <- sub.Run(ctx) 204 }() 205 206 assertLoop(errc, func() bool { 207 imgs := scanner.getScanImageParams() 208 if len(imgs) == 0 { 209 return false 210 } 211 212 sort.Slice(imgs, func(i, j int) bool { 213 return imgs[i].ImageName < imgs[j].ImageName 214 }) 215 r.Len(imgs, 3) 216 argoImg := imgs[0] 217 argoInitImg := imgs[1] 218 r.Equal("argocd:0.0.1", argoImg.ImageName) 219 r.Equal("init-argo:0.0.1", argoInitImg.ImageName) 220 expectedArgoPodResourceIDs := []string{string(argoPod1.UID), string(argoPod2.UID)} 221 sort.Strings(argoImg.ResourceIDs) 222 sort.Strings(expectedArgoPodResourceIDs) 223 r.Equal(expectedArgoPodResourceIDs, argoImg.ResourceIDs) 224 225 ngnxImage := imgs[2] 226 expectedNginxPodResourceIDs := []string{string(nginxPod1.UID), string(nginxPod2.UID)} 227 sort.Strings(ngnxImage.ResourceIDs) 228 sort.Strings(expectedNginxPodResourceIDs) 229 r.Equal(expectedNginxPodResourceIDs, ngnxImage.ResourceIDs) 230 r.Equal(ScanImageParams{ 231 ImageName: "nginx:1.23", 232 ImageID: "nginx:1.23@sha256", 233 ContainerRuntime: "containerd", 234 Mode: "remote", 235 ResourceIDs: ngnxImage.ResourceIDs, 236 DeleteFinishedJob: true, 237 WaitForCompletion: true, 238 WaitDurationAfterCompletion: 30 * time.Second, 239 Architecture: defaultImageArch, 240 Os: defaultImageOs, 241 ScanImageDetails: kube.ImageDetails{ 242 ScannerImageName: "kvisor-scanners", 243 ImagePullSecrets: nil, 244 }, 245 }, ngnxImage) 246 r.Len(client.getImagesResourcesChanges(), 1) 247 r.Len(client.getImagesResourcesChanges()[0].Images, 3) 248 r.Equal(castaipb.ImageScanStatus_IMAGE_SCAN_STATUS_PENDING, client.getImagesResourcesChanges()[0].Images[0].ScanStatus) 249 r.Equal(castaipb.ImageScanStatus_IMAGE_SCAN_STATUS_PENDING, client.getImagesResourcesChanges()[0].Images[1].ScanStatus) 250 r.Equal(castaipb.ImageScanStatus_IMAGE_SCAN_STATUS_PENDING, client.getImagesResourcesChanges()[0].Images[2].ScanStatus) 251 252 return true 253 }) 254 255 }) 256 257 t.Run("retry failed images", func(t *testing.T) { 258 r := require.New(t) 259 260 cfg := Config{ 261 ScanInterval: 1 * time.Millisecond, 262 ScanTimeout: time.Minute, 263 MaxConcurrentScans: 5, 264 CPURequest: "500m", 265 CPULimit: "2", 266 MemoryRequest: "100Mi", 267 MemoryLimit: "2Gi", 268 } 269 270 client := &mockCastaiClient{} 271 scanner := &mockImageScanner{} 272 sub := newTestController(log, cfg) 273 sub.client = client 274 sub.imageScanner = scanner 275 sub.initialScansDelay = 1 * time.Millisecond 276 sub.timeGetter = func() time.Time { 277 return time.Now().UTC().Add(time.Hour) 278 } 279 delta := sub.delta 280 img := newImage() 281 img.name = "img" 282 img.id = "img1" 283 img.key = "img1amd64img" 284 img.architecture = "amd64" 285 img.owners = map[string]*imageOwner{ 286 "r1": {}, 287 } 288 289 delta.images[img.key] = img 290 291 expectedErr := errors.New("failed") 292 scanner.On("ScanImage", mock.Anything, mock.Anything).Return(expectedErr).Once() 293 scanner.On("ScanImage", mock.Anything, mock.Anything).Return(nil).Once() 294 295 ctx, cancel := context.WithTimeout(ctx, 50*time.Millisecond) 296 defer cancel() 297 298 errc := make(chan error, 1) 299 go func() { 300 errc <- sub.Run(ctx) 301 }() 302 303 assertLoop(errc, func() bool { 304 imgs := scanner.getScanImageParams() 305 if len(imgs) == 0 { 306 return false 307 } 308 309 r.Len(imgs, 2) 310 img = delta.images[img.key] 311 r.False(img.nextScan.IsZero()) 312 313 r.Len(client.getImagesResourcesChanges(), 2) 314 // first scan update is pending 315 r.Len(client.getImagesResourcesChanges()[0].Images, 1) 316 r.Equal(castaipb.ImageScanStatus_IMAGE_SCAN_STATUS_PENDING, client.getImagesResourcesChanges()[0].Images[0].ScanStatus) 317 r.Empty(client.getImagesResourcesChanges()[0].Images[0].ScanError) 318 // second scan update is error 319 r.Len(client.getImagesResourcesChanges()[1].Images, 1) 320 r.Equal(castaipb.ImageScanStatus_IMAGE_SCAN_STATUS_SCAN_ERROR, client.getImagesResourcesChanges()[1].Images[0].ScanStatus) 321 r.Equal(expectedErr.Error(), client.getImagesResourcesChanges()[1].Images[0].ScanError) 322 return true 323 }) 324 }) 325 326 t.Run("scan image with remote mode fallback", func(t *testing.T) { 327 r := require.New(t) 328 329 cfg := Config{ 330 ScanInterval: 1 * time.Millisecond, 331 ScanTimeout: time.Minute, 332 MaxConcurrentScans: 5, 333 Mode: string(imgcollectorconfig.ModeHostFS), 334 CPURequest: "500m", 335 CPULimit: "2", 336 MemoryRequest: "100Mi", 337 MemoryLimit: "2Gi", 338 } 339 340 scanner := &mockImageScanner{} 341 scanner.On("ScanImage", mock.Anything, mock.Anything).Return(nil) 342 sub := newTestController(log, cfg) 343 sub.imageScanner = scanner 344 sub.initialScansDelay = 1 * time.Millisecond 345 sub.timeGetter = func() time.Time { 346 return time.Now().UTC().Add(time.Hour) 347 } 348 delta := sub.delta 349 img := newImage() 350 img.name = "img" 351 img.id = "img1" 352 img.key = "img1amd64img" 353 img.architecture = "amd64" 354 img.containerRuntime = imgcollectorconfig.RuntimeContainerd 355 img.owners = map[string]*imageOwner{ 356 "r1": {}, 357 } 358 delta.images[img.key] = img 359 delta.SetImageScanError(img.key, errImageScanLayerNotFound) 360 361 ctx, cancel := context.WithTimeout(ctx, 50*time.Millisecond) 362 defer cancel() 363 364 errc := make(chan error, 1) 365 go func() { 366 errc <- sub.Run(ctx) 367 }() 368 369 assertLoop(errc, func() bool { 370 imgs := scanner.getScanImageParams() 371 if len(imgs) == 0 { 372 return false 373 } 374 375 r.Len(imgs, 1) 376 r.Equal(string(imgcollectorconfig.ModeRemote), imgs[0].Mode) 377 return true 378 }) 379 }) 380 381 t.Run("select any node with remote scan mode", func(t *testing.T) { 382 r := require.New(t) 383 384 cfg := Config{ 385 ScanInterval: 1 * time.Millisecond, 386 ScanTimeout: time.Minute, 387 MaxConcurrentScans: 5, 388 Mode: string(imgcollectorconfig.ModeRemote), 389 CPURequest: "500m", 390 CPULimit: "2", 391 MemoryRequest: "100Mi", 392 MemoryLimit: "2Gi", 393 } 394 395 scanner := &mockImageScanner{} 396 scanner.On("ScanImage", mock.Anything, mock.Anything).Return(nil) 397 client := &mockCastaiClient{} 398 podOwnerGetter := &mockKubeController{} 399 sub := NewController(log, cfg, scanner, client, podOwnerGetter) 400 sub.initialScansDelay = 1 * time.Millisecond 401 sub.timeGetter = func() time.Time { 402 return time.Now().UTC().Add(time.Hour) 403 } 404 delta := sub.delta 405 img := newImage() 406 img.name = "img" 407 img.id = "img1" 408 img.key = "img1amd64img" 409 img.architecture = "amd64" 410 img.containerRuntime = imgcollectorconfig.RuntimeContainerd 411 img.owners = map[string]*imageOwner{ 412 "r1": {}, 413 } 414 delta.images[img.key] = img 415 416 ctx, cancel := context.WithTimeout(ctx, 50*time.Millisecond) 417 defer cancel() 418 419 errc := make(chan error, 1) 420 go func() { 421 errc <- sub.Run(ctx) 422 }() 423 424 assertLoop(errc, func() bool { 425 imgs := scanner.getScanImageParams() 426 if len(imgs) == 0 { 427 return false 428 } 429 430 r.Len(imgs, 1) 431 r.Equal(string(imgcollectorconfig.ModeRemote), imgs[0].Mode) 432 return true 433 }) 434 }) 435 436 t.Run("send changed resource owners", func(t *testing.T) { 437 r := require.New(t) 438 439 cfg := Config{ 440 ScanInterval: 1 * time.Millisecond, 441 } 442 443 client := &mockCastaiClient{} 444 sub := newTestController(log, cfg) 445 sub.client = client 446 sub.initialScansDelay = 1 * time.Millisecond 447 sub.timeGetter = func() time.Time { 448 return time.Now().UTC().Add(time.Hour) 449 } 450 delta := sub.delta 451 img := newImage() 452 img.name = "img" 453 img.id = "img1" 454 img.key = "img1amd64img" 455 img.architecture = "amd64" 456 img.owners = map[string]*imageOwner{ 457 "r1": {}, 458 } 459 delta.images[img.key] = img 460 461 ctx, cancel := context.WithTimeout(ctx, 50*time.Millisecond) 462 defer cancel() 463 464 errc := make(chan error, 1) 465 go func() { 466 errc <- sub.Run(ctx) 467 }() 468 469 assertLoop(errc, func() bool { 470 changes := client.getImagesResourcesChanges() 471 472 // Should have only 1 call to api because there are no delta updates 473 r.Len(changes, 1) 474 475 // First api call. Initial full resync. 476 change1Img1 := changes[0].Images[0] 477 r.Equal("img1", change1Img1.Id) 478 r.Equal("amd64", change1Img1.Architecture) 479 r.Equal([]string{"r1"}, change1Img1.ResourceIds) 480 481 return true 482 }) 483 }) 484 485 t.Run("sync scanned images from remote state", func(t *testing.T) { 486 r := require.New(t) 487 488 cfg := Config{ 489 ScanInterval: 1 * time.Millisecond, 490 ScanTimeout: time.Minute, 491 MaxConcurrentScans: 5, 492 CPURequest: "500m", 493 CPULimit: "2", 494 MemoryRequest: "100Mi", 495 MemoryLimit: "2Gi", 496 } 497 498 scanner := &mockImageScanner{} 499 scanner.On("ScanImage", mock.Anything, mock.Anything).Return(nil) 500 client := &mockCastaiClient{ 501 syncState: &castaipb.GetSyncStateResponse{ 502 Images: &castaipb.ImagesSyncState{ 503 Images: []*castaipb.Image{ 504 { 505 Id: "img1", 506 Architecture: "amd64", 507 }, 508 { 509 Id: "img2", 510 Architecture: "amd64", 511 }, 512 }, 513 }, 514 }, 515 } 516 sub := newTestController(log, cfg) 517 sub.imageScanner = scanner 518 sub.client = client 519 sub.initialScansDelay = 1 * time.Millisecond 520 sub.timeGetter = func() time.Time { 521 return time.Now().UTC().Add(time.Hour) 522 } 523 delta := sub.delta 524 img1 := newImage() 525 img1.name = "img1" 526 img1.id = "img1" 527 img1.key = "img1amd64img1" 528 img1.architecture = "amd64" 529 img1.owners = map[string]*imageOwner{ 530 "r1": {}, 531 } 532 delta.images[img1.key] = img1 533 534 img2 := newImage() 535 img2.name = "img2" 536 img2.id = "img2" 537 img2.key = "img2amd64img2" 538 img2.architecture = "amd64" 539 img2.owners = map[string]*imageOwner{ 540 "r2": {}, 541 } 542 delta.images[img2.key] = img2 543 544 errc := make(chan error, 1) 545 go func() { 546 errc <- sub.Run(ctx) 547 }() 548 549 assertLoop(errc, func() bool { 550 syncCalls := client.getSyncStateCalls() 551 if syncCalls < 1 { 552 return false 553 } 554 555 // Should have only one api call. 556 r.Equal(1, syncCalls) 557 return true 558 }) 559 }) 560 } 561 562 func newTestController(log *logging.Logger, cfg Config) *Controller { 563 scanner := &mockImageScanner{} 564 client := &mockCastaiClient{} 565 podOwnerGetter := &mockKubeController{} 566 return NewController(log, cfg, scanner, client, podOwnerGetter) 567 } 568 569 type mockImageScanner struct { 570 mu sync.Mutex 571 imgs []ScanImageParams 572 mock.Mock 573 } 574 575 func (m *mockImageScanner) ScanImage(ctx context.Context, cfg ScanImageParams) (err error) { 576 m.mu.Lock() 577 defer m.mu.Unlock() 578 m.imgs = append(m.imgs, cfg) 579 return m.Called(ctx, cfg).Error(0) 580 } 581 582 func (m *mockImageScanner) getScanImageParams() []ScanImageParams { 583 m.mu.Lock() 584 defer m.mu.Unlock() 585 return m.imgs 586 } 587 588 type mockKubeController struct { 589 } 590 591 func (m *mockKubeController) GetOwnerUID(obj kube.Object) string { 592 return string(obj.GetUID()) 593 } 594 595 func (m *mockKubeController) GetKvisorAgentImageDetails() (kube.ImageDetails, bool) { 596 return kube.ImageDetails{ 597 ScannerImageName: "kvisor-scanners", 598 ImagePullSecrets: nil, 599 }, true 600 } 601 602 type mockCastaiClient struct { 603 mu sync.Mutex 604 metas []*castaipb.ImageMetadata 605 606 imagesResourcesChanges []*castaipb.UpdateSyncStateRequest 607 608 syncState *castaipb.GetSyncStateResponse 609 syncStateCalls int 610 } 611 612 func (m *mockCastaiClient) GetSyncState(ctx context.Context, in *castaipb.GetSyncStateRequest, opts ...grpc.CallOption) (*castaipb.GetSyncStateResponse, error) { 613 m.mu.Lock() 614 defer m.mu.Unlock() 615 m.syncStateCalls++ 616 if m.syncState != nil { 617 return m.syncState, nil 618 } 619 return &castaipb.GetSyncStateResponse{}, nil 620 } 621 622 func (m *mockCastaiClient) UpdateSyncState(ctx context.Context, in *castaipb.UpdateSyncStateRequest, opts ...grpc.CallOption) (*castaipb.UpdateSyncStateResponse, error) { 623 m.mu.Lock() 624 defer m.mu.Unlock() 625 m.imagesResourcesChanges = append(m.imagesResourcesChanges, in) 626 return &castaipb.UpdateSyncStateResponse{}, nil 627 } 628 629 func (m *mockCastaiClient) getImagesResourcesChanges() []*castaipb.UpdateSyncStateRequest { 630 m.mu.Lock() 631 defer m.mu.Unlock() 632 return m.imagesResourcesChanges 633 } 634 635 func (m *mockCastaiClient) getSyncStateCalls() int { 636 m.mu.Lock() 637 defer m.mu.Unlock() 638 return m.syncStateCalls 639 }