github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/cmd/controller/state/imagescan/delta.go (about)

     1  package imagescan
     2  
     3  import (
     4  	"strings"
     5  	"sync"
     6  	"time"
     7  
     8  	castaipb "github.com/castai/kvisor/api/v1/runtime"
     9  	imagescanconfig "github.com/castai/kvisor/cmd/imagescan/config"
    10  
    11  	"github.com/castai/kvisor/cmd/controller/kube"
    12  	"github.com/samber/lo"
    13  	corev1 "k8s.io/api/core/v1"
    14  	"k8s.io/apimachinery/pkg/util/wait"
    15  )
    16  
    17  const defaultImageOs = "linux"
    18  const defaultImageArch = "amd64"
    19  
// kubeClient is the subset of the cluster client that the image delta state
// depends on.
type kubeClient interface {
	// GetOwnerUID returns the owner UID for obj. The delta state uses the
	// returned value as the key under which pods are grouped per image.
	GetOwnerUID(obj kube.Object) string
	// GetKvisorAgentImageDetails returns the kvisor agent image details.
	// NOTE(review): the boolean presumably reports whether the details are
	// available - confirm against the implementation.
	GetKvisorAgentImageDetails() (kube.ImageDetails, bool)
}
    24  
    25  func newImage() *image {
    26  	return &image{
    27  		owners:  map[string]*imageOwner{},
    28  		scanned: false,
    29  		retryBackoff: wait.Backoff{
    30  			Duration: time.Second * 60,
    31  			Factor:   3,
    32  			Steps:    8,
    33  		},
    34  	}
    35  }
    36  
    37  func newDeltaState(kubeClient kubeClient) *deltaState {
    38  	return &deltaState{
    39  		kubeClient: kubeClient,
    40  		images:     map[string]*image{},
    41  		nodes:      map[string]*corev1.Node{},
    42  	}
    43  }
    44  
// deltaState tracks the images currently in use in the cluster together with
// the nodes needed to resolve the platform of each pod.
type deltaState struct {
	// kubeClient is used to resolve the owner UID of a pod.
	kubeClient kubeClient

	// mu guards images and nodes. The exported methods acquire it; the
	// unexported helpers are called with it already held.
	mu sync.Mutex

	// images holds current cluster images state keyed by image key. Each image
	// records its owners and their pods.
	// nodes holds the most recently upserted node objects keyed by node name.
	images map[string]*image
	nodes  map[string]*corev1.Node
}
    54  
    55  func (d *deltaState) Upsert(o kube.Object) {
    56  	d.mu.Lock()
    57  	defer d.mu.Unlock()
    58  
    59  	switch v := o.(type) {
    60  	case *corev1.Pod:
    61  		d.handlePodUpdate(v)
    62  	case *corev1.Node:
    63  		d.nodes[v.Name] = v
    64  	}
    65  }
    66  
    67  func (d *deltaState) Delete(o kube.Object) {
    68  	d.mu.Lock()
    69  	defer d.mu.Unlock()
    70  
    71  	switch v := o.(type) {
    72  	case *corev1.Pod:
    73  		d.handlePodDelete(v)
    74  	case *corev1.Node:
    75  		delete(d.nodes, v.Name)
    76  	}
    77  }
    78  
    79  func (d *deltaState) GetImagesCopy() []*image {
    80  	d.mu.Lock()
    81  	defer d.mu.Unlock()
    82  
    83  	res := make([]*image, 0, len(d.images))
    84  	for _, img := range d.images {
    85  		imgCopy := *img
    86  		res = append(res, &imgCopy)
    87  	}
    88  	return res
    89  }
    90  
    91  func (d *deltaState) SetImageScanError(imgKey string, err error) {
    92  	d.mu.Lock()
    93  	defer d.mu.Unlock()
    94  
    95  	img := d.images[imgKey]
    96  	if img == nil {
    97  		return
    98  	}
    99  
   100  	img.failures++
   101  	img.lastScanErr = err
   102  
   103  	img.nextScan = time.Now().UTC().Add(img.retryBackoff.Step())
   104  }
   105  
   106  func (d *deltaState) SetResourcesUpdatedAt(images []*image, now time.Time) {
   107  	d.mu.Lock()
   108  	defer d.mu.Unlock()
   109  
   110  	for _, img := range images {
   111  		if deltaImg, ok := d.images[img.key]; ok {
   112  			deltaImg.resourcesUpdatedAt = now
   113  		}
   114  	}
   115  }
   116  
   117  func (d *deltaState) SetImageScanned(imgKey string) {
   118  	d.mu.Lock()
   119  	defer d.mu.Unlock()
   120  
   121  	if img, ok := d.images[imgKey]; ok {
   122  		img.scanned = true
   123  	}
   124  }
   125  
   126  func (d *deltaState) UpdateRemoteSyncedAt(images []*image, now time.Time) {
   127  	d.mu.Lock()
   128  	defer d.mu.Unlock()
   129  
   130  	for _, img := range images {
   131  		if deltaImg, ok := d.images[img.key]; ok {
   132  			deltaImg.lastRemoteSyncAt = now
   133  		}
   134  	}
   135  }
   136  
   137  func (d *deltaState) SetScannedImages(images []*castaipb.Image) {
   138  	d.mu.Lock()
   139  	defer d.mu.Unlock()
   140  
   141  	for _, remoteImage := range images {
   142  		d.setImageScanned(remoteImage)
   143  	}
   144  }
   145  
   146  func (d *deltaState) setImageScanned(scannedImg *castaipb.Image) {
   147  	for _, img := range d.images {
   148  		if img.id == scannedImg.Id && img.architecture == scannedImg.Architecture {
   149  			img.scanned = true
   150  		}
   151  	}
   152  }
   153  
   154  func (d *deltaState) handlePodUpdate(v *corev1.Pod) {
   155  	if v.Status.Phase == corev1.PodSucceeded {
   156  		d.handlePodDelete(v)
   157  	}
   158  	if v.Status.Phase == corev1.PodRunning {
   159  		d.upsertImages(v)
   160  	}
   161  }
   162  
   163  func (d *deltaState) upsertImages(pod *corev1.Pod) {
   164  	now := time.Now().UTC()
   165  
   166  	containers := pod.Spec.Containers
   167  	containers = append(containers, pod.Spec.InitContainers...)
   168  	containerStatuses := pod.Status.ContainerStatuses
   169  	containerStatuses = append(containerStatuses, pod.Status.InitContainerStatuses...)
   170  	podID := string(pod.UID)
   171  	// Get the resource id of Deployment, ReplicaSet, StatefulSet, Job, CronJob.
   172  	ownerResourceID := d.kubeClient.GetOwnerUID(pod)
   173  
   174  	for _, cont := range containers {
   175  		cs, found := lo.Find(containerStatuses, func(v corev1.ContainerStatus) bool {
   176  			return v.Name == cont.Name
   177  		})
   178  		if !found {
   179  			continue
   180  		}
   181  		if cs.ImageID == "" {
   182  			continue
   183  		}
   184  		if cont.Image == "" {
   185  			continue
   186  		}
   187  
   188  		platform := d.getPodPlatform(pod)
   189  		key := cs.ImageID + platform.architecture + cont.Image
   190  		img, found := d.images[key]
   191  		if !found {
   192  			img = newImage()
   193  			img.name = cont.Image
   194  			img.key = key
   195  			img.architecture = platform.architecture
   196  			img.os = platform.os
   197  		}
   198  		img.id = cs.ImageID
   199  		img.containerRuntime = getContainerRuntime(cs.ContainerID)
   200  
   201  		// Upsert image owners.
   202  		if owner, found := img.owners[ownerResourceID]; found {
   203  			owner.podIDs[podID] = struct{}{}
   204  		} else {
   205  			img.owners[ownerResourceID] = &imageOwner{
   206  				podIDs: map[string]struct{}{
   207  					podID: {},
   208  				},
   209  			}
   210  			img.ownerChangedAt = now
   211  		}
   212  
   213  		d.images[key] = img
   214  	}
   215  }
   216  
   217  func (d *deltaState) handlePodDelete(pod *corev1.Pod) {
   218  	now := time.Now().UTC()
   219  	for imgKey, img := range d.images {
   220  		if img.architecture != d.getPodPlatform(pod).architecture {
   221  			continue
   222  		}
   223  
   224  		podID := string(pod.UID)
   225  
   226  		ownerResourceID := d.kubeClient.GetOwnerUID(pod)
   227  		if owner, found := img.owners[ownerResourceID]; found {
   228  			delete(owner.podIDs, podID)
   229  			if len(owner.podIDs) == 0 {
   230  				delete(img.owners, ownerResourceID)
   231  				img.ownerChangedAt = now
   232  			}
   233  		}
   234  
   235  		if len(img.owners) == 0 {
   236  			delete(d.images, imgKey)
   237  		}
   238  	}
   239  }
   240  
// platform is the architecture and operating system pair resolved for a pod
// from its node, or from the package defaults when the node does not provide
// them.
type platform struct {
	architecture string
	os           string
}
   245  
   246  func (d *deltaState) getPodPlatform(pod *corev1.Pod) platform {
   247  	n, ok := d.nodes[pod.Spec.NodeName]
   248  	if ok && n.Status.NodeInfo.Architecture != "" && n.Status.NodeInfo.OperatingSystem != "" {
   249  		return platform{
   250  			architecture: n.Status.NodeInfo.Architecture,
   251  			os:           n.Status.NodeInfo.OperatingSystem,
   252  		}
   253  	}
   254  	return platform{
   255  		architecture: defaultImageArch,
   256  		os:           defaultImageOs,
   257  	}
   258  }
   259  
   260  func getContainerRuntime(containerID string) imagescanconfig.Runtime {
   261  	parts := strings.Split(containerID, "://")
   262  	if len(parts) != 2 {
   263  		return ""
   264  	}
   265  	cr := parts[0]
   266  	switch cr {
   267  	case "docker":
   268  		return imagescanconfig.RuntimeDocker
   269  	case "containerd":
   270  		return imagescanconfig.RuntimeContainerd
   271  	}
   272  	return ""
   273  }
   274  
// imageOwner groups the pods of a single owner resource that use an image.
type imageOwner struct {
	// podIDs is the set of pod UIDs that belong to this owner and use the image.
	podIDs map[string]struct{}
}
   278  
// image is the tracked state of a single container image: its identity and
// platform, the owners using it, and its scan and sync bookkeeping.
type image struct {
	// key is built from the image id, architecture and name.
	key string // used in map[string]*image

	// id is ImageID from container status. It includes image name and digest.
	//
	// Note: ImageID's digest part could confuse you with actual image digest.
	// Kubernetes calculates digest based on one of these cases:
	// 1. Index manifest (if exists).
	// 2. Manifest file.
	// 3. Config file. Mostly legacy for old images without manifest.
	id string

	// name is image name from container spec.
	//
	// Note: We select image name from container spec (not from container status).
	// In container status you will see fully qualified image name, eg. docker.io/grafana/grafana:latest
	// while on container spec you will see user defined image name which may not be fully qualified, eg: grafana/grafana:latest
	name string

	// architecture and os are the platform resolved for the pod that first
	// introduced the image. containerRuntime is derived from the container ID
	// prefix of the container status and is refreshed on every upsert.
	architecture     string
	os               string
	containerRuntime imagescanconfig.Runtime

	// owners map key points to higher level k8s resource for that image. (Image Affected resource in CAST AI console).
	// Example: In most cases Pod will be managed by deployment, so owner id will point to Deployment's uuid.
	owners map[string]*imageOwner

	scanned      bool         // Set once the image is reported as scanned.
	lastScanErr  error        // Error of the most recent failed scan.
	failures     int          // Used for sorting. We want to scan non-failed images first.
	retryBackoff wait.Backoff // Retry state for failed images.
	nextScan     time.Time    // Set based on retry backoff.

	lastRemoteSyncAt   time.Time // Time when image state was synced from remote.
	ownerChangedAt     time.Time // Time when an image owner was last added or removed.
	resourcesUpdatedAt time.Time // Time when image was synced with backend
}