github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/cmd/controller/state/imagescan/delta.go (about) 1 package imagescan 2 3 import ( 4 "strings" 5 "sync" 6 "time" 7 8 castaipb "github.com/castai/kvisor/api/v1/runtime" 9 imagescanconfig "github.com/castai/kvisor/cmd/imagescan/config" 10 11 "github.com/castai/kvisor/cmd/controller/kube" 12 "github.com/samber/lo" 13 corev1 "k8s.io/api/core/v1" 14 "k8s.io/apimachinery/pkg/util/wait" 15 ) 16 17 const defaultImageOs = "linux" 18 const defaultImageArch = "amd64" 19 20 type kubeClient interface { 21 GetOwnerUID(obj kube.Object) string 22 GetKvisorAgentImageDetails() (kube.ImageDetails, bool) 23 } 24 25 func newImage() *image { 26 return &image{ 27 owners: map[string]*imageOwner{}, 28 scanned: false, 29 retryBackoff: wait.Backoff{ 30 Duration: time.Second * 60, 31 Factor: 3, 32 Steps: 8, 33 }, 34 } 35 } 36 37 func newDeltaState(kubeClient kubeClient) *deltaState { 38 return &deltaState{ 39 kubeClient: kubeClient, 40 images: map[string]*image{}, 41 nodes: map[string]*corev1.Node{}, 42 } 43 } 44 45 type deltaState struct { 46 kubeClient kubeClient 47 48 mu sync.Mutex 49 50 // images holds current cluster images state. image struct contains associated nodes and owners. 51 images map[string]*image 52 nodes map[string]*corev1.Node 53 } 54 55 func (d *deltaState) Upsert(o kube.Object) { 56 d.mu.Lock() 57 defer d.mu.Unlock() 58 59 switch v := o.(type) { 60 case *corev1.Pod: 61 d.handlePodUpdate(v) 62 case *corev1.Node: 63 d.nodes[v.Name] = v 64 } 65 } 66 67 func (d *deltaState) Delete(o kube.Object) { 68 d.mu.Lock() 69 defer d.mu.Unlock() 70 71 switch v := o.(type) { 72 case *corev1.Pod: 73 d.handlePodDelete(v) 74 case *corev1.Node: 75 delete(d.nodes, v.Name) 76 } 77 } 78 79 func (d *deltaState) GetImagesCopy() []*image { 80 d.mu.Lock() 81 defer d.mu.Unlock() 82 83 res := make([]*image, 0, len(d.images)) 84 for _, img := range d.images { 85 imgCopy := *img 86 res = append(res, &imgCopy) 87 } 88 return res 89 } 90 91 func (d *deltaState) SetImageScanError(imgKey string, err error) { 92 d.mu.Lock() 93 defer d.mu.Unlock() 94 95 img := d.images[imgKey] 96 if img == nil { 97 return 98 } 99 100 img.failures++ 101 img.lastScanErr = err 102 103 img.nextScan = time.Now().UTC().Add(img.retryBackoff.Step()) 104 } 105 106 func (d *deltaState) SetResourcesUpdatedAt(images []*image, now time.Time) { 107 d.mu.Lock() 108 defer d.mu.Unlock() 109 110 for _, img := range images { 111 if deltaImg, ok := d.images[img.key]; ok { 112 deltaImg.resourcesUpdatedAt = now 113 } 114 } 115 } 116 117 func (d *deltaState) SetImageScanned(imgKey string) { 118 d.mu.Lock() 119 defer d.mu.Unlock() 120 121 if img, ok := d.images[imgKey]; ok { 122 img.scanned = true 123 } 124 } 125 126 func (d *deltaState) UpdateRemoteSyncedAt(images []*image, now time.Time) { 127 d.mu.Lock() 128 defer d.mu.Unlock() 129 130 for _, img := range images { 131 if deltaImg, ok := d.images[img.key]; ok { 132 deltaImg.lastRemoteSyncAt = now 133 } 134 } 135 } 136 137 func (d *deltaState) SetScannedImages(images []*castaipb.Image) { 138 d.mu.Lock() 139 defer d.mu.Unlock() 140 141 for _, remoteImage := range images { 142 d.setImageScanned(remoteImage) 143 } 144 } 145 146 func (d *deltaState) setImageScanned(scannedImg *castaipb.Image) { 147 for _, img := range d.images { 148 if img.id == scannedImg.Id && img.architecture == scannedImg.Architecture { 149 img.scanned = true 150 } 151 } 152 } 153 154 func (d *deltaState) handlePodUpdate(v *corev1.Pod) { 155 if v.Status.Phase == corev1.PodSucceeded { 156 d.handlePodDelete(v) 157 } 158 if v.Status.Phase == corev1.PodRunning { 159 d.upsertImages(v) 160 } 161 } 162 163 func (d *deltaState) upsertImages(pod *corev1.Pod) { 164 now := time.Now().UTC() 165 166 containers := pod.Spec.Containers 167 containers = append(containers, pod.Spec.InitContainers...) 168 containerStatuses := pod.Status.ContainerStatuses 169 containerStatuses = append(containerStatuses, pod.Status.InitContainerStatuses...) 170 podID := string(pod.UID) 171 // Get the resource id of Deployment, ReplicaSet, StatefulSet, Job, CronJob. 172 ownerResourceID := d.kubeClient.GetOwnerUID(pod) 173 174 for _, cont := range containers { 175 cs, found := lo.Find(containerStatuses, func(v corev1.ContainerStatus) bool { 176 return v.Name == cont.Name 177 }) 178 if !found { 179 continue 180 } 181 if cs.ImageID == "" { 182 continue 183 } 184 if cont.Image == "" { 185 continue 186 } 187 188 platform := d.getPodPlatform(pod) 189 key := cs.ImageID + platform.architecture + cont.Image 190 img, found := d.images[key] 191 if !found { 192 img = newImage() 193 img.name = cont.Image 194 img.key = key 195 img.architecture = platform.architecture 196 img.os = platform.os 197 } 198 img.id = cs.ImageID 199 img.containerRuntime = getContainerRuntime(cs.ContainerID) 200 201 // Upsert image owners. 202 if owner, found := img.owners[ownerResourceID]; found { 203 owner.podIDs[podID] = struct{}{} 204 } else { 205 img.owners[ownerResourceID] = &imageOwner{ 206 podIDs: map[string]struct{}{ 207 podID: {}, 208 }, 209 } 210 img.ownerChangedAt = now 211 } 212 213 d.images[key] = img 214 } 215 } 216 217 func (d *deltaState) handlePodDelete(pod *corev1.Pod) { 218 now := time.Now().UTC() 219 for imgKey, img := range d.images { 220 if img.architecture != d.getPodPlatform(pod).architecture { 221 continue 222 } 223 224 podID := string(pod.UID) 225 226 ownerResourceID := d.kubeClient.GetOwnerUID(pod) 227 if owner, found := img.owners[ownerResourceID]; found { 228 delete(owner.podIDs, podID) 229 if len(owner.podIDs) == 0 { 230 delete(img.owners, ownerResourceID) 231 img.ownerChangedAt = now 232 } 233 } 234 235 if len(img.owners) == 0 { 236 delete(d.images, imgKey) 237 } 238 } 239 } 240 241 type platform struct { 242 architecture string 243 os string 244 } 245 246 func (d *deltaState) getPodPlatform(pod *corev1.Pod) platform { 247 n, ok := d.nodes[pod.Spec.NodeName] 248 if ok && n.Status.NodeInfo.Architecture != "" && n.Status.NodeInfo.OperatingSystem != "" { 249 return platform{ 250 architecture: n.Status.NodeInfo.Architecture, 251 os: n.Status.NodeInfo.OperatingSystem, 252 } 253 } 254 return platform{ 255 architecture: defaultImageArch, 256 os: defaultImageOs, 257 } 258 } 259 260 func getContainerRuntime(containerID string) imagescanconfig.Runtime { 261 parts := strings.Split(containerID, "://") 262 if len(parts) != 2 { 263 return "" 264 } 265 cr := parts[0] 266 switch cr { 267 case "docker": 268 return imagescanconfig.RuntimeDocker 269 case "containerd": 270 return imagescanconfig.RuntimeContainerd 271 } 272 return "" 273 } 274 275 type imageOwner struct { 276 podIDs map[string]struct{} 277 } 278 279 type image struct { 280 key string // used in map[string]*image 281 282 // id is ImageID from container status. It includes image name and digest. 283 // 284 // Note: ImageID's digest part could confuse you with actual image digest. 285 // Kubernetes calculates digest based on one of these cases: 286 // 1. Index manifest (if exists). 287 // 2. Manifest file. 288 // 3. Config file. Mostly legacy for old images without manifest. 289 id string 290 291 // name is image name from container spec. 292 // 293 // Note: We select image name from container spec (not from container status). 294 // In container status you will see fully qualified image name, eg. docker.io/grafana/grafana:latest 295 // while on container spec you will see user defined image name which may not be fully qualified, eg: grafana/grafana:latest 296 name string 297 298 architecture string 299 os string 300 containerRuntime imagescanconfig.Runtime 301 302 // owners map key points to higher level k8s resource for that image. (Image Affected resource in CAST AI console). 303 // Example: In most cases Pod will be managed by deployment, so owner id will point to Deployment's uuid. 304 owners map[string]*imageOwner 305 306 scanned bool 307 lastScanErr error 308 failures int // Used for sorting. We want to scan non-failed images first. 309 retryBackoff wait.Backoff // Retry state for failed images. 310 nextScan time.Time // Set based on retry backoff. 311 312 lastRemoteSyncAt time.Time // Time then image state was synced from remote. 313 ownerChangedAt time.Time // Time when new image owner was added 314 resourcesUpdatedAt time.Time // Time when image was synced with backend 315 }