k8s.io/kubernetes@v1.29.3/pkg/kubelet/images/image_gc_manager.go (about) 1 /* 2 Copyright 2015 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package images 18 19 import ( 20 "context" 21 goerrors "errors" 22 "fmt" 23 "math" 24 "sort" 25 "strings" 26 "sync" 27 "time" 28 29 "go.opentelemetry.io/otel/trace" 30 v1 "k8s.io/api/core/v1" 31 "k8s.io/klog/v2" 32 33 "k8s.io/apimachinery/pkg/util/errors" 34 "k8s.io/apimachinery/pkg/util/sets" 35 "k8s.io/apimachinery/pkg/util/wait" 36 utilfeature "k8s.io/apiserver/pkg/util/feature" 37 "k8s.io/client-go/tools/record" 38 statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" 39 "k8s.io/kubernetes/pkg/features" 40 "k8s.io/kubernetes/pkg/kubelet/container" 41 "k8s.io/kubernetes/pkg/kubelet/events" 42 "k8s.io/kubernetes/pkg/kubelet/metrics" 43 "k8s.io/kubernetes/pkg/kubelet/util/sliceutils" 44 ) 45 46 // instrumentationScope is OpenTelemetry instrumentation scope name 47 const instrumentationScope = "k8s.io/kubernetes/pkg/kubelet/images" 48 49 // When RuntimeClassInImageCriAPI feature gate is enabled, imageRecord is 50 // indexed as imageId-RuntimeHandler 51 const imageIndexTupleFormat = "%s,%s" 52 53 // StatsProvider is an interface for fetching stats used during image garbage 54 // collection. 55 type StatsProvider interface { 56 // ImageFsStats returns the stats of the image filesystem. 57 ImageFsStats(ctx context.Context) (*statsapi.FsStats, *statsapi.FsStats, error) 58 } 59 60 // ImageGCManager is an interface for managing lifecycle of all images. 61 // Implementation is thread-safe. 62 type ImageGCManager interface { 63 // Applies the garbage collection policy. Errors include being unable to free 64 // enough space as per the garbage collection policy. 65 GarbageCollect(ctx context.Context) error 66 67 // Start async garbage collection of images. 68 Start() 69 70 GetImageList() ([]container.Image, error) 71 72 // Delete all unused images. 73 DeleteUnusedImages(ctx context.Context) error 74 } 75 76 // ImageGCPolicy is a policy for garbage collecting images. Policy defines an allowed band in 77 // which garbage collection will be run. 78 type ImageGCPolicy struct { 79 // Any usage above this threshold will always trigger garbage collection. 80 // This is the highest usage we will allow. 81 HighThresholdPercent int 82 83 // Any usage below this threshold will never trigger garbage collection. 84 // This is the lowest threshold we will try to garbage collect to. 85 LowThresholdPercent int 86 87 // Minimum age at which an image can be garbage collected. 88 MinAge time.Duration 89 90 // Maximum age after which an image can be garbage collected, regardless of disk usage. 91 // Currently gated by MaximumImageGCAge feature gate and Kubelet configuration. 92 // If 0, the feature is disabled. 93 MaxAge time.Duration 94 } 95 96 type realImageGCManager struct { 97 // Container runtime 98 runtime container.Runtime 99 100 // Records of images and their use. Indexed by ImageId. 101 // If RuntimeClassInImageCriAPI feature gate is enabled, imageRecords 102 // are identified by a tuple of (imageId,runtimeHandler) that is passed 103 // from ListImages() call. If no runtimehandler is specified in response 104 // to ListImages() by the container runtime, only imageID will be used as 105 // the index of this map. 106 imageRecords map[string]*imageRecord 107 imageRecordsLock sync.Mutex 108 109 // The image garbage collection policy in use. 110 policy ImageGCPolicy 111 112 // statsProvider provides stats used during image garbage collection. 113 statsProvider StatsProvider 114 115 // Recorder for Kubernetes events. 116 recorder record.EventRecorder 117 118 // Reference to this node. 119 nodeRef *v1.ObjectReference 120 121 // imageCache is the cache of latest image list. 122 imageCache imageCache 123 124 // tracer for recording spans 125 tracer trace.Tracer 126 } 127 128 // imageCache caches latest result of ListImages. 129 type imageCache struct { 130 // sync.Mutex is the mutex protects the image cache. 131 sync.Mutex 132 // images is the image cache. 133 images []container.Image 134 } 135 136 // set sorts the input list and updates image cache. 137 // 'i' takes ownership of the list, you should not reference the list again 138 // after calling this function. 139 func (i *imageCache) set(images []container.Image) { 140 i.Lock() 141 defer i.Unlock() 142 // The image list needs to be sorted when it gets read and used in 143 // setNodeStatusImages. We sort the list on write instead of on read, 144 // because the image cache is more often read than written 145 sort.Sort(sliceutils.ByImageSize(images)) 146 i.images = images 147 } 148 149 // get gets image list from image cache. 150 // NOTE: The caller of get() should not do mutating operations on the 151 // returned list that could cause data race against other readers (e.g. 152 // in-place sorting the returned list) 153 func (i *imageCache) get() []container.Image { 154 i.Lock() 155 defer i.Unlock() 156 return i.images 157 } 158 159 // Information about the images we track. 160 type imageRecord struct { 161 // runtime handler used to pull this image 162 runtimeHandlerUsedToPullImage string 163 // Time when this image was first detected. 164 firstDetected time.Time 165 166 // Time when we last saw this image being used. 167 lastUsed time.Time 168 169 // Size of the image in bytes. 170 size int64 171 172 // Pinned status of the image 173 pinned bool 174 } 175 176 // NewImageGCManager instantiates a new ImageGCManager object. 177 func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, tracerProvider trace.TracerProvider) (ImageGCManager, error) { 178 // Validate policy. 179 if policy.HighThresholdPercent < 0 || policy.HighThresholdPercent > 100 { 180 return nil, fmt.Errorf("invalid HighThresholdPercent %d, must be in range [0-100]", policy.HighThresholdPercent) 181 } 182 if policy.LowThresholdPercent < 0 || policy.LowThresholdPercent > 100 { 183 return nil, fmt.Errorf("invalid LowThresholdPercent %d, must be in range [0-100]", policy.LowThresholdPercent) 184 } 185 if policy.LowThresholdPercent > policy.HighThresholdPercent { 186 return nil, fmt.Errorf("LowThresholdPercent %d can not be higher than HighThresholdPercent %d", policy.LowThresholdPercent, policy.HighThresholdPercent) 187 } 188 tracer := tracerProvider.Tracer(instrumentationScope) 189 im := &realImageGCManager{ 190 runtime: runtime, 191 policy: policy, 192 imageRecords: make(map[string]*imageRecord), 193 statsProvider: statsProvider, 194 recorder: recorder, 195 nodeRef: nodeRef, 196 tracer: tracer, 197 } 198 199 return im, nil 200 } 201 202 func (im *realImageGCManager) Start() { 203 ctx := context.Background() 204 go wait.Until(func() { 205 _, err := im.detectImages(ctx, time.Now()) 206 if err != nil { 207 klog.InfoS("Failed to monitor images", "err", err) 208 } 209 }, 5*time.Minute, wait.NeverStop) 210 211 // Start a goroutine periodically updates image cache. 212 go wait.Until(func() { 213 images, err := im.runtime.ListImages(ctx) 214 if err != nil { 215 klog.InfoS("Failed to update image list", "err", err) 216 } else { 217 im.imageCache.set(images) 218 } 219 }, 30*time.Second, wait.NeverStop) 220 221 } 222 223 // Get a list of images on this node 224 func (im *realImageGCManager) GetImageList() ([]container.Image, error) { 225 return im.imageCache.get(), nil 226 } 227 228 func (im *realImageGCManager) detectImages(ctx context.Context, detectTime time.Time) (sets.String, error) { 229 isRuntimeClassInImageCriAPIEnabled := utilfeature.DefaultFeatureGate.Enabled(features.RuntimeClassInImageCriAPI) 230 imagesInUse := sets.NewString() 231 232 images, err := im.runtime.ListImages(ctx) 233 if err != nil { 234 return imagesInUse, err 235 } 236 pods, err := im.runtime.GetPods(ctx, true) 237 if err != nil { 238 return imagesInUse, err 239 } 240 241 // Make a set of images in use by containers. 242 for _, pod := range pods { 243 for _, container := range pod.Containers { 244 if !isRuntimeClassInImageCriAPIEnabled { 245 klog.V(5).InfoS("Container uses image", "pod", klog.KRef(pod.Namespace, pod.Name), "containerName", container.Name, "containerImage", container.Image, "imageID", container.ImageID) 246 imagesInUse.Insert(container.ImageID) 247 } else { 248 imageKey := getImageTuple(container.ImageID, container.ImageRuntimeHandler) 249 klog.V(5).InfoS("Container uses image", "pod", klog.KRef(pod.Namespace, pod.Name), "containerName", container.Name, "containerImage", container.Image, "imageID", container.ImageID, "imageKey", imageKey) 250 imagesInUse.Insert(imageKey) 251 } 252 } 253 } 254 255 // Add new images and record those being used. 256 now := time.Now() 257 currentImages := sets.NewString() 258 im.imageRecordsLock.Lock() 259 defer im.imageRecordsLock.Unlock() 260 for _, image := range images { 261 imageKey := image.ID 262 if !isRuntimeClassInImageCriAPIEnabled { 263 klog.V(5).InfoS("Adding image ID to currentImages", "imageID", imageKey) 264 } else { 265 imageKey = getImageTuple(image.ID, image.Spec.RuntimeHandler) 266 klog.V(5).InfoS("Adding image ID with runtime class to currentImages", "imageKey", imageKey, "runtimeHandler", image.Spec.RuntimeHandler) 267 } 268 269 currentImages.Insert(imageKey) 270 271 // New image, set it as detected now. 272 if _, ok := im.imageRecords[imageKey]; !ok { 273 klog.V(5).InfoS("Image ID is new", "imageID", imageKey, "runtimeHandler", image.Spec.RuntimeHandler) 274 im.imageRecords[imageKey] = &imageRecord{ 275 firstDetected: detectTime, 276 runtimeHandlerUsedToPullImage: image.Spec.RuntimeHandler, 277 } 278 } 279 280 // Set last used time to now if the image is being used. 281 if isImageUsed(imageKey, imagesInUse) { 282 klog.V(5).InfoS("Setting Image ID lastUsed", "imageID", imageKey, "lastUsed", now) 283 im.imageRecords[imageKey].lastUsed = now 284 } 285 286 klog.V(5).InfoS("Image ID has size", "imageID", imageKey, "size", image.Size) 287 im.imageRecords[imageKey].size = image.Size 288 289 klog.V(5).InfoS("Image ID is pinned", "imageID", imageKey, "pinned", image.Pinned) 290 im.imageRecords[imageKey].pinned = image.Pinned 291 } 292 293 // Remove old images from our records. 294 for image := range im.imageRecords { 295 if !currentImages.Has(image) { 296 klog.V(5).InfoS("Image ID is no longer present; removing from imageRecords", "imageID", image) 297 delete(im.imageRecords, image) 298 } 299 } 300 301 return imagesInUse, nil 302 } 303 304 func (im *realImageGCManager) GarbageCollect(ctx context.Context) error { 305 ctx, otelSpan := im.tracer.Start(ctx, "Images/GarbageCollect") 306 defer otelSpan.End() 307 308 freeTime := time.Now() 309 images, err := im.imagesInEvictionOrder(ctx, freeTime) 310 if err != nil { 311 return err 312 } 313 314 images, err = im.freeOldImages(ctx, images, freeTime) 315 if err != nil { 316 return err 317 } 318 319 // Get disk usage on disk holding images. 320 fsStats, _, err := im.statsProvider.ImageFsStats(ctx) 321 if err != nil { 322 return err 323 } 324 325 var capacity, available int64 326 if fsStats.CapacityBytes != nil { 327 capacity = int64(*fsStats.CapacityBytes) 328 } 329 if fsStats.AvailableBytes != nil { 330 available = int64(*fsStats.AvailableBytes) 331 } 332 333 if available > capacity { 334 klog.InfoS("Availability is larger than capacity", "available", available, "capacity", capacity) 335 available = capacity 336 } 337 338 // Check valid capacity. 339 if capacity == 0 { 340 err := goerrors.New("invalid capacity 0 on image filesystem") 341 im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.InvalidDiskCapacity, err.Error()) 342 return err 343 } 344 345 // If over the max threshold, free enough to place us at the lower threshold. 346 usagePercent := 100 - int(available*100/capacity) 347 if usagePercent >= im.policy.HighThresholdPercent { 348 amountToFree := capacity*int64(100-im.policy.LowThresholdPercent)/100 - available 349 klog.InfoS("Disk usage on image filesystem is over the high threshold, trying to free bytes down to the low threshold", "usage", usagePercent, "highThreshold", im.policy.HighThresholdPercent, "amountToFree", amountToFree, "lowThreshold", im.policy.LowThresholdPercent) 350 freed, err := im.freeSpace(ctx, amountToFree, freeTime, images) 351 if err != nil { 352 return err 353 } 354 355 if freed < amountToFree { 356 err := fmt.Errorf("Failed to garbage collect required amount of images. Attempted to free %d bytes, but only found %d bytes eligible to free.", amountToFree, freed) 357 im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.FreeDiskSpaceFailed, err.Error()) 358 return err 359 } 360 } 361 362 return nil 363 } 364 365 func (im *realImageGCManager) freeOldImages(ctx context.Context, images []evictionInfo, freeTime time.Time) ([]evictionInfo, error) { 366 if im.policy.MaxAge == 0 { 367 return images, nil 368 } 369 var deletionErrors []error 370 remainingImages := make([]evictionInfo, 0) 371 for _, image := range images { 372 klog.V(5).InfoS("Evaluating image ID for possible garbage collection based on image age", "imageID", image.id) 373 // Evaluate whether image is older than MaxAge. 374 if freeTime.Sub(image.lastUsed) > im.policy.MaxAge { 375 if err := im.freeImage(ctx, image); err != nil { 376 deletionErrors = append(deletionErrors, err) 377 remainingImages = append(remainingImages, image) 378 continue 379 } 380 continue 381 } 382 remainingImages = append(remainingImages, image) 383 } 384 if len(deletionErrors) > 0 { 385 return remainingImages, fmt.Errorf("wanted to free images older than %v, encountered errors in image deletion: %v", im.policy.MaxAge, errors.NewAggregate(deletionErrors)) 386 } 387 return remainingImages, nil 388 } 389 390 func (im *realImageGCManager) DeleteUnusedImages(ctx context.Context) error { 391 klog.InfoS("Attempting to delete unused images") 392 freeTime := time.Now() 393 images, err := im.imagesInEvictionOrder(ctx, freeTime) 394 if err != nil { 395 return err 396 } 397 _, err = im.freeSpace(ctx, math.MaxInt64, freeTime, images) 398 return err 399 } 400 401 // Tries to free bytesToFree worth of images on the disk. 402 // 403 // Returns the number of bytes free and an error if any occurred. The number of 404 // bytes freed is always returned. 405 // Note that error may be nil and the number of bytes free may be less 406 // than bytesToFree. 407 func (im *realImageGCManager) freeSpace(ctx context.Context, bytesToFree int64, freeTime time.Time, images []evictionInfo) (int64, error) { 408 // Delete unused images until we've freed up enough space. 409 var deletionErrors []error 410 spaceFreed := int64(0) 411 for _, image := range images { 412 klog.V(5).InfoS("Evaluating image ID for possible garbage collection based on disk usage", "imageID", image.id, "runtimeHandler", image.imageRecord.runtimeHandlerUsedToPullImage) 413 // Images that are currently in used were given a newer lastUsed. 414 if image.lastUsed.Equal(freeTime) || image.lastUsed.After(freeTime) { 415 klog.V(5).InfoS("Image ID was used too recently, not eligible for garbage collection", "imageID", image.id, "lastUsed", image.lastUsed, "freeTime", freeTime) 416 continue 417 } 418 419 // Avoid garbage collect the image if the image is not old enough. 420 // In such a case, the image may have just been pulled down, and will be used by a container right away. 421 if freeTime.Sub(image.firstDetected) < im.policy.MinAge { 422 klog.V(5).InfoS("Image ID's age is less than the policy's minAge, not eligible for garbage collection", "imageID", image.id, "age", freeTime.Sub(image.firstDetected), "minAge", im.policy.MinAge) 423 continue 424 } 425 426 if err := im.freeImage(ctx, image); err != nil { 427 deletionErrors = append(deletionErrors, err) 428 continue 429 } 430 spaceFreed += image.size 431 432 if spaceFreed >= bytesToFree { 433 break 434 } 435 } 436 437 if len(deletionErrors) > 0 { 438 return spaceFreed, fmt.Errorf("wanted to free %d bytes, but freed %d bytes space with errors in image deletion: %v", bytesToFree, spaceFreed, errors.NewAggregate(deletionErrors)) 439 } 440 return spaceFreed, nil 441 } 442 443 func (im *realImageGCManager) freeImage(ctx context.Context, image evictionInfo) error { 444 isRuntimeClassInImageCriAPIEnabled := utilfeature.DefaultFeatureGate.Enabled(features.RuntimeClassInImageCriAPI) 445 // Remove image. Continue despite errors. 446 var err error 447 klog.InfoS("Removing image to free bytes", "imageID", image.id, "size", image.size, "runtimeHandler", image.runtimeHandlerUsedToPullImage) 448 err = im.runtime.RemoveImage(ctx, container.ImageSpec{Image: image.id, RuntimeHandler: image.runtimeHandlerUsedToPullImage}) 449 if err != nil { 450 return err 451 } 452 453 imageKey := image.id 454 if isRuntimeClassInImageCriAPIEnabled { 455 imageKey = getImageTuple(image.id, image.runtimeHandlerUsedToPullImage) 456 } 457 delete(im.imageRecords, imageKey) 458 459 metrics.ImageGarbageCollectedTotal.Inc() 460 return err 461 } 462 463 // Queries all of the image records and arranges them in a slice of evictionInfo, sorted based on last time used, ignoring images pinned by the runtime. 464 func (im *realImageGCManager) imagesInEvictionOrder(ctx context.Context, freeTime time.Time) ([]evictionInfo, error) { 465 isRuntimeClassInImageCriAPIEnabled := utilfeature.DefaultFeatureGate.Enabled(features.RuntimeClassInImageCriAPI) 466 imagesInUse, err := im.detectImages(ctx, freeTime) 467 if err != nil { 468 return nil, err 469 } 470 471 im.imageRecordsLock.Lock() 472 defer im.imageRecordsLock.Unlock() 473 474 // Get all images in eviction order. 475 images := make([]evictionInfo, 0, len(im.imageRecords)) 476 for image, record := range im.imageRecords { 477 if isImageUsed(image, imagesInUse) { 478 klog.V(5).InfoS("Image ID is being used", "imageID", image) 479 continue 480 } 481 // Check if image is pinned, prevent garbage collection 482 if record.pinned { 483 klog.V(5).InfoS("Image is pinned, skipping garbage collection", "imageID", image) 484 continue 485 486 } 487 if !isRuntimeClassInImageCriAPIEnabled { 488 images = append(images, evictionInfo{ 489 id: image, 490 imageRecord: *record, 491 }) 492 } else { 493 imageID := getImageIDFromTuple(image) 494 // Ensure imageID is valid or else continue 495 if imageID == "" { 496 im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, "ImageID is not valid, skipping, ImageID: %v", imageID) 497 continue 498 } 499 images = append(images, evictionInfo{ 500 id: imageID, 501 imageRecord: *record, 502 }) 503 } 504 } 505 sort.Sort(byLastUsedAndDetected(images)) 506 return images, nil 507 } 508 509 // If RuntimeClassInImageCriAPI feature gate is enabled, imageRecords 510 // are identified by a tuple of (imageId,runtimeHandler) that is passed 511 // from ListImages() call. If no runtimehandler is specified in response 512 // to ListImages() by the container runtime, only imageID will be will 513 // be returned. 514 func getImageTuple(imageID, runtimeHandler string) string { 515 if runtimeHandler == "" { 516 return imageID 517 } 518 return fmt.Sprintf(imageIndexTupleFormat, imageID, runtimeHandler) 519 } 520 521 // get imageID from the imageTuple 522 func getImageIDFromTuple(image string) string { 523 imageTuples := strings.Split(image, ",") 524 return imageTuples[0] 525 } 526 527 type evictionInfo struct { 528 id string 529 imageRecord 530 } 531 532 type byLastUsedAndDetected []evictionInfo 533 534 func (ev byLastUsedAndDetected) Len() int { return len(ev) } 535 func (ev byLastUsedAndDetected) Swap(i, j int) { ev[i], ev[j] = ev[j], ev[i] } 536 func (ev byLastUsedAndDetected) Less(i, j int) bool { 537 // Sort by last used, break ties by detected. 538 if ev[i].lastUsed.Equal(ev[j].lastUsed) { 539 return ev[i].firstDetected.Before(ev[j].firstDetected) 540 } 541 return ev[i].lastUsed.Before(ev[j].lastUsed) 542 } 543 544 func isImageUsed(imageID string, imagesInUse sets.String) bool { 545 // Check the image ID. 546 if _, ok := imagesInUse[imageID]; ok { 547 return true 548 } 549 return false 550 }