k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/kubelet/images/image_gc_manager.go (about) 1 /* 2 Copyright 2015 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package images 18 19 import ( 20 "context" 21 goerrors "errors" 22 "fmt" 23 "math" 24 "sort" 25 "strings" 26 "sync" 27 "time" 28 29 "go.opentelemetry.io/otel/trace" 30 v1 "k8s.io/api/core/v1" 31 "k8s.io/klog/v2" 32 33 "k8s.io/apimachinery/pkg/util/errors" 34 "k8s.io/apimachinery/pkg/util/sets" 35 "k8s.io/apimachinery/pkg/util/wait" 36 utilfeature "k8s.io/apiserver/pkg/util/feature" 37 "k8s.io/client-go/tools/record" 38 statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" 39 "k8s.io/kubernetes/pkg/features" 40 "k8s.io/kubernetes/pkg/kubelet/container" 41 "k8s.io/kubernetes/pkg/kubelet/events" 42 "k8s.io/kubernetes/pkg/kubelet/metrics" 43 "k8s.io/kubernetes/pkg/kubelet/util/sliceutils" 44 ) 45 46 // instrumentationScope is OpenTelemetry instrumentation scope name 47 const instrumentationScope = "k8s.io/kubernetes/pkg/kubelet/images" 48 49 // When RuntimeClassInImageCriAPI feature gate is enabled, imageRecord is 50 // indexed as imageId-RuntimeHandler 51 const imageIndexTupleFormat = "%s,%s" 52 53 // ImageGarbageCollectedTotalReason* specify the reason why an image was garbage collected 54 // in the `image_garbage_collected_total` metric. 55 const ( 56 ImageGarbageCollectedTotalReasonAge = "age" 57 ImageGarbageCollectedTotalReasonSpace = "space" 58 ) 59 60 // StatsProvider is an interface for fetching stats used during image garbage 61 // collection. 62 type StatsProvider interface { 63 // ImageFsStats returns the stats of the image filesystem. 64 ImageFsStats(ctx context.Context) (*statsapi.FsStats, *statsapi.FsStats, error) 65 } 66 67 // ImageGCManager is an interface for managing lifecycle of all images. 68 // Implementation is thread-safe. 69 type ImageGCManager interface { 70 // Applies the garbage collection policy. Errors include being unable to free 71 // enough space as per the garbage collection policy. 72 GarbageCollect(ctx context.Context, beganGC time.Time) error 73 74 // Start async garbage collection of images. 75 Start() 76 77 GetImageList() ([]container.Image, error) 78 79 // Delete all unused images. 80 DeleteUnusedImages(ctx context.Context) error 81 } 82 83 // ImageGCPolicy is a policy for garbage collecting images. Policy defines an allowed band in 84 // which garbage collection will be run. 85 type ImageGCPolicy struct { 86 // Any usage above this threshold will always trigger garbage collection. 87 // This is the highest usage we will allow. 88 HighThresholdPercent int 89 90 // Any usage below this threshold will never trigger garbage collection. 91 // This is the lowest threshold we will try to garbage collect to. 92 LowThresholdPercent int 93 94 // Minimum age at which an image can be garbage collected. 95 MinAge time.Duration 96 97 // Maximum age after which an image can be garbage collected, regardless of disk usage. 98 // Currently gated by MaximumImageGCAge feature gate and Kubelet configuration. 99 // If 0, the feature is disabled. 100 MaxAge time.Duration 101 } 102 103 type realImageGCManager struct { 104 // Container runtime 105 runtime container.Runtime 106 107 // Records of images and their use. Indexed by ImageId. 108 // If RuntimeClassInImageCriAPI feature gate is enabled, imageRecords 109 // are identified by a tuple of (imageId,runtimeHandler) that is passed 110 // from ListImages() call. If no runtimehandler is specified in response 111 // to ListImages() by the container runtime, only imageID will be used as 112 // the index of this map. 113 imageRecords map[string]*imageRecord 114 imageRecordsLock sync.Mutex 115 116 // The image garbage collection policy in use. 117 policy ImageGCPolicy 118 119 // statsProvider provides stats used during image garbage collection. 120 statsProvider StatsProvider 121 122 // Recorder for Kubernetes events. 123 recorder record.EventRecorder 124 125 // Reference to this node. 126 nodeRef *v1.ObjectReference 127 128 // imageCache is the cache of latest image list. 129 imageCache imageCache 130 131 // tracer for recording spans 132 tracer trace.Tracer 133 } 134 135 // imageCache caches latest result of ListImages. 136 type imageCache struct { 137 // sync.Mutex is the mutex protects the image cache. 138 sync.Mutex 139 // images is the image cache. 140 images []container.Image 141 } 142 143 // set sorts the input list and updates image cache. 144 // 'i' takes ownership of the list, you should not reference the list again 145 // after calling this function. 146 func (i *imageCache) set(images []container.Image) { 147 i.Lock() 148 defer i.Unlock() 149 // The image list needs to be sorted when it gets read and used in 150 // setNodeStatusImages. We sort the list on write instead of on read, 151 // because the image cache is more often read than written 152 sort.Sort(sliceutils.ByImageSize(images)) 153 i.images = images 154 } 155 156 // get gets image list from image cache. 157 // NOTE: The caller of get() should not do mutating operations on the 158 // returned list that could cause data race against other readers (e.g. 159 // in-place sorting the returned list) 160 func (i *imageCache) get() []container.Image { 161 i.Lock() 162 defer i.Unlock() 163 return i.images 164 } 165 166 // Information about the images we track. 167 type imageRecord struct { 168 // runtime handler used to pull this image 169 runtimeHandlerUsedToPullImage string 170 // Time when this image was first detected. 171 firstDetected time.Time 172 173 // Time when we last saw this image being used. 174 lastUsed time.Time 175 176 // Size of the image in bytes. 177 size int64 178 179 // Pinned status of the image 180 pinned bool 181 } 182 183 // NewImageGCManager instantiates a new ImageGCManager object. 184 func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, tracerProvider trace.TracerProvider) (ImageGCManager, error) { 185 // Validate policy. 186 if policy.HighThresholdPercent < 0 || policy.HighThresholdPercent > 100 { 187 return nil, fmt.Errorf("invalid HighThresholdPercent %d, must be in range [0-100]", policy.HighThresholdPercent) 188 } 189 if policy.LowThresholdPercent < 0 || policy.LowThresholdPercent > 100 { 190 return nil, fmt.Errorf("invalid LowThresholdPercent %d, must be in range [0-100]", policy.LowThresholdPercent) 191 } 192 if policy.LowThresholdPercent > policy.HighThresholdPercent { 193 return nil, fmt.Errorf("LowThresholdPercent %d can not be higher than HighThresholdPercent %d", policy.LowThresholdPercent, policy.HighThresholdPercent) 194 } 195 tracer := tracerProvider.Tracer(instrumentationScope) 196 im := &realImageGCManager{ 197 runtime: runtime, 198 policy: policy, 199 imageRecords: make(map[string]*imageRecord), 200 statsProvider: statsProvider, 201 recorder: recorder, 202 nodeRef: nodeRef, 203 tracer: tracer, 204 } 205 206 return im, nil 207 } 208 209 func (im *realImageGCManager) Start() { 210 ctx := context.Background() 211 go wait.Until(func() { 212 _, err := im.detectImages(ctx, time.Now()) 213 if err != nil { 214 klog.InfoS("Failed to monitor images", "err", err) 215 } 216 }, 5*time.Minute, wait.NeverStop) 217 218 // Start a goroutine periodically updates image cache. 219 go wait.Until(func() { 220 images, err := im.runtime.ListImages(ctx) 221 if err != nil { 222 klog.InfoS("Failed to update image list", "err", err) 223 } else { 224 im.imageCache.set(images) 225 } 226 }, 30*time.Second, wait.NeverStop) 227 228 } 229 230 // Get a list of images on this node 231 func (im *realImageGCManager) GetImageList() ([]container.Image, error) { 232 return im.imageCache.get(), nil 233 } 234 235 func (im *realImageGCManager) detectImages(ctx context.Context, detectTime time.Time) (sets.String, error) { 236 isRuntimeClassInImageCriAPIEnabled := utilfeature.DefaultFeatureGate.Enabled(features.RuntimeClassInImageCriAPI) 237 imagesInUse := sets.NewString() 238 239 images, err := im.runtime.ListImages(ctx) 240 if err != nil { 241 return imagesInUse, err 242 } 243 pods, err := im.runtime.GetPods(ctx, true) 244 if err != nil { 245 return imagesInUse, err 246 } 247 248 // Make a set of images in use by containers. 249 for _, pod := range pods { 250 for _, container := range pod.Containers { 251 if !isRuntimeClassInImageCriAPIEnabled { 252 klog.V(5).InfoS("Container uses image", "pod", klog.KRef(pod.Namespace, pod.Name), "containerName", container.Name, "containerImage", container.Image, "imageID", container.ImageID, "imageRef", container.ImageRef) 253 imagesInUse.Insert(container.ImageID) 254 } else { 255 imageKey := getImageTuple(container.ImageID, container.ImageRuntimeHandler) 256 klog.V(5).InfoS("Container uses image", "pod", klog.KRef(pod.Namespace, pod.Name), "containerName", container.Name, "containerImage", container.Image, "imageID", container.ImageID, "imageRef", container.ImageRef, "imageKey", imageKey) 257 imagesInUse.Insert(imageKey) 258 } 259 } 260 } 261 262 // Add new images and record those being used. 263 now := time.Now() 264 currentImages := sets.NewString() 265 im.imageRecordsLock.Lock() 266 defer im.imageRecordsLock.Unlock() 267 for _, image := range images { 268 imageKey := image.ID 269 if !isRuntimeClassInImageCriAPIEnabled { 270 klog.V(5).InfoS("Adding image ID to currentImages", "imageID", imageKey) 271 } else { 272 imageKey = getImageTuple(image.ID, image.Spec.RuntimeHandler) 273 klog.V(5).InfoS("Adding image ID with runtime class to currentImages", "imageKey", imageKey, "runtimeHandler", image.Spec.RuntimeHandler) 274 } 275 276 currentImages.Insert(imageKey) 277 278 // New image, set it as detected now. 279 if _, ok := im.imageRecords[imageKey]; !ok { 280 klog.V(5).InfoS("Image ID is new", "imageID", imageKey, "runtimeHandler", image.Spec.RuntimeHandler) 281 im.imageRecords[imageKey] = &imageRecord{ 282 firstDetected: detectTime, 283 runtimeHandlerUsedToPullImage: image.Spec.RuntimeHandler, 284 } 285 } 286 287 // Set last used time to now if the image is being used. 288 if isImageUsed(imageKey, imagesInUse) { 289 klog.V(5).InfoS("Setting Image ID lastUsed", "imageID", imageKey, "lastUsed", now) 290 im.imageRecords[imageKey].lastUsed = now 291 } 292 293 klog.V(5).InfoS("Image ID has size", "imageID", imageKey, "size", image.Size) 294 im.imageRecords[imageKey].size = image.Size 295 296 klog.V(5).InfoS("Image ID is pinned", "imageID", imageKey, "pinned", image.Pinned) 297 im.imageRecords[imageKey].pinned = image.Pinned 298 } 299 300 // Remove old images from our records. 301 for image := range im.imageRecords { 302 if !currentImages.Has(image) { 303 klog.V(5).InfoS("Image ID is no longer present; removing from imageRecords", "imageID", image) 304 delete(im.imageRecords, image) 305 } 306 } 307 308 return imagesInUse, nil 309 } 310 311 func (im *realImageGCManager) GarbageCollect(ctx context.Context, beganGC time.Time) error { 312 ctx, otelSpan := im.tracer.Start(ctx, "Images/GarbageCollect") 313 defer otelSpan.End() 314 315 freeTime := time.Now() 316 images, err := im.imagesInEvictionOrder(ctx, freeTime) 317 if err != nil { 318 return err 319 } 320 321 images, err = im.freeOldImages(ctx, images, freeTime, beganGC) 322 if err != nil { 323 return err 324 } 325 326 // Get disk usage on disk holding images. 327 fsStats, _, err := im.statsProvider.ImageFsStats(ctx) 328 if err != nil { 329 return err 330 } 331 332 var capacity, available int64 333 if fsStats.CapacityBytes != nil { 334 capacity = int64(*fsStats.CapacityBytes) 335 } 336 if fsStats.AvailableBytes != nil { 337 available = int64(*fsStats.AvailableBytes) 338 } 339 340 if available > capacity { 341 klog.InfoS("Availability is larger than capacity", "available", available, "capacity", capacity) 342 available = capacity 343 } 344 345 // Check valid capacity. 346 if capacity == 0 { 347 err := goerrors.New("invalid capacity 0 on image filesystem") 348 im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.InvalidDiskCapacity, err.Error()) 349 return err 350 } 351 352 // If over the max threshold, free enough to place us at the lower threshold. 353 usagePercent := 100 - int(available*100/capacity) 354 if usagePercent >= im.policy.HighThresholdPercent { 355 amountToFree := capacity*int64(100-im.policy.LowThresholdPercent)/100 - available 356 klog.InfoS("Disk usage on image filesystem is over the high threshold, trying to free bytes down to the low threshold", "usage", usagePercent, "highThreshold", im.policy.HighThresholdPercent, "amountToFree", amountToFree, "lowThreshold", im.policy.LowThresholdPercent) 357 freed, err := im.freeSpace(ctx, amountToFree, freeTime, images) 358 if err != nil { 359 return err 360 } 361 362 if freed < amountToFree { 363 err := fmt.Errorf("Failed to garbage collect required amount of images. Attempted to free %d bytes, but only found %d bytes eligible to free.", amountToFree, freed) 364 im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.FreeDiskSpaceFailed, err.Error()) 365 return err 366 } 367 } 368 369 return nil 370 } 371 372 func (im *realImageGCManager) freeOldImages(ctx context.Context, images []evictionInfo, freeTime, beganGC time.Time) ([]evictionInfo, error) { 373 if im.policy.MaxAge == 0 { 374 return images, nil 375 } 376 377 // Wait until the MaxAge has passed since the Kubelet has started, 378 // or else we risk prematurely garbage collecting images. 379 if freeTime.Sub(beganGC) <= im.policy.MaxAge { 380 return images, nil 381 } 382 var deletionErrors []error 383 remainingImages := make([]evictionInfo, 0) 384 for _, image := range images { 385 klog.V(5).InfoS("Evaluating image ID for possible garbage collection based on image age", "imageID", image.id) 386 // Evaluate whether image is older than MaxAge. 387 if freeTime.Sub(image.lastUsed) > im.policy.MaxAge { 388 if err := im.freeImage(ctx, image, ImageGarbageCollectedTotalReasonAge); err != nil { 389 deletionErrors = append(deletionErrors, err) 390 remainingImages = append(remainingImages, image) 391 continue 392 } 393 continue 394 } 395 remainingImages = append(remainingImages, image) 396 } 397 if len(deletionErrors) > 0 { 398 return remainingImages, fmt.Errorf("wanted to free images older than %v, encountered errors in image deletion: %v", im.policy.MaxAge, errors.NewAggregate(deletionErrors)) 399 } 400 return remainingImages, nil 401 } 402 403 func (im *realImageGCManager) DeleteUnusedImages(ctx context.Context) error { 404 klog.InfoS("Attempting to delete unused images") 405 freeTime := time.Now() 406 images, err := im.imagesInEvictionOrder(ctx, freeTime) 407 if err != nil { 408 return err 409 } 410 _, err = im.freeSpace(ctx, math.MaxInt64, freeTime, images) 411 return err 412 } 413 414 // Tries to free bytesToFree worth of images on the disk. 415 // 416 // Returns the number of bytes free and an error if any occurred. The number of 417 // bytes freed is always returned. 418 // Note that error may be nil and the number of bytes free may be less 419 // than bytesToFree. 420 func (im *realImageGCManager) freeSpace(ctx context.Context, bytesToFree int64, freeTime time.Time, images []evictionInfo) (int64, error) { 421 // Delete unused images until we've freed up enough space. 422 var deletionErrors []error 423 spaceFreed := int64(0) 424 for _, image := range images { 425 klog.V(5).InfoS("Evaluating image ID for possible garbage collection based on disk usage", "imageID", image.id, "runtimeHandler", image.imageRecord.runtimeHandlerUsedToPullImage) 426 // Images that are currently in used were given a newer lastUsed. 427 if image.lastUsed.Equal(freeTime) || image.lastUsed.After(freeTime) { 428 klog.V(5).InfoS("Image ID was used too recently, not eligible for garbage collection", "imageID", image.id, "lastUsed", image.lastUsed, "freeTime", freeTime) 429 continue 430 } 431 432 // Avoid garbage collect the image if the image is not old enough. 433 // In such a case, the image may have just been pulled down, and will be used by a container right away. 434 if freeTime.Sub(image.firstDetected) < im.policy.MinAge { 435 klog.V(5).InfoS("Image ID's age is less than the policy's minAge, not eligible for garbage collection", "imageID", image.id, "age", freeTime.Sub(image.firstDetected), "minAge", im.policy.MinAge) 436 continue 437 } 438 439 if err := im.freeImage(ctx, image, ImageGarbageCollectedTotalReasonSpace); err != nil { 440 deletionErrors = append(deletionErrors, err) 441 continue 442 } 443 spaceFreed += image.size 444 445 if spaceFreed >= bytesToFree { 446 break 447 } 448 } 449 450 if len(deletionErrors) > 0 { 451 return spaceFreed, fmt.Errorf("wanted to free %d bytes, but freed %d bytes space with errors in image deletion: %v", bytesToFree, spaceFreed, errors.NewAggregate(deletionErrors)) 452 } 453 return spaceFreed, nil 454 } 455 456 func (im *realImageGCManager) freeImage(ctx context.Context, image evictionInfo, reason string) error { 457 isRuntimeClassInImageCriAPIEnabled := utilfeature.DefaultFeatureGate.Enabled(features.RuntimeClassInImageCriAPI) 458 // Remove image. Continue despite errors. 459 var err error 460 klog.InfoS("Removing image to free bytes", "imageID", image.id, "size", image.size, "runtimeHandler", image.runtimeHandlerUsedToPullImage) 461 err = im.runtime.RemoveImage(ctx, container.ImageSpec{Image: image.id, RuntimeHandler: image.runtimeHandlerUsedToPullImage}) 462 if err != nil { 463 return err 464 } 465 466 imageKey := image.id 467 if isRuntimeClassInImageCriAPIEnabled { 468 imageKey = getImageTuple(image.id, image.runtimeHandlerUsedToPullImage) 469 } 470 delete(im.imageRecords, imageKey) 471 472 metrics.ImageGarbageCollectedTotal.WithLabelValues(reason).Inc() 473 return err 474 } 475 476 // Queries all of the image records and arranges them in a slice of evictionInfo, sorted based on last time used, ignoring images pinned by the runtime. 477 func (im *realImageGCManager) imagesInEvictionOrder(ctx context.Context, freeTime time.Time) ([]evictionInfo, error) { 478 isRuntimeClassInImageCriAPIEnabled := utilfeature.DefaultFeatureGate.Enabled(features.RuntimeClassInImageCriAPI) 479 imagesInUse, err := im.detectImages(ctx, freeTime) 480 if err != nil { 481 return nil, err 482 } 483 484 im.imageRecordsLock.Lock() 485 defer im.imageRecordsLock.Unlock() 486 487 // Get all images in eviction order. 488 images := make([]evictionInfo, 0, len(im.imageRecords)) 489 for image, record := range im.imageRecords { 490 if isImageUsed(image, imagesInUse) { 491 klog.V(5).InfoS("Image ID is being used", "imageID", image) 492 continue 493 } 494 // Check if image is pinned, prevent garbage collection 495 if record.pinned { 496 klog.V(5).InfoS("Image is pinned, skipping garbage collection", "imageID", image) 497 continue 498 499 } 500 if !isRuntimeClassInImageCriAPIEnabled { 501 images = append(images, evictionInfo{ 502 id: image, 503 imageRecord: *record, 504 }) 505 } else { 506 imageID := getImageIDFromTuple(image) 507 // Ensure imageID is valid or else continue 508 if imageID == "" { 509 im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, "ImageID is not valid, skipping, ImageID: %v", imageID) 510 continue 511 } 512 images = append(images, evictionInfo{ 513 id: imageID, 514 imageRecord: *record, 515 }) 516 } 517 } 518 sort.Sort(byLastUsedAndDetected(images)) 519 return images, nil 520 } 521 522 // If RuntimeClassInImageCriAPI feature gate is enabled, imageRecords 523 // are identified by a tuple of (imageId,runtimeHandler) that is passed 524 // from ListImages() call. If no runtimehandler is specified in response 525 // to ListImages() by the container runtime, only imageID will be will 526 // be returned. 527 func getImageTuple(imageID, runtimeHandler string) string { 528 if runtimeHandler == "" { 529 return imageID 530 } 531 return fmt.Sprintf(imageIndexTupleFormat, imageID, runtimeHandler) 532 } 533 534 // get imageID from the imageTuple 535 func getImageIDFromTuple(image string) string { 536 imageTuples := strings.Split(image, ",") 537 return imageTuples[0] 538 } 539 540 type evictionInfo struct { 541 id string 542 imageRecord 543 } 544 545 type byLastUsedAndDetected []evictionInfo 546 547 func (ev byLastUsedAndDetected) Len() int { return len(ev) } 548 func (ev byLastUsedAndDetected) Swap(i, j int) { ev[i], ev[j] = ev[j], ev[i] } 549 func (ev byLastUsedAndDetected) Less(i, j int) bool { 550 // Sort by last used, break ties by detected. 551 if ev[i].lastUsed.Equal(ev[j].lastUsed) { 552 return ev[i].firstDetected.Before(ev[j].firstDetected) 553 } 554 return ev[i].lastUsed.Before(ev[j].lastUsed) 555 } 556 557 func isImageUsed(imageID string, imagesInUse sets.String) bool { 558 // Check the image ID. 559 if _, ok := imagesInUse[imageID]; ok { 560 return true 561 } 562 return false 563 }