k8s.io/kubernetes@v1.29.3/pkg/kubelet/images/image_gc_manager.go (about)

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package images
    18  
    19  import (
    20  	"context"
    21  	goerrors "errors"
    22  	"fmt"
    23  	"math"
    24  	"sort"
    25  	"strings"
    26  	"sync"
    27  	"time"
    28  
    29  	"go.opentelemetry.io/otel/trace"
    30  	v1 "k8s.io/api/core/v1"
    31  	"k8s.io/klog/v2"
    32  
    33  	"k8s.io/apimachinery/pkg/util/errors"
    34  	"k8s.io/apimachinery/pkg/util/sets"
    35  	"k8s.io/apimachinery/pkg/util/wait"
    36  	utilfeature "k8s.io/apiserver/pkg/util/feature"
    37  	"k8s.io/client-go/tools/record"
    38  	statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
    39  	"k8s.io/kubernetes/pkg/features"
    40  	"k8s.io/kubernetes/pkg/kubelet/container"
    41  	"k8s.io/kubernetes/pkg/kubelet/events"
    42  	"k8s.io/kubernetes/pkg/kubelet/metrics"
    43  	"k8s.io/kubernetes/pkg/kubelet/util/sliceutils"
    44  )
    45  
    46  // instrumentationScope is OpenTelemetry instrumentation scope name
    47  const instrumentationScope = "k8s.io/kubernetes/pkg/kubelet/images"
    48  
    49  // When RuntimeClassInImageCriAPI feature gate is enabled, imageRecord is
    50  // indexed as imageId-RuntimeHandler
    51  const imageIndexTupleFormat = "%s,%s"
    52  
// StatsProvider is an interface for fetching stats used during image garbage
// collection.
type StatsProvider interface {
	// ImageFsStats returns the stats of the image filesystem.
	// GarbageCollect reads CapacityBytes and AvailableBytes from the first
	// returned value and ignores the second one.
	ImageFsStats(ctx context.Context) (*statsapi.FsStats, *statsapi.FsStats, error)
}
    59  
// ImageGCManager is an interface for managing lifecycle of all images.
// Implementation is thread-safe.
type ImageGCManager interface {
	// GarbageCollect applies the garbage collection policy. Errors include
	// being unable to free enough space as per the garbage collection policy.
	GarbageCollect(ctx context.Context) error

	// Start starts the asynchronous background work of the manager. The
	// implementation in this file periodically detects images and refreshes
	// the cached image list.
	Start()

	// GetImageList returns the list of images on this node.
	GetImageList() ([]container.Image, error)

	// DeleteUnusedImages deletes all unused images.
	DeleteUnusedImages(ctx context.Context) error
}
    75  
// ImageGCPolicy is a policy for garbage collecting images. Policy defines an allowed band in
// which garbage collection will be run.
type ImageGCPolicy struct {
	// Disk usage percentage at or above which garbage collection is always
	// triggered. This is the highest usage we will allow.
	// NewImageGCManager rejects values outside [0, 100].
	HighThresholdPercent int

	// Any usage below this threshold will never trigger garbage collection.
	// This is the lowest threshold we will try to garbage collect to.
	// NewImageGCManager rejects values outside [0, 100] and values greater
	// than HighThresholdPercent.
	LowThresholdPercent int

	// Minimum age at which an image can be garbage collected. The age is
	// measured from the time the image was first detected.
	MinAge time.Duration

	// Maximum age after which an image can be garbage collected, regardless of disk usage.
	// Currently gated by MaximumImageGCAge feature gate and Kubelet configuration.
	// If 0, the feature is disabled.
	MaxAge time.Duration
}
    95  
// realImageGCManager is the ImageGCManager implementation returned by
// NewImageGCManager.
type realImageGCManager struct {
	// Container runtime
	runtime container.Runtime

	// Records of images and their use. Indexed by ImageId.
	// If RuntimeClassInImageCriAPI feature gate is enabled, imageRecords
	// are identified by a tuple of (imageId,runtimeHandler) that is passed
	// from ListImages() call. If no runtimehandler is specified in response
	// to ListImages() by the container runtime, only imageID will be used as
	// the index of this map.
	imageRecords     map[string]*imageRecord
	// imageRecordsLock guards imageRecords.
	imageRecordsLock sync.Mutex

	// The image garbage collection policy in use.
	policy ImageGCPolicy

	// statsProvider provides stats used during image garbage collection.
	statsProvider StatsProvider

	// Recorder for Kubernetes events.
	recorder record.EventRecorder

	// Reference to this node. Used as the object of the events emitted
	// through recorder.
	nodeRef *v1.ObjectReference

	// imageCache is the cache of latest image list.
	imageCache imageCache

	// tracer for recording spans
	tracer trace.Tracer
}
   127  
// imageCache caches the latest result of ListImages.
type imageCache struct {
	// sync.Mutex is the mutex that protects the image cache.
	sync.Mutex
	// images is the cached image list; it is sorted when stored by set.
	images []container.Image
}
   135  
   136  // set sorts the input list and updates image cache.
   137  // 'i' takes ownership of the list, you should not reference the list again
   138  // after calling this function.
   139  func (i *imageCache) set(images []container.Image) {
   140  	i.Lock()
   141  	defer i.Unlock()
   142  	// The image list needs to be sorted when it gets read and used in
   143  	// setNodeStatusImages. We sort the list on write instead of on read,
   144  	// because the image cache is more often read than written
   145  	sort.Sort(sliceutils.ByImageSize(images))
   146  	i.images = images
   147  }
   148  
   149  // get gets image list from image cache.
   150  // NOTE: The caller of get() should not do mutating operations on the
   151  // returned list that could cause data race against other readers (e.g.
   152  // in-place sorting the returned list)
   153  func (i *imageCache) get() []container.Image {
   154  	i.Lock()
   155  	defer i.Unlock()
   156  	return i.images
   157  }
   158  
// imageRecord holds the information tracked for each image known to the
// garbage collector.
type imageRecord struct {
	// runtime handler used to pull this image
	runtimeHandlerUsedToPullImage string
	// Time when this image was first detected.
	firstDetected time.Time

	// Time when we last saw this image being used. Remains the zero time
	// for an image that has never been seen in use by a container.
	lastUsed time.Time

	// Size of the image in bytes.
	size int64

	// Pinned status of the image. Pinned images are skipped when building
	// the eviction list.
	pinned bool
}
   175  
   176  // NewImageGCManager instantiates a new ImageGCManager object.
   177  func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, tracerProvider trace.TracerProvider) (ImageGCManager, error) {
   178  	// Validate policy.
   179  	if policy.HighThresholdPercent < 0 || policy.HighThresholdPercent > 100 {
   180  		return nil, fmt.Errorf("invalid HighThresholdPercent %d, must be in range [0-100]", policy.HighThresholdPercent)
   181  	}
   182  	if policy.LowThresholdPercent < 0 || policy.LowThresholdPercent > 100 {
   183  		return nil, fmt.Errorf("invalid LowThresholdPercent %d, must be in range [0-100]", policy.LowThresholdPercent)
   184  	}
   185  	if policy.LowThresholdPercent > policy.HighThresholdPercent {
   186  		return nil, fmt.Errorf("LowThresholdPercent %d can not be higher than HighThresholdPercent %d", policy.LowThresholdPercent, policy.HighThresholdPercent)
   187  	}
   188  	tracer := tracerProvider.Tracer(instrumentationScope)
   189  	im := &realImageGCManager{
   190  		runtime:       runtime,
   191  		policy:        policy,
   192  		imageRecords:  make(map[string]*imageRecord),
   193  		statsProvider: statsProvider,
   194  		recorder:      recorder,
   195  		nodeRef:       nodeRef,
   196  		tracer:        tracer,
   197  	}
   198  
   199  	return im, nil
   200  }
   201  
   202  func (im *realImageGCManager) Start() {
   203  	ctx := context.Background()
   204  	go wait.Until(func() {
   205  		_, err := im.detectImages(ctx, time.Now())
   206  		if err != nil {
   207  			klog.InfoS("Failed to monitor images", "err", err)
   208  		}
   209  	}, 5*time.Minute, wait.NeverStop)
   210  
   211  	// Start a goroutine periodically updates image cache.
   212  	go wait.Until(func() {
   213  		images, err := im.runtime.ListImages(ctx)
   214  		if err != nil {
   215  			klog.InfoS("Failed to update image list", "err", err)
   216  		} else {
   217  			im.imageCache.set(images)
   218  		}
   219  	}, 30*time.Second, wait.NeverStop)
   220  
   221  }
   222  
   223  // Get a list of images on this node
   224  func (im *realImageGCManager) GetImageList() ([]container.Image, error) {
   225  	return im.imageCache.get(), nil
   226  }
   227  
   228  func (im *realImageGCManager) detectImages(ctx context.Context, detectTime time.Time) (sets.String, error) {
   229  	isRuntimeClassInImageCriAPIEnabled := utilfeature.DefaultFeatureGate.Enabled(features.RuntimeClassInImageCriAPI)
   230  	imagesInUse := sets.NewString()
   231  
   232  	images, err := im.runtime.ListImages(ctx)
   233  	if err != nil {
   234  		return imagesInUse, err
   235  	}
   236  	pods, err := im.runtime.GetPods(ctx, true)
   237  	if err != nil {
   238  		return imagesInUse, err
   239  	}
   240  
   241  	// Make a set of images in use by containers.
   242  	for _, pod := range pods {
   243  		for _, container := range pod.Containers {
   244  			if !isRuntimeClassInImageCriAPIEnabled {
   245  				klog.V(5).InfoS("Container uses image", "pod", klog.KRef(pod.Namespace, pod.Name), "containerName", container.Name, "containerImage", container.Image, "imageID", container.ImageID)
   246  				imagesInUse.Insert(container.ImageID)
   247  			} else {
   248  				imageKey := getImageTuple(container.ImageID, container.ImageRuntimeHandler)
   249  				klog.V(5).InfoS("Container uses image", "pod", klog.KRef(pod.Namespace, pod.Name), "containerName", container.Name, "containerImage", container.Image, "imageID", container.ImageID, "imageKey", imageKey)
   250  				imagesInUse.Insert(imageKey)
   251  			}
   252  		}
   253  	}
   254  
   255  	// Add new images and record those being used.
   256  	now := time.Now()
   257  	currentImages := sets.NewString()
   258  	im.imageRecordsLock.Lock()
   259  	defer im.imageRecordsLock.Unlock()
   260  	for _, image := range images {
   261  		imageKey := image.ID
   262  		if !isRuntimeClassInImageCriAPIEnabled {
   263  			klog.V(5).InfoS("Adding image ID to currentImages", "imageID", imageKey)
   264  		} else {
   265  			imageKey = getImageTuple(image.ID, image.Spec.RuntimeHandler)
   266  			klog.V(5).InfoS("Adding image ID with runtime class to currentImages", "imageKey", imageKey, "runtimeHandler", image.Spec.RuntimeHandler)
   267  		}
   268  
   269  		currentImages.Insert(imageKey)
   270  
   271  		// New image, set it as detected now.
   272  		if _, ok := im.imageRecords[imageKey]; !ok {
   273  			klog.V(5).InfoS("Image ID is new", "imageID", imageKey, "runtimeHandler", image.Spec.RuntimeHandler)
   274  			im.imageRecords[imageKey] = &imageRecord{
   275  				firstDetected:                 detectTime,
   276  				runtimeHandlerUsedToPullImage: image.Spec.RuntimeHandler,
   277  			}
   278  		}
   279  
   280  		// Set last used time to now if the image is being used.
   281  		if isImageUsed(imageKey, imagesInUse) {
   282  			klog.V(5).InfoS("Setting Image ID lastUsed", "imageID", imageKey, "lastUsed", now)
   283  			im.imageRecords[imageKey].lastUsed = now
   284  		}
   285  
   286  		klog.V(5).InfoS("Image ID has size", "imageID", imageKey, "size", image.Size)
   287  		im.imageRecords[imageKey].size = image.Size
   288  
   289  		klog.V(5).InfoS("Image ID is pinned", "imageID", imageKey, "pinned", image.Pinned)
   290  		im.imageRecords[imageKey].pinned = image.Pinned
   291  	}
   292  
   293  	// Remove old images from our records.
   294  	for image := range im.imageRecords {
   295  		if !currentImages.Has(image) {
   296  			klog.V(5).InfoS("Image ID is no longer present; removing from imageRecords", "imageID", image)
   297  			delete(im.imageRecords, image)
   298  		}
   299  	}
   300  
   301  	return imagesInUse, nil
   302  }
   303  
   304  func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
   305  	ctx, otelSpan := im.tracer.Start(ctx, "Images/GarbageCollect")
   306  	defer otelSpan.End()
   307  
   308  	freeTime := time.Now()
   309  	images, err := im.imagesInEvictionOrder(ctx, freeTime)
   310  	if err != nil {
   311  		return err
   312  	}
   313  
   314  	images, err = im.freeOldImages(ctx, images, freeTime)
   315  	if err != nil {
   316  		return err
   317  	}
   318  
   319  	// Get disk usage on disk holding images.
   320  	fsStats, _, err := im.statsProvider.ImageFsStats(ctx)
   321  	if err != nil {
   322  		return err
   323  	}
   324  
   325  	var capacity, available int64
   326  	if fsStats.CapacityBytes != nil {
   327  		capacity = int64(*fsStats.CapacityBytes)
   328  	}
   329  	if fsStats.AvailableBytes != nil {
   330  		available = int64(*fsStats.AvailableBytes)
   331  	}
   332  
   333  	if available > capacity {
   334  		klog.InfoS("Availability is larger than capacity", "available", available, "capacity", capacity)
   335  		available = capacity
   336  	}
   337  
   338  	// Check valid capacity.
   339  	if capacity == 0 {
   340  		err := goerrors.New("invalid capacity 0 on image filesystem")
   341  		im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.InvalidDiskCapacity, err.Error())
   342  		return err
   343  	}
   344  
   345  	// If over the max threshold, free enough to place us at the lower threshold.
   346  	usagePercent := 100 - int(available*100/capacity)
   347  	if usagePercent >= im.policy.HighThresholdPercent {
   348  		amountToFree := capacity*int64(100-im.policy.LowThresholdPercent)/100 - available
   349  		klog.InfoS("Disk usage on image filesystem is over the high threshold, trying to free bytes down to the low threshold", "usage", usagePercent, "highThreshold", im.policy.HighThresholdPercent, "amountToFree", amountToFree, "lowThreshold", im.policy.LowThresholdPercent)
   350  		freed, err := im.freeSpace(ctx, amountToFree, freeTime, images)
   351  		if err != nil {
   352  			return err
   353  		}
   354  
   355  		if freed < amountToFree {
   356  			err := fmt.Errorf("Failed to garbage collect required amount of images. Attempted to free %d bytes, but only found %d bytes eligible to free.", amountToFree, freed)
   357  			im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.FreeDiskSpaceFailed, err.Error())
   358  			return err
   359  		}
   360  	}
   361  
   362  	return nil
   363  }
   364  
   365  func (im *realImageGCManager) freeOldImages(ctx context.Context, images []evictionInfo, freeTime time.Time) ([]evictionInfo, error) {
   366  	if im.policy.MaxAge == 0 {
   367  		return images, nil
   368  	}
   369  	var deletionErrors []error
   370  	remainingImages := make([]evictionInfo, 0)
   371  	for _, image := range images {
   372  		klog.V(5).InfoS("Evaluating image ID for possible garbage collection based on image age", "imageID", image.id)
   373  		// Evaluate whether image is older than MaxAge.
   374  		if freeTime.Sub(image.lastUsed) > im.policy.MaxAge {
   375  			if err := im.freeImage(ctx, image); err != nil {
   376  				deletionErrors = append(deletionErrors, err)
   377  				remainingImages = append(remainingImages, image)
   378  				continue
   379  			}
   380  			continue
   381  		}
   382  		remainingImages = append(remainingImages, image)
   383  	}
   384  	if len(deletionErrors) > 0 {
   385  		return remainingImages, fmt.Errorf("wanted to free images older than %v, encountered errors in image deletion: %v", im.policy.MaxAge, errors.NewAggregate(deletionErrors))
   386  	}
   387  	return remainingImages, nil
   388  }
   389  
   390  func (im *realImageGCManager) DeleteUnusedImages(ctx context.Context) error {
   391  	klog.InfoS("Attempting to delete unused images")
   392  	freeTime := time.Now()
   393  	images, err := im.imagesInEvictionOrder(ctx, freeTime)
   394  	if err != nil {
   395  		return err
   396  	}
   397  	_, err = im.freeSpace(ctx, math.MaxInt64, freeTime, images)
   398  	return err
   399  }
   400  
   401  // Tries to free bytesToFree worth of images on the disk.
   402  //
   403  // Returns the number of bytes free and an error if any occurred. The number of
   404  // bytes freed is always returned.
   405  // Note that error may be nil and the number of bytes free may be less
   406  // than bytesToFree.
   407  func (im *realImageGCManager) freeSpace(ctx context.Context, bytesToFree int64, freeTime time.Time, images []evictionInfo) (int64, error) {
   408  	// Delete unused images until we've freed up enough space.
   409  	var deletionErrors []error
   410  	spaceFreed := int64(0)
   411  	for _, image := range images {
   412  		klog.V(5).InfoS("Evaluating image ID for possible garbage collection based on disk usage", "imageID", image.id, "runtimeHandler", image.imageRecord.runtimeHandlerUsedToPullImage)
   413  		// Images that are currently in used were given a newer lastUsed.
   414  		if image.lastUsed.Equal(freeTime) || image.lastUsed.After(freeTime) {
   415  			klog.V(5).InfoS("Image ID was used too recently, not eligible for garbage collection", "imageID", image.id, "lastUsed", image.lastUsed, "freeTime", freeTime)
   416  			continue
   417  		}
   418  
   419  		// Avoid garbage collect the image if the image is not old enough.
   420  		// In such a case, the image may have just been pulled down, and will be used by a container right away.
   421  		if freeTime.Sub(image.firstDetected) < im.policy.MinAge {
   422  			klog.V(5).InfoS("Image ID's age is less than the policy's minAge, not eligible for garbage collection", "imageID", image.id, "age", freeTime.Sub(image.firstDetected), "minAge", im.policy.MinAge)
   423  			continue
   424  		}
   425  
   426  		if err := im.freeImage(ctx, image); err != nil {
   427  			deletionErrors = append(deletionErrors, err)
   428  			continue
   429  		}
   430  		spaceFreed += image.size
   431  
   432  		if spaceFreed >= bytesToFree {
   433  			break
   434  		}
   435  	}
   436  
   437  	if len(deletionErrors) > 0 {
   438  		return spaceFreed, fmt.Errorf("wanted to free %d bytes, but freed %d bytes space with errors in image deletion: %v", bytesToFree, spaceFreed, errors.NewAggregate(deletionErrors))
   439  	}
   440  	return spaceFreed, nil
   441  }
   442  
   443  func (im *realImageGCManager) freeImage(ctx context.Context, image evictionInfo) error {
   444  	isRuntimeClassInImageCriAPIEnabled := utilfeature.DefaultFeatureGate.Enabled(features.RuntimeClassInImageCriAPI)
   445  	// Remove image. Continue despite errors.
   446  	var err error
   447  	klog.InfoS("Removing image to free bytes", "imageID", image.id, "size", image.size, "runtimeHandler", image.runtimeHandlerUsedToPullImage)
   448  	err = im.runtime.RemoveImage(ctx, container.ImageSpec{Image: image.id, RuntimeHandler: image.runtimeHandlerUsedToPullImage})
   449  	if err != nil {
   450  		return err
   451  	}
   452  
   453  	imageKey := image.id
   454  	if isRuntimeClassInImageCriAPIEnabled {
   455  		imageKey = getImageTuple(image.id, image.runtimeHandlerUsedToPullImage)
   456  	}
   457  	delete(im.imageRecords, imageKey)
   458  
   459  	metrics.ImageGarbageCollectedTotal.Inc()
   460  	return err
   461  }
   462  
   463  // Queries all of the image records and arranges them in a slice of evictionInfo, sorted based on last time used, ignoring images pinned by the runtime.
   464  func (im *realImageGCManager) imagesInEvictionOrder(ctx context.Context, freeTime time.Time) ([]evictionInfo, error) {
   465  	isRuntimeClassInImageCriAPIEnabled := utilfeature.DefaultFeatureGate.Enabled(features.RuntimeClassInImageCriAPI)
   466  	imagesInUse, err := im.detectImages(ctx, freeTime)
   467  	if err != nil {
   468  		return nil, err
   469  	}
   470  
   471  	im.imageRecordsLock.Lock()
   472  	defer im.imageRecordsLock.Unlock()
   473  
   474  	// Get all images in eviction order.
   475  	images := make([]evictionInfo, 0, len(im.imageRecords))
   476  	for image, record := range im.imageRecords {
   477  		if isImageUsed(image, imagesInUse) {
   478  			klog.V(5).InfoS("Image ID is being used", "imageID", image)
   479  			continue
   480  		}
   481  		// Check if image is pinned, prevent garbage collection
   482  		if record.pinned {
   483  			klog.V(5).InfoS("Image is pinned, skipping garbage collection", "imageID", image)
   484  			continue
   485  
   486  		}
   487  		if !isRuntimeClassInImageCriAPIEnabled {
   488  			images = append(images, evictionInfo{
   489  				id:          image,
   490  				imageRecord: *record,
   491  			})
   492  		} else {
   493  			imageID := getImageIDFromTuple(image)
   494  			// Ensure imageID is valid or else continue
   495  			if imageID == "" {
   496  				im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, "ImageID is not valid, skipping, ImageID: %v", imageID)
   497  				continue
   498  			}
   499  			images = append(images, evictionInfo{
   500  				id:          imageID,
   501  				imageRecord: *record,
   502  			})
   503  		}
   504  	}
   505  	sort.Sort(byLastUsedAndDetected(images))
   506  	return images, nil
   507  }
   508  
   509  // If RuntimeClassInImageCriAPI feature gate is enabled, imageRecords
   510  // are identified by a tuple of (imageId,runtimeHandler) that is passed
   511  // from ListImages() call. If no runtimehandler is specified in response
   512  // to ListImages() by the container runtime, only imageID will be will
   513  // be returned.
   514  func getImageTuple(imageID, runtimeHandler string) string {
   515  	if runtimeHandler == "" {
   516  		return imageID
   517  	}
   518  	return fmt.Sprintf(imageIndexTupleFormat, imageID, runtimeHandler)
   519  }
   520  
   521  // get imageID from the imageTuple
   522  func getImageIDFromTuple(image string) string {
   523  	imageTuples := strings.Split(image, ",")
   524  	return imageTuples[0]
   525  }
   526  
// evictionInfo describes one image that is a candidate for garbage
// collection: a snapshot of its imageRecord plus the ID used to remove it.
type evictionInfo struct {
	// id is the image ID passed to the runtime when removing the image. It
	// never carries the runtime handler part of the record key.
	id string
	imageRecord
}
   531  
   532  type byLastUsedAndDetected []evictionInfo
   533  
   534  func (ev byLastUsedAndDetected) Len() int      { return len(ev) }
   535  func (ev byLastUsedAndDetected) Swap(i, j int) { ev[i], ev[j] = ev[j], ev[i] }
   536  func (ev byLastUsedAndDetected) Less(i, j int) bool {
   537  	// Sort by last used, break ties by detected.
   538  	if ev[i].lastUsed.Equal(ev[j].lastUsed) {
   539  		return ev[i].firstDetected.Before(ev[j].firstDetected)
   540  	}
   541  	return ev[i].lastUsed.Before(ev[j].lastUsed)
   542  }
   543  
   544  func isImageUsed(imageID string, imagesInUse sets.String) bool {
   545  	// Check the image ID.
   546  	if _, ok := imagesInUse[imageID]; ok {
   547  		return true
   548  	}
   549  	return false
   550  }