github.com/opencontainers/umoci@v0.4.8-0.20240508124516-656e4836fb0d/oci/casext/gc.go (about)

     1  /*
     2   * umoci: Umoci Modifies Open Containers' Images
     3   * Copyright (C) 2016-2020 SUSE LLC
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *    http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   */
    17  
    18  package casext
    19  
    20  import (
    21  	"context"
    22  
    23  	"github.com/apex/log"
    24  	"github.com/opencontainers/go-digest"
    25  	ispec "github.com/opencontainers/image-spec/specs-go/v1"
    26  	"github.com/pkg/errors"
    27  )
    28  
    29  // GCPolicy is a policy function that returns 'true' if a blob can be GC'ed
        // and 'false' if the blob must be kept.
        //
        // GC consults policies only for blobs that were not marked as reachable,
        // calling them in the order they were passed. The first policy that returns
        // 'false' causes the blob to be skipped without consulting later policies,
        // and a non-nil error aborts the whole collection.
    30  type GCPolicy func(ctx context.Context, digest digest.Digest) (bool, error)
    31  
    32  // GC will perform a mark-and-sweep garbage collection of the OCI image
    33  // referenced by the given CAS engine. The root set is taken to be the set of
    34  // references stored in the image, and all blobs not reachable by following a
    35  // descriptor path from the root set will be removed.
    36  //
    37  // GC will only call ListBlobs and ListReferences once, and assumes that there
    38  // is no change in the set of references or blobs after calling those
    39  // functions. In other words, it assumes it is the only user of the image that
    40  // is making modifications. Things will not go well if this assumption is
    41  // challenged.
    42  //
    43  // Furthermore, GC policies (zero or more) can also be specified which given a
    44  // blob's digest can indicate whether that blob needs to garbage collected. The
    45  // blob is skipped for garbage collection if a policy returns false.
    46  func (e Engine) GC(ctx context.Context, policies ...GCPolicy) error {
    47  	// Generate the root set of descriptors.
    48  	var root []ispec.Descriptor
    49  
    50  	index, err := e.GetIndex(ctx)
    51  	if err != nil {
    52  		return errors.Wrap(err, "get top-level index")
    53  	}
    54  
    55  	for _, descriptor := range index.Manifests {
    56  		log.WithFields(log.Fields{
    57  			"digest": descriptor.Digest,
    58  		}).Debugf("GC: got reference")
    59  		root = append(root, descriptor)
    60  	}
    61  
    62  	// Mark from the root sets.
    63  	black := map[digest.Digest]struct{}{}
    64  	for idx, descriptor := range root {
    65  		log.WithFields(log.Fields{
    66  			"digest": descriptor.Digest,
    67  		}).Debugf("GC: marking from root")
    68  
    69  		reachables, err := e.reachable(ctx, descriptor)
    70  		if err != nil {
    71  			return errors.Wrapf(err, "getting reachables from root %d", idx)
    72  		}
    73  		for _, reachable := range reachables {
    74  			black[reachable] = struct{}{}
    75  		}
    76  	}
    77  
    78  	// Sweep all blobs in the white set.
    79  	blobs, err := e.ListBlobs(ctx)
    80  	if err != nil {
    81  		return errors.Wrap(err, "get blob list")
    82  	}
    83  
    84  	n := 0
    85  sweep:
    86  	for _, digest := range blobs {
    87  		if _, ok := black[digest]; ok {
    88  			// Digest is in the black set.
    89  			continue
    90  		}
    91  
    92  		for i, policy := range policies {
    93  			ok, err := policy(ctx, digest)
    94  			if err != nil {
    95  				return errors.Wrapf(err, "invoking policy %d failed", i)
    96  			}
    97  
    98  			if !ok {
    99  				// skip this blob for GC
   100  				log.Debugf("skipping garbage collection of blob %s because of policy %d", digest, i)
   101  				continue sweep
   102  			}
   103  		}
   104  		log.Debugf("garbage collecting blob: %s", digest)
   105  
   106  		if err := e.DeleteBlob(ctx, digest); err != nil {
   107  			return errors.Wrapf(err, "remove unmarked blob %s", digest)
   108  		}
   109  		n++
   110  	}
   111  
   112  	// Finally, tell CAS to GC it.
   113  	if err := e.Clean(ctx); err != nil {
   114  		return errors.Wrapf(err, "clean engine")
   115  	}
   116  
   117  	log.Debugf("garbage collected %d blobs", n)
   118  	return nil
   119  }