github.com/opencontainers/umoci@v0.4.8-0.20240508124516-656e4836fb0d/oci/casext/gc.go (about) 1 /* 2 * umoci: Umoci Modifies Open Containers' Images 3 * Copyright (C) 2016-2020 SUSE LLC 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package casext 19 20 import ( 21 "context" 22 23 "github.com/apex/log" 24 "github.com/opencontainers/go-digest" 25 ispec "github.com/opencontainers/image-spec/specs-go/v1" 26 "github.com/pkg/errors" 27 ) 28 29 // GCPolicy is a policy function that returns 'true' if a blob can be GC'ed 30 type GCPolicy func(ctx context.Context, digest digest.Digest) (bool, error) 31 32 // GC will perform a mark-and-sweep garbage collection of the OCI image 33 // referenced by the given CAS engine. The root set is taken to be the set of 34 // references stored in the image, and all blobs not reachable by following a 35 // descriptor path from the root set will be removed. 36 // 37 // GC will only call ListBlobs and ListReferences once, and assumes that there 38 // is no change in the set of references or blobs after calling those 39 // functions. In other words, it assumes it is the only user of the image that 40 // is making modifications. Things will not go well if this assumption is 41 // challenged. 42 // 43 // Furthermore, GC policies (zero or more) can also be specified which given a 44 // blob's digest can indicate whether that blob needs to garbage collected. The 45 // blob is skipped for garbage collection if a policy returns false. 46 func (e Engine) GC(ctx context.Context, policies ...GCPolicy) error { 47 // Generate the root set of descriptors. 48 var root []ispec.Descriptor 49 50 index, err := e.GetIndex(ctx) 51 if err != nil { 52 return errors.Wrap(err, "get top-level index") 53 } 54 55 for _, descriptor := range index.Manifests { 56 log.WithFields(log.Fields{ 57 "digest": descriptor.Digest, 58 }).Debugf("GC: got reference") 59 root = append(root, descriptor) 60 } 61 62 // Mark from the root sets. 63 black := map[digest.Digest]struct{}{} 64 for idx, descriptor := range root { 65 log.WithFields(log.Fields{ 66 "digest": descriptor.Digest, 67 }).Debugf("GC: marking from root") 68 69 reachables, err := e.reachable(ctx, descriptor) 70 if err != nil { 71 return errors.Wrapf(err, "getting reachables from root %d", idx) 72 } 73 for _, reachable := range reachables { 74 black[reachable] = struct{}{} 75 } 76 } 77 78 // Sweep all blobs in the white set. 79 blobs, err := e.ListBlobs(ctx) 80 if err != nil { 81 return errors.Wrap(err, "get blob list") 82 } 83 84 n := 0 85 sweep: 86 for _, digest := range blobs { 87 if _, ok := black[digest]; ok { 88 // Digest is in the black set. 89 continue 90 } 91 92 for i, policy := range policies { 93 ok, err := policy(ctx, digest) 94 if err != nil { 95 return errors.Wrapf(err, "invoking policy %d failed", i) 96 } 97 98 if !ok { 99 // skip this blob for GC 100 log.Debugf("skipping garbage collection of blob %s because of policy %d", digest, i) 101 continue sweep 102 } 103 } 104 log.Debugf("garbage collecting blob: %s", digest) 105 106 if err := e.DeleteBlob(ctx, digest); err != nil { 107 return errors.Wrapf(err, "remove unmarked blob %s", digest) 108 } 109 n++ 110 } 111 112 // Finally, tell CAS to GC it. 113 if err := e.Clean(ctx); err != nil { 114 return errors.Wrapf(err, "clean engine") 115 } 116 117 log.Debugf("garbage collected %d blobs", n) 118 return nil 119 }