github.com/opencontainers/umoci@v0.4.8-0.20240508124516-656e4836fb0d/oci/casext/walk.go (about) 1 /* 2 * umoci: Umoci Modifies Open Containers' Images 3 * Copyright (C) 2016-2020 SUSE LLC 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package casext 19 20 import ( 21 "context" 22 "errors" 23 24 "github.com/apex/log" 25 "github.com/opencontainers/go-digest" 26 ispec "github.com/opencontainers/image-spec/specs-go/v1" 27 28 "github.com/opencontainers/umoci/oci/cas" 29 "github.com/opencontainers/umoci/oci/casext/mediatype" 30 ) 31 32 // childDescriptors is a wrapper around MapDescriptors which just creates a 33 // slice of all of the arguments, and doesn't modify them. 34 func childDescriptors(i interface{}) []ispec.Descriptor { 35 var children []ispec.Descriptor 36 if err := MapDescriptors(i, func(descriptor ispec.Descriptor) ispec.Descriptor { 37 children = append(children, descriptor) 38 return descriptor 39 }); err != nil { 40 // If we got an error, this is a bug in MapDescriptors proper. 41 log.Fatalf("[internal error] MapDescriptors returned an error inside childDescriptors: %+v", err) 42 } 43 return children 44 } 45 46 // walkState stores state information about the recursion into a given 47 // descriptor tree. 48 type walkState struct { 49 // engine is the CAS engine we are operating on. 50 engine Engine 51 52 // walkFunc is the WalkFunc provided by the user. 53 walkFunc WalkFunc 54 } 55 56 // DescriptorPath is used to describe the path of descriptors (from a top-level 57 // index) that were traversed when resolving a particular reference name. The 58 // purpose of this is to allow libraries like github.com/opencontainers/umoci/mutate 59 // to handle generic manifest updates given an arbitrary descriptor walk. Users 60 // of ResolveReference that don't care about the descriptor path can just use 61 // .Descriptor. 62 type DescriptorPath struct { 63 // Walk is the set of descriptors walked to reach Descriptor (inclusive). 64 // The order is the same as the order of the walk, with the target being 65 // the last entry and the entrypoint from index.json being the first. 66 Walk []ispec.Descriptor `json:"descriptor_walk"` 67 } 68 69 // Root returns the first step in the DescriptorPath, which is the point where 70 // the walk started. This is just shorthand for DescriptorPath.Walk[0]. Root 71 // will *panic* if DescriptorPath is invalid. 72 func (d DescriptorPath) Root() ispec.Descriptor { 73 if len(d.Walk) < 1 { 74 panic("empty DescriptorPath") 75 } 76 return d.Walk[0] 77 } 78 79 // Descriptor returns the final step in the DescriptorPath, which is the target 80 // descriptor being referenced by DescriptorPath. This is just shorthand for 81 // accessing the last entry of DescriptorPath.Walk. Descriptor will *panic* if 82 // DescriptorPath is invalid. 83 func (d DescriptorPath) Descriptor() ispec.Descriptor { 84 if len(d.Walk) < 1 { 85 panic("empty DescriptorPath") 86 } 87 return d.Walk[len(d.Walk)-1] 88 } 89 90 // ErrSkipDescriptor is a special error returned by WalkFunc which will cause 91 // Walk to not recurse into the descriptor currently being evaluated by 92 // WalkFunc. This interface is roughly equivalent to filepath.SkipDir. 93 var ErrSkipDescriptor = errors.New("[internal] do not recurse into descriptor") 94 95 // WalkFunc is the type of function passed to Walk. It will be a called on each 96 // descriptor encountered, recursively -- which may involve the function being 97 // called on the same descriptor multiple times (though because an OCI image is 98 // a Merkle tree there will never be any loops). If an error is returned by 99 // WalkFunc, the recursion will halt and the error will bubble up to the 100 // caller. 101 // 102 // TODO: Also provide Blob to WalkFunc so that callers don't need to load blobs 103 // 104 // more than once. This is quite important for remote CAS implementations. 105 type WalkFunc func(descriptorPath DescriptorPath) error 106 107 func (ws *walkState) recurse(ctx context.Context, descriptorPath DescriptorPath) (Err error) { 108 log.WithFields(log.Fields{ 109 "digest": descriptorPath.Descriptor().Digest, 110 }).Debugf("-> ws.recurse") 111 defer log.WithFields(log.Fields{ 112 "digest": descriptorPath.Descriptor().Digest, 113 }).Debugf("<- ws.recurse") 114 115 // Run walkFunc. 116 if err := ws.walkFunc(descriptorPath); err != nil { 117 if err == ErrSkipDescriptor { 118 return nil 119 } 120 return err 121 } 122 123 // Get blob to recurse into. 124 descriptor := descriptorPath.Descriptor() 125 126 // Since FromDescriptor gives us a full VerifiedReadCloser (meaning that 127 // Close is expensive if we don't read any bytes), we should only try to 128 // recurse into this thing if we actually can parse it. 129 if mediatype.GetParser(descriptor.MediaType) == nil { 130 log.Infof("skipping walk into non-parseable media-type %v of blob %v", descriptor.MediaType, descriptor.Digest) 131 return nil 132 } 133 134 // Recurse into the blob now. 135 blob, err := ws.engine.FromDescriptor(ctx, descriptor) 136 if err != nil { 137 // Ignore cases where the descriptor points to an object we don't know 138 // how to parse. 139 if err == cas.ErrUnknownType { 140 log.Infof("skipping walk into unknown media-type %v of blob %v", descriptor.MediaType, descriptor.Digest) 141 return nil 142 } 143 return err 144 } 145 defer func() { 146 if err := blob.Close(); err != nil { 147 log.Warnf("during recursion blob %v had error on Close: %v", descriptor.Digest, err) 148 if Err == nil { 149 Err = err 150 } 151 } 152 }() 153 154 // Recurse into children. 155 for _, child := range childDescriptors(blob.Data) { 156 if err := ws.recurse(ctx, DescriptorPath{ 157 Walk: append(descriptorPath.Walk, child), 158 }); err != nil { 159 return err 160 } 161 } 162 163 return nil 164 } 165 166 // Walk preforms a depth-first walk from a given root descriptor, using the 167 // provided CAS engine to fetch all other necessary descriptors. If an error is 168 // returned by the provided WalkFunc, walking is terminated and the error is 169 // returned to the caller. 170 func (e Engine) Walk(ctx context.Context, root ispec.Descriptor, walkFunc WalkFunc) error { 171 ws := &walkState{ 172 engine: e, 173 walkFunc: walkFunc, 174 } 175 return ws.recurse(ctx, DescriptorPath{ 176 Walk: []ispec.Descriptor{root}, 177 }) 178 } 179 180 // reachable returns the set of digests which can be reached using a descriptor 181 // path from the provided root descriptor. The returned slice will *not* 182 // contain any duplicate digest.Digest entries. 183 // 184 // Please note that without descriptors, a digest is not particularly meaninful 185 // (OCI blobs are not self-descriptive). This method primarily exists for GC() 186 // and any use outside of GC() should be carefully considered (you probably 187 // want to use Walk directly). 188 func (e Engine) reachable(ctx context.Context, root ispec.Descriptor) ([]digest.Digest, error) { 189 seen := map[digest.Digest]struct{}{} 190 if err := e.Walk(ctx, root, func(descriptorPath DescriptorPath) error { 191 digest := descriptorPath.Descriptor().Digest 192 if _, ok := seen[digest]; ok { 193 // Don't traverse further if we've already seen this digest. 194 return ErrSkipDescriptor 195 } 196 seen[digest] = struct{}{} 197 return nil 198 }); err != nil { 199 return nil, err 200 } 201 var reachable []digest.Digest 202 for node := range seen { 203 reachable = append(reachable, node) 204 } 205 return reachable, nil 206 }