github.com/opencontainers/umoci@v0.4.8-0.20240508124516-656e4836fb0d/oci/casext/walk.go (about)

     1  /*
     2   * umoci: Umoci Modifies Open Containers' Images
     3   * Copyright (C) 2016-2020 SUSE LLC
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *    http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   */
    17  
    18  package casext
    19  
    20  import (
    21  	"context"
    22  	"errors"
    23  
    24  	"github.com/apex/log"
    25  	"github.com/opencontainers/go-digest"
    26  	ispec "github.com/opencontainers/image-spec/specs-go/v1"
    27  
    28  	"github.com/opencontainers/umoci/oci/cas"
    29  	"github.com/opencontainers/umoci/oci/casext/mediatype"
    30  )
    31  
    32  // childDescriptors is a wrapper around MapDescriptors which just creates a
    33  // slice of all of the arguments, and doesn't modify them.
    34  func childDescriptors(i interface{}) []ispec.Descriptor {
    35  	var children []ispec.Descriptor
    36  	if err := MapDescriptors(i, func(descriptor ispec.Descriptor) ispec.Descriptor {
    37  		children = append(children, descriptor)
    38  		return descriptor
    39  	}); err != nil {
    40  		// If we got an error, this is a bug in MapDescriptors proper.
    41  		log.Fatalf("[internal error] MapDescriptors returned an error inside childDescriptors: %+v", err)
    42  	}
    43  	return children
    44  }
    45  
// walkState stores state information about the recursion into a given
// descriptor tree. A single walkState is shared by every level of one Walk
// call.
type walkState struct {
	// engine is the CAS engine we are operating on. It is used to load the
	// blob behind each descriptor that is recursed into.
	engine Engine

	// walkFunc is the WalkFunc provided by the user. It is invoked once for
	// every descriptor path visited during the walk.
	walkFunc WalkFunc
}
    55  
// DescriptorPath is used to describe the path of descriptors (from a top-level
// index) that were traversed when resolving a particular reference name. The
// purpose of this is to allow libraries like github.com/opencontainers/umoci/mutate
// to handle generic manifest updates given an arbitrary descriptor walk. Users
// of ResolveReference that don't care about the descriptor path can just use
// .Descriptor.
//
// A DescriptorPath with an empty Walk is invalid; Root and Descriptor panic
// when called on one.
type DescriptorPath struct {
	// Walk is the set of descriptors walked to reach Descriptor (inclusive).
	// The order is the same as the order of the walk, with the target being
	// the last entry and the entrypoint from index.json being the first.
	Walk []ispec.Descriptor `json:"descriptor_walk"`
}
    68  
    69  // Root returns the first step in the DescriptorPath, which is the point where
    70  // the walk started. This is just shorthand for DescriptorPath.Walk[0]. Root
    71  // will *panic* if DescriptorPath is invalid.
    72  func (d DescriptorPath) Root() ispec.Descriptor {
    73  	if len(d.Walk) < 1 {
    74  		panic("empty DescriptorPath")
    75  	}
    76  	return d.Walk[0]
    77  }
    78  
    79  // Descriptor returns the final step in the DescriptorPath, which is the target
    80  // descriptor being referenced by DescriptorPath. This is just shorthand for
    81  // accessing the last entry of DescriptorPath.Walk. Descriptor will *panic* if
    82  // DescriptorPath is invalid.
    83  func (d DescriptorPath) Descriptor() ispec.Descriptor {
    84  	if len(d.Walk) < 1 {
    85  		panic("empty DescriptorPath")
    86  	}
    87  	return d.Walk[len(d.Walk)-1]
    88  }
    89  
// ErrSkipDescriptor is a special error returned by WalkFunc which will cause
// Walk to not recurse into the descriptor currently being evaluated by
// WalkFunc. The walk itself is not aborted: the skipped descriptor is treated
// as successfully handled and the walk carries on with the remaining
// descriptors. This interface is roughly equivalent to filepath.SkipDir.
var ErrSkipDescriptor = errors.New("[internal] do not recurse into descriptor")
    94  
// WalkFunc is the type of function passed to Walk. It will be called on each
// descriptor encountered, recursively -- which may involve the function being
// called on the same descriptor multiple times (though because an OCI image is
// a Merkle tree there will never be any loops). If an error is returned by
// WalkFunc, the recursion will halt and the error will bubble up to the
// caller. The one exception is ErrSkipDescriptor, which only prevents
// recursion into the descriptor currently being evaluated.
//
// TODO: Also provide Blob to WalkFunc so that callers don't need to load blobs
// more than once. This is quite important for remote CAS implementations.
type WalkFunc func(descriptorPath DescriptorPath) error
   106  
   107  func (ws *walkState) recurse(ctx context.Context, descriptorPath DescriptorPath) (Err error) {
   108  	log.WithFields(log.Fields{
   109  		"digest": descriptorPath.Descriptor().Digest,
   110  	}).Debugf("-> ws.recurse")
   111  	defer log.WithFields(log.Fields{
   112  		"digest": descriptorPath.Descriptor().Digest,
   113  	}).Debugf("<- ws.recurse")
   114  
   115  	// Run walkFunc.
   116  	if err := ws.walkFunc(descriptorPath); err != nil {
   117  		if err == ErrSkipDescriptor {
   118  			return nil
   119  		}
   120  		return err
   121  	}
   122  
   123  	// Get blob to recurse into.
   124  	descriptor := descriptorPath.Descriptor()
   125  
   126  	// Since FromDescriptor gives us a full VerifiedReadCloser (meaning that
   127  	// Close is expensive if we don't read any bytes), we should only try to
   128  	// recurse into this thing if we actually can parse it.
   129  	if mediatype.GetParser(descriptor.MediaType) == nil {
   130  		log.Infof("skipping walk into non-parseable media-type %v of blob %v", descriptor.MediaType, descriptor.Digest)
   131  		return nil
   132  	}
   133  
   134  	// Recurse into the blob now.
   135  	blob, err := ws.engine.FromDescriptor(ctx, descriptor)
   136  	if err != nil {
   137  		// Ignore cases where the descriptor points to an object we don't know
   138  		// how to parse.
   139  		if err == cas.ErrUnknownType {
   140  			log.Infof("skipping walk into unknown media-type %v of blob %v", descriptor.MediaType, descriptor.Digest)
   141  			return nil
   142  		}
   143  		return err
   144  	}
   145  	defer func() {
   146  		if err := blob.Close(); err != nil {
   147  			log.Warnf("during recursion blob %v had error on Close: %v", descriptor.Digest, err)
   148  			if Err == nil {
   149  				Err = err
   150  			}
   151  		}
   152  	}()
   153  
   154  	// Recurse into children.
   155  	for _, child := range childDescriptors(blob.Data) {
   156  		if err := ws.recurse(ctx, DescriptorPath{
   157  			Walk: append(descriptorPath.Walk, child),
   158  		}); err != nil {
   159  			return err
   160  		}
   161  	}
   162  
   163  	return nil
   164  }
   165  
   166  // Walk preforms a depth-first walk from a given root descriptor, using the
   167  // provided CAS engine to fetch all other necessary descriptors. If an error is
   168  // returned by the provided WalkFunc, walking is terminated and the error is
   169  // returned to the caller.
   170  func (e Engine) Walk(ctx context.Context, root ispec.Descriptor, walkFunc WalkFunc) error {
   171  	ws := &walkState{
   172  		engine:   e,
   173  		walkFunc: walkFunc,
   174  	}
   175  	return ws.recurse(ctx, DescriptorPath{
   176  		Walk: []ispec.Descriptor{root},
   177  	})
   178  }
   179  
   180  // reachable returns the set of digests which can be reached using a descriptor
   181  // path from the provided root descriptor. The returned slice will *not*
   182  // contain any duplicate digest.Digest entries.
   183  //
   184  // Please note that without descriptors, a digest is not particularly meaninful
   185  // (OCI blobs are not self-descriptive). This method primarily exists for GC()
   186  // and any use outside of GC() should be carefully considered (you probably
   187  // want to use Walk directly).
   188  func (e Engine) reachable(ctx context.Context, root ispec.Descriptor) ([]digest.Digest, error) {
   189  	seen := map[digest.Digest]struct{}{}
   190  	if err := e.Walk(ctx, root, func(descriptorPath DescriptorPath) error {
   191  		digest := descriptorPath.Descriptor().Digest
   192  		if _, ok := seen[digest]; ok {
   193  			// Don't traverse further if we've already seen this digest.
   194  			return ErrSkipDescriptor
   195  		}
   196  		seen[digest] = struct{}{}
   197  		return nil
   198  	}); err != nil {
   199  		return nil, err
   200  	}
   201  	var reachable []digest.Digest
   202  	for node := range seen {
   203  		reachable = append(reachable, node)
   204  	}
   205  	return reachable, nil
   206  }