github.com/anchore/syft@v1.38.2/syft/internal/fileresolver/container_image_all_layers.go (about)

     1  package fileresolver
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  
     8  	stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
     9  	"github.com/anchore/stereoscope/pkg/filetree"
    10  	"github.com/anchore/stereoscope/pkg/image"
    11  	"github.com/anchore/syft/internal/log"
    12  	"github.com/anchore/syft/syft/file"
    13  )
    14  
    15  var _ file.Resolver = (*ContainerImageAllLayers)(nil) // compile-time assertion that *ContainerImageAllLayers satisfies file.Resolver
    16  
    17  // ContainerImageAllLayers implements path and content access for the AllLayers source option for container image data sources.
    18  type ContainerImageAllLayers struct {
    19  	img            *image.Image // image whose layers and file catalog are queried
    20  	layers         []int        // indices into img.Layers that this resolver searches, in iteration order
    21  	markVisibility bool         // when true, annotateLocation tags each location as visible or hidden
    22  }
    23  
    24  // NewFromContainerImageAllLayers returns a new resolver from the perspective of all image layers for the given image.
    25  func NewFromContainerImageAllLayers(img *image.Image) (*ContainerImageAllLayers, error) {
    26  	if len(img.Layers) == 0 {
    27  		return nil, fmt.Errorf("the image does not contain any layers")
    28  	}
    29  
    30  	var layers = make([]int, 0)
    31  	for idx := range img.Layers {
    32  		layers = append(layers, idx)
    33  	}
    34  	return &ContainerImageAllLayers{
    35  		img:    img,
    36  		layers: layers,
    37  		// This is the entrypoint for the user-facing implementation, which should always annotate locations.
    38  		// We have other resolvers that use this implementation that are already responsible
    39  		// for marking visibility, so we don't need to do it all of the time (a small performance optimization).
    40  		markVisibility: true,
    41  	}, nil
    42  }
    43  
    44  // HasPath indicates if the given path exists in the underlying source.
    45  func (r *ContainerImageAllLayers) HasPath(path string) bool {
    46  	p := stereoscopeFile.Path(path)
    47  	for _, layerIdx := range r.layers {
    48  		tree := r.img.Layers[layerIdx].Tree
    49  		if tree.HasPath(p) {
    50  			return true
    51  		}
    52  	}
    53  	return false
    54  }
    55  
    56  func (r *ContainerImageAllLayers) fileByRef(ref stereoscopeFile.Reference, uniqueFileIDs stereoscopeFile.ReferenceSet, layerIdx int) ([]stereoscopeFile.Reference, error) {
    57  	uniqueFiles := make([]stereoscopeFile.Reference, 0)
    58  
    59  	// since there is potentially considerable work for each symlink/hardlink that needs to be resolved, let's check to see if this is a symlink/hardlink first
    60  	entry, err := r.img.FileCatalog.Get(ref)
    61  	if err != nil {
    62  		return nil, fmt.Errorf("unable to fetch metadata (ref=%+v): %w", ref, err)
    63  	}
    64  
    65  	if entry.Type == stereoscopeFile.TypeHardLink || entry.Type == stereoscopeFile.TypeSymLink {
    66  		// a link may resolve in this layer or higher, assuming a squashed tree is used to search
    67  		// we should search all possible resolutions within the valid source
    68  		for _, subLayerIdx := range r.layers[layerIdx:] {
    69  			resolvedRef, err := r.img.ResolveLinkByLayerSquash(ref, subLayerIdx)
    70  			if err != nil {
    71  				return nil, fmt.Errorf("failed to resolve link from layer (layer=%d ref=%+v): %w", subLayerIdx, ref, err)
    72  			}
    73  			if resolvedRef.HasReference() && !uniqueFileIDs.Contains(*resolvedRef.Reference) {
    74  				uniqueFileIDs.Add(*resolvedRef.Reference)
    75  				uniqueFiles = append(uniqueFiles, *resolvedRef.Reference)
    76  			}
    77  		}
    78  	} else if !uniqueFileIDs.Contains(ref) {
    79  		uniqueFileIDs.Add(ref)
    80  		uniqueFiles = append(uniqueFiles, ref)
    81  	}
    82  
    83  	return uniqueFiles, nil
    84  }
    85  
    86  // FilesByPath returns all file.References that match the given paths from any layer in the image.
    87  func (r *ContainerImageAllLayers) FilesByPath(paths ...string) ([]file.Location, error) {
    88  	uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
    89  	uniqueLocations := make([]file.Location, 0)
    90  
    91  	for _, path := range paths {
    92  		for idx, layerIdx := range r.layers {
    93  			ref, err := r.img.Layers[layerIdx].SearchContext.SearchByPath(path, filetree.FollowBasenameLinks, filetree.DoNotFollowDeadBasenameLinks)
    94  			if err != nil {
    95  				return nil, err
    96  			}
    97  			if !ref.HasReference() {
    98  				// no file found, keep looking through layers
    99  				continue
   100  			}
   101  
   102  			// don't consider directories (special case: there is no path information for /)
   103  			if ref.RealPath == "/" {
   104  				continue
   105  			} else if r.img.FileCatalog.Exists(*ref.Reference) {
   106  				metadata, err := r.img.FileCatalog.Get(*ref.Reference)
   107  				if err != nil {
   108  					return nil, fmt.Errorf("unable to get file metadata for path=%q: %w", ref.RealPath, err)
   109  				}
   110  				if metadata.IsDir() {
   111  					continue
   112  				}
   113  			}
   114  
   115  			results, err := r.fileByRef(*ref.Reference, uniqueFileIDs, idx)
   116  			if err != nil {
   117  				return nil, err
   118  			}
   119  			for _, result := range results {
   120  				l := file.NewLocationFromImage(path, result, r.img)
   121  				r.annotateLocation(&l)
   122  				uniqueLocations = append(uniqueLocations, l)
   123  			}
   124  		}
   125  	}
   126  	return uniqueLocations, nil
   127  }
   128  
   129  // FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image.
   130  //
   131  //nolint:gocognit
   132  func (r *ContainerImageAllLayers) FilesByGlob(patterns ...string) ([]file.Location, error) {
   133  	uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
   134  	uniqueLocations := make([]file.Location, 0)
   135  
   136  	for _, pattern := range patterns {
   137  		for idx, layerIdx := range r.layers {
   138  			results, err := r.img.Layers[layerIdx].SquashedSearchContext.SearchByGlob(pattern, filetree.FollowBasenameLinks, filetree.DoNotFollowDeadBasenameLinks)
   139  			if err != nil {
   140  				return nil, fmt.Errorf("failed to resolve files by glob (%s): %w", pattern, err)
   141  			}
   142  
   143  			for _, result := range results {
   144  				if !result.HasReference() {
   145  					continue
   146  				}
   147  				// don't consider directories (special case: there is no path information for /)
   148  				if result.RealPath == "/" {
   149  					continue
   150  				} else if r.img.FileCatalog.Exists(*result.Reference) {
   151  					metadata, err := r.img.FileCatalog.Get(*result.Reference)
   152  					if err != nil {
   153  						return nil, fmt.Errorf("unable to get file metadata for path=%q: %w", result.RequestPath, err)
   154  					}
   155  					// don't consider directories
   156  					if metadata.IsDir() {
   157  						continue
   158  					}
   159  				}
   160  
   161  				refResults, err := r.fileByRef(*result.Reference, uniqueFileIDs, idx)
   162  				if err != nil {
   163  					return nil, err
   164  				}
   165  				for _, refResult := range refResults {
   166  					l := file.NewLocationFromImage(string(result.RequestPath), refResult, r.img)
   167  					r.annotateLocation(&l)
   168  					uniqueLocations = append(uniqueLocations, l)
   169  				}
   170  			}
   171  		}
   172  	}
   173  
   174  	return uniqueLocations, nil
   175  }
   176  
   177  // RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference.
   178  // This is helpful when attempting to find a file that is in the same layer or lower as another file.
   179  func (r *ContainerImageAllLayers) RelativeFileByPath(location file.Location, path string) *file.Location {
   180  	layer := r.img.FileCatalog.Layer(location.Reference())
   181  
   182  	exists, relativeRef, err := layer.SquashedTree.File(stereoscopeFile.Path(path), filetree.FollowBasenameLinks)
   183  	if err != nil {
   184  		log.Errorf("failed to find path=%q in squash: %+v", path, err)
   185  		return nil
   186  	}
   187  	if !exists && !relativeRef.HasReference() {
   188  		return nil
   189  	}
   190  
   191  	relativeLocation := file.NewLocationFromImage(path, *relativeRef.Reference, r.img)
   192  	r.annotateLocation(&relativeLocation)
   193  
   194  	return &relativeLocation
   195  }
   196  
   197  // FileContentsByLocation fetches file contents for a single file reference, irregardless of the source layer.
   198  // If the path does not exist an error is returned.
   199  func (r *ContainerImageAllLayers) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
   200  	entry, err := r.img.FileCatalog.Get(location.Reference())
   201  	if err != nil {
   202  		return nil, fmt.Errorf("unable to get metadata for path=%q from file catalog: %w", location.RealPath, err)
   203  	}
   204  
   205  	switch entry.Type {
   206  	case stereoscopeFile.TypeSymLink, stereoscopeFile.TypeHardLink:
   207  		// the location we are searching may be a symlink, we should always work with the resolved file
   208  		newLocation := r.RelativeFileByPath(location, location.AccessPath)
   209  		if newLocation == nil {
   210  			// this is a dead link
   211  			return nil, fmt.Errorf("no contents for location=%q", location.AccessPath)
   212  		}
   213  		location = *newLocation
   214  	case stereoscopeFile.TypeDirectory:
   215  		return nil, fmt.Errorf("cannot read contents of non-file %q", location.Reference().RealPath)
   216  	}
   217  
   218  	return r.img.OpenReference(location.Reference())
   219  }
   220  
   221  func (r *ContainerImageAllLayers) FilesByMIMEType(types ...string) ([]file.Location, error) {
   222  	uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
   223  	uniqueLocations := make([]file.Location, 0)
   224  
   225  	for idx, layerIdx := range r.layers {
   226  		refs, err := r.img.Layers[layerIdx].SearchContext.SearchByMIMEType(types...)
   227  		if err != nil {
   228  			return nil, err
   229  		}
   230  
   231  		for _, ref := range refs {
   232  			if !ref.HasReference() {
   233  				continue
   234  			}
   235  
   236  			refResults, err := r.fileByRef(*ref.Reference, uniqueFileIDs, idx)
   237  			if err != nil {
   238  				return nil, err
   239  			}
   240  			for _, refResult := range refResults {
   241  				l := file.NewLocationFromImage(string(ref.RequestPath), refResult, r.img)
   242  				r.annotateLocation(&l)
   243  				uniqueLocations = append(uniqueLocations, l)
   244  			}
   245  		}
   246  	}
   247  
   248  	return uniqueLocations, nil
   249  }
   250  
   251  func (r *ContainerImageAllLayers) AllLocations(ctx context.Context) <-chan file.Location {
   252  	results := make(chan file.Location)
   253  	go func() {
   254  		defer close(results)
   255  		for _, layerIdx := range r.layers {
   256  			tree := r.img.Layers[layerIdx].Tree
   257  			for _, ref := range tree.AllFiles(stereoscopeFile.AllTypes()...) {
   258  				l := file.NewLocationFromImage(string(ref.RealPath), ref, r.img)
   259  				r.annotateLocation(&l)
   260  				select {
   261  				case <-ctx.Done():
   262  					return
   263  				case results <- l:
   264  					continue
   265  				}
   266  			}
   267  		}
   268  	}()
   269  	return results
   270  }
   271  
        // FileMetadataByLocation returns the metadata for the given location by delegating to the
        // package-level fileMetadataByLocation helper with this resolver's image.
   272  func (r *ContainerImageAllLayers) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
   273  	return fileMetadataByLocation(r.img, location)
   274  }
   275  
   276  func (r *ContainerImageAllLayers) annotateLocation(l *file.Location) {
   277  	if !r.markVisibility || l == nil {
   278  		return
   279  	}
   280  
   281  	givenRef := l.Reference()
   282  	annotation := file.VisibleAnnotation
   283  
   284  	// if we find a location for a path that matches the query (e.g. **/node_modules) but is not present in the squashed tree, skip it
   285  	ref, err := r.img.SquashedSearchContext.SearchByPath(l.RealPath, filetree.DoNotFollowDeadBasenameLinks)
   286  	if err != nil || !ref.HasReference() {
   287  		annotation = file.HiddenAnnotation
   288  	} else if ref.ID() != givenRef.ID() {
   289  		// we may have the path in the squashed tree, but this must not be in the same layer
   290  		annotation = file.HiddenAnnotation
   291  	}
   292  
   293  	// not only should the real path to the file exist, but the way we took to get there should also exist
   294  	// (e.g. if we are looking for /etc/passwd, but the real path is /etc/passwd -> /etc/passwd-1, then we should
   295  	// make certain that /etc/passwd-1 exists)
   296  	if annotation == file.VisibleAnnotation && l.AccessPath != "" {
   297  		ref, err := r.img.SquashedSearchContext.SearchByPath(l.AccessPath, filetree.DoNotFollowDeadBasenameLinks)
   298  		if err != nil || !ref.HasReference() {
   299  			annotation = file.HiddenAnnotation
   300  		} else if ref.ID() != givenRef.ID() {
   301  			// we may have the path in the squashed tree, but this must not be in the same layer
   302  			annotation = file.HiddenAnnotation
   303  		}
   304  	}
   305  
   306  	l.Annotations[file.VisibleAnnotationKey] = annotation
   307  }