github.com/anchore/syft@v1.38.2/syft/internal/fileresolver/container_image_deep_squash.go (about)

     1  package fileresolver
     2  
     3  import (
     4  	"context"
     5  	"io"
     6  
     7  	"github.com/anchore/stereoscope/pkg/image"
     8  	"github.com/anchore/syft/syft/file"
     9  )
    10  
    11  var _ file.Resolver = (*ContainerImageDeepSquash)(nil)
    12  
    13  // ContainerImageDeepSquash implements path and content access for the paths in the squashed tree, but with additional
    14  // depth from all layers. The goal of this is to allow for producing results where the first layer which the material
    15  // was added can be annotated in the SBOM (as opposed to the last [visible] layer for the path like with the squashed
    16  // file resolver).
    17  type ContainerImageDeepSquash struct {
    18  	squashed  file.Resolver
    19  	allLayers file.Resolver
    20  }
    21  
    22  // NewFromContainerImageDeepSquash returns a new resolver from the perspective of all image layers for the given image.
    23  func NewFromContainerImageDeepSquash(img *image.Image) (*ContainerImageDeepSquash, error) {
    24  	squashed, err := NewFromContainerImageSquash(img)
    25  	if err != nil {
    26  		return nil, err
    27  	}
    28  
    29  	allLayers, err := NewFromContainerImageAllLayers(img)
    30  	if err != nil {
    31  		return nil, err
    32  	}
    33  
    34  	// we will do the work here to mark visibility with results from two resolvers (don't do the work twice!)
    35  	allLayers.markVisibility = false
    36  
    37  	return &ContainerImageDeepSquash{
    38  		squashed:  squashed,
    39  		allLayers: allLayers,
    40  	}, nil
    41  }
    42  
    43  // HasPath indicates if the given path exists in the underlying source.
    44  func (i *ContainerImageDeepSquash) HasPath(path string) bool {
    45  	// there is no need to merge results from all layers since path-based results should always be adjusted relative to the squashed tree (which is different when considering layers)
    46  	return i.squashed.HasPath(path)
    47  }
    48  
    49  // FilesByPath returns all file.References that match the given paths from any layer in the image.
    50  func (i *ContainerImageDeepSquash) FilesByPath(paths ...string) ([]file.Location, error) {
    51  	squashedLocations, err := i.squashed.FilesByPath(paths...)
    52  	if err != nil {
    53  		return nil, err
    54  	}
    55  
    56  	if len(squashedLocations) == 0 {
    57  		// this is meant to return all files in all layers only for paths that are present in the squashed tree. If
    58  		// there are no results from the squashed tree then there are no paths to raise up.
    59  		return nil, nil
    60  	}
    61  
    62  	allLayersLocations, err := i.allLayers.FilesByPath(paths...)
    63  	if err != nil {
    64  		return nil, err
    65  	}
    66  
    67  	return i.mergeLocations(squashedLocations, allLayersLocations), nil
    68  }
    69  
    70  // FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image.
    71  func (i *ContainerImageDeepSquash) FilesByGlob(patterns ...string) ([]file.Location, error) {
    72  	squashedLocations, err := i.squashed.FilesByGlob(patterns...)
    73  	if err != nil {
    74  		return nil, err
    75  	}
    76  
    77  	if len(squashedLocations) == 0 {
    78  		// this is meant to return all files in all layers only for paths that are present in the squashed tree. If
    79  		// there are no results from the squashed tree then there are no paths to raise up.
    80  		return nil, nil
    81  	}
    82  
    83  	allLayersLocations, err := i.allLayers.FilesByGlob(patterns...)
    84  	if err != nil {
    85  		return nil, err
    86  	}
    87  
    88  	return i.mergeLocations(squashedLocations, allLayersLocations), nil
    89  }
    90  
    91  // RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference.
    92  // This is helpful when attempting to find a file that is in the same layer or lower as another file.
    93  func (i *ContainerImageDeepSquash) RelativeFileByPath(location file.Location, path string) *file.Location {
    94  	if !i.squashed.HasPath(path) {
    95  		return nil
    96  	}
    97  
    98  	l := i.squashed.RelativeFileByPath(location, path)
    99  	if l != nil {
   100  		loc := l.WithAnnotation(file.VisibleAnnotationKey, file.VisibleAnnotation)
   101  		return &loc
   102  	}
   103  
   104  	l = i.allLayers.RelativeFileByPath(location, path)
   105  	if l != nil {
   106  		loc := l.WithAnnotation(file.VisibleAnnotationKey, file.HiddenAnnotation)
   107  		return &loc
   108  	}
   109  	return nil
   110  }
   111  
   112  // FileContentsByLocation fetches file contents for a single file reference.
   113  // If the path does not exist an error is returned.
   114  func (i *ContainerImageDeepSquash) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
   115  	// regardless of the layer or scope, if the user gives us a specific path+layer location, then we should always
   116  	// return the contents for that specific location (thus all-layers scope must always be used)
   117  	return i.allLayers.FileContentsByLocation(location)
   118  }
   119  
   120  func (i *ContainerImageDeepSquash) FilesByMIMEType(types ...string) ([]file.Location, error) {
   121  	squashedLocations, err := i.squashed.FilesByMIMEType(types...)
   122  	if err != nil {
   123  		return nil, err
   124  	}
   125  
   126  	if len(squashedLocations) == 0 {
   127  		// this is meant to return all files in all layers only for paths that are present in the squashed tree. If
   128  		// there are no results from the squashed tree then there are no paths to raise up.
   129  		return nil, nil
   130  	}
   131  
   132  	allLayersLocations, err := i.allLayers.FilesByMIMEType(types...)
   133  	if err != nil {
   134  		return nil, err
   135  	}
   136  
   137  	return i.mergeLocations(squashedLocations, allLayersLocations), nil
   138  }
   139  
   140  func (i *ContainerImageDeepSquash) AllLocations(ctx context.Context) <-chan file.Location {
   141  	return i.mergeLocationStreams(ctx, i.squashed.AllLocations(ctx), i.allLayers.AllLocations(ctx))
   142  }
   143  
   144  func (i *ContainerImageDeepSquash) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
   145  	// regardless of the layer or scope, if the user gives us a specific path+layer location, then we should always
   146  	// return the metadata for that specific location (thus all-layers scope must always be used)
   147  	return i.allLayers.FileMetadataByLocation(location)
   148  }
   149  
   150  func (i *ContainerImageDeepSquash) mergeLocations(squashedLocations, allLayersLocations []file.Location) []file.Location {
   151  	var result []file.Location
   152  
   153  	if len(squashedLocations) == 0 {
   154  		// this is meant to return all files in all layers only for paths that are present in the squashed tree. If
   155  		// there are no results from the squashed tree then there are no paths to raise up.
   156  		return nil
   157  	}
   158  
   159  	// we are using a location set to deduplicate locations, but we don't use it for the returned
   160  	// results in order to preserve the order of the locations from the underlying filetree query
   161  	squashedCoords := file.NewLocationSet()
   162  	for _, l := range squashedLocations {
   163  		result = append(result, l.WithAnnotation(file.VisibleAnnotationKey, file.VisibleAnnotation))
   164  		squashedCoords.Add(l)
   165  	}
   166  
   167  	for _, l := range allLayersLocations {
   168  		if squashedCoords.Contains(l) {
   169  			// this path + layer is already in the squashed tree results, skip it (deduplicate location results)
   170  			continue
   171  		}
   172  
   173  		if !i.squashed.HasPath(l.RealPath) {
   174  			// if we find a location for a path that matches the query (e.g. **/node_modules) but is not present in the squashed tree, skip it
   175  			continue
   176  		}
   177  
   178  		// not only should the real path to the file exist, but the way we took to get there should also exist
   179  		// (e.g. if we are looking for /etc/passwd, but the real path is /etc/passwd -> /etc/passwd-1, then we should
   180  		// make certain that /etc/passwd-1 exists)
   181  		if l.AccessPath != "" && !i.squashed.HasPath(l.AccessPath) {
   182  			continue
   183  		}
   184  
   185  		result = append(result, l.WithAnnotation(file.VisibleAnnotationKey, file.HiddenAnnotation))
   186  	}
   187  
   188  	return result
   189  }
   190  
   191  func (i *ContainerImageDeepSquash) mergeLocationStreams(ctx context.Context, squashedLocations, allLayersLocations <-chan file.Location) <-chan file.Location {
   192  	result := make(chan file.Location)
   193  	go func() {
   194  		defer close(result)
   195  
   196  		// we are using a location set to deduplicate locations, but we don't use it for the returned
   197  		// results in order to preserve the order of the locations from the underlying filetree query
   198  		squashedCoords := file.NewLocationSet()
   199  		var isDone bool
   200  		for l := range squashedLocations {
   201  			if isDone {
   202  				// bleed off the rest of the results from the squashed stream and not leak a goroutine
   203  				continue
   204  			}
   205  			select {
   206  			case <-ctx.Done():
   207  				isDone = true
   208  			default:
   209  				result <- l.WithAnnotation(file.VisibleAnnotationKey, file.VisibleAnnotation)
   210  				squashedCoords.Add(l)
   211  			}
   212  		}
   213  
   214  		for l := range allLayersLocations {
   215  			if isDone {
   216  				// bleed off the rest of the results from the squashed stream and not leak a goroutine
   217  				continue
   218  			}
   219  
   220  			if squashedCoords.Empty() {
   221  				// this is meant to return all files in all layers only for paths that are present in the squashed tree.
   222  				// If there are no results from the squashed tree, then there are no paths to raise up.
   223  				// That being said, we need to bleed off the rest of the results from the allLayersLocations stream
   224  				// and not leak a goroutine.
   225  				continue
   226  			}
   227  
   228  			if squashedCoords.Contains(l) {
   229  				// we've already seen this location from the squashed stream, skip it
   230  				continue
   231  			}
   232  
   233  			if !i.squashed.HasPath(l.RealPath) {
   234  				// if we find a location for a path that matches the query (e.g. **/node_modules) but is not present in the squashed tree, skip it
   235  				continue
   236  			}
   237  
   238  			// not only should the real path to the file exist, but the way we took to get there should also exist
   239  			// (e.g. if we are looking for /etc/passwd, but the real path is /etc/passwd -> /etc/passwd-1, then we should
   240  			// make certain that /etc/passwd-1 exists)
   241  			if l.AccessPath != "" && !i.squashed.HasPath(l.AccessPath) {
   242  				continue
   243  			}
   244  
   245  			select {
   246  			case <-ctx.Done():
   247  				isDone = true
   248  			default:
   249  				result <- l.WithAnnotation(file.VisibleAnnotationKey, file.HiddenAnnotation)
   250  			}
   251  		}
   252  	}()
   253  
   254  	return result
   255  }