github.com/anchore/syft@v1.38.2/syft/internal/fileresolver/container_image_deep_squash.go (about) 1 package fileresolver 2 3 import ( 4 "context" 5 "io" 6 7 "github.com/anchore/stereoscope/pkg/image" 8 "github.com/anchore/syft/syft/file" 9 ) 10 11 var _ file.Resolver = (*ContainerImageDeepSquash)(nil) 12 13 // ContainerImageDeepSquash implements path and content access for the paths in the squashed tree, but with additional 14 // depth from all layers. The goal of this is to allow for producing results where the first layer which the material 15 // was added can be annotated in the SBOM (as opposed to the last [visible] layer for the path like with the squashed 16 // file resolver). 17 type ContainerImageDeepSquash struct { 18 squashed file.Resolver 19 allLayers file.Resolver 20 } 21 22 // NewFromContainerImageDeepSquash returns a new resolver from the perspective of all image layers for the given image. 23 func NewFromContainerImageDeepSquash(img *image.Image) (*ContainerImageDeepSquash, error) { 24 squashed, err := NewFromContainerImageSquash(img) 25 if err != nil { 26 return nil, err 27 } 28 29 allLayers, err := NewFromContainerImageAllLayers(img) 30 if err != nil { 31 return nil, err 32 } 33 34 // we will do the work here to mark visibility with results from two resolvers (don't do the work twice!) 35 allLayers.markVisibility = false 36 37 return &ContainerImageDeepSquash{ 38 squashed: squashed, 39 allLayers: allLayers, 40 }, nil 41 } 42 43 // HasPath indicates if the given path exists in the underlying source. 44 func (i *ContainerImageDeepSquash) HasPath(path string) bool { 45 // there is no need to merge results from all layers since path-based results should always be adjusted relative to the squashed tree (which is different when considering layers) 46 return i.squashed.HasPath(path) 47 } 48 49 // FilesByPath returns all file.References that match the given paths from any layer in the image. 50 func (i *ContainerImageDeepSquash) FilesByPath(paths ...string) ([]file.Location, error) { 51 squashedLocations, err := i.squashed.FilesByPath(paths...) 52 if err != nil { 53 return nil, err 54 } 55 56 if len(squashedLocations) == 0 { 57 // this is meant to return all files in all layers only for paths that are present in the squashed tree. If 58 // there are no results from the squashed tree then there are no paths to raise up. 59 return nil, nil 60 } 61 62 allLayersLocations, err := i.allLayers.FilesByPath(paths...) 63 if err != nil { 64 return nil, err 65 } 66 67 return i.mergeLocations(squashedLocations, allLayersLocations), nil 68 } 69 70 // FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image. 71 func (i *ContainerImageDeepSquash) FilesByGlob(patterns ...string) ([]file.Location, error) { 72 squashedLocations, err := i.squashed.FilesByGlob(patterns...) 73 if err != nil { 74 return nil, err 75 } 76 77 if len(squashedLocations) == 0 { 78 // this is meant to return all files in all layers only for paths that are present in the squashed tree. If 79 // there are no results from the squashed tree then there are no paths to raise up. 80 return nil, nil 81 } 82 83 allLayersLocations, err := i.allLayers.FilesByGlob(patterns...) 84 if err != nil { 85 return nil, err 86 } 87 88 return i.mergeLocations(squashedLocations, allLayersLocations), nil 89 } 90 91 // RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference. 92 // This is helpful when attempting to find a file that is in the same layer or lower as another file. 93 func (i *ContainerImageDeepSquash) RelativeFileByPath(location file.Location, path string) *file.Location { 94 if !i.squashed.HasPath(path) { 95 return nil 96 } 97 98 l := i.squashed.RelativeFileByPath(location, path) 99 if l != nil { 100 loc := l.WithAnnotation(file.VisibleAnnotationKey, file.VisibleAnnotation) 101 return &loc 102 } 103 104 l = i.allLayers.RelativeFileByPath(location, path) 105 if l != nil { 106 loc := l.WithAnnotation(file.VisibleAnnotationKey, file.HiddenAnnotation) 107 return &loc 108 } 109 return nil 110 } 111 112 // FileContentsByLocation fetches file contents for a single file reference. 113 // If the path does not exist an error is returned. 114 func (i *ContainerImageDeepSquash) FileContentsByLocation(location file.Location) (io.ReadCloser, error) { 115 // regardless of the layer or scope, if the user gives us a specific path+layer location, then we should always 116 // return the contents for that specific location (thus all-layers scope must always be used) 117 return i.allLayers.FileContentsByLocation(location) 118 } 119 120 func (i *ContainerImageDeepSquash) FilesByMIMEType(types ...string) ([]file.Location, error) { 121 squashedLocations, err := i.squashed.FilesByMIMEType(types...) 122 if err != nil { 123 return nil, err 124 } 125 126 if len(squashedLocations) == 0 { 127 // this is meant to return all files in all layers only for paths that are present in the squashed tree. If 128 // there are no results from the squashed tree then there are no paths to raise up. 129 return nil, nil 130 } 131 132 allLayersLocations, err := i.allLayers.FilesByMIMEType(types...) 133 if err != nil { 134 return nil, err 135 } 136 137 return i.mergeLocations(squashedLocations, allLayersLocations), nil 138 } 139 140 func (i *ContainerImageDeepSquash) AllLocations(ctx context.Context) <-chan file.Location { 141 return i.mergeLocationStreams(ctx, i.squashed.AllLocations(ctx), i.allLayers.AllLocations(ctx)) 142 } 143 144 func (i *ContainerImageDeepSquash) FileMetadataByLocation(location file.Location) (file.Metadata, error) { 145 // regardless of the layer or scope, if the user gives us a specific path+layer location, then we should always 146 // return the metadata for that specific location (thus all-layers scope must always be used) 147 return i.allLayers.FileMetadataByLocation(location) 148 } 149 150 func (i *ContainerImageDeepSquash) mergeLocations(squashedLocations, allLayersLocations []file.Location) []file.Location { 151 var result []file.Location 152 153 if len(squashedLocations) == 0 { 154 // this is meant to return all files in all layers only for paths that are present in the squashed tree. If 155 // there are no results from the squashed tree then there are no paths to raise up. 156 return nil 157 } 158 159 // we are using a location set to deduplicate locations, but we don't use it for the returned 160 // results in order to preserve the order of the locations from the underlying filetree query 161 squashedCoords := file.NewLocationSet() 162 for _, l := range squashedLocations { 163 result = append(result, l.WithAnnotation(file.VisibleAnnotationKey, file.VisibleAnnotation)) 164 squashedCoords.Add(l) 165 } 166 167 for _, l := range allLayersLocations { 168 if squashedCoords.Contains(l) { 169 // this path + layer is already in the squashed tree results, skip it (deduplicate location results) 170 continue 171 } 172 173 if !i.squashed.HasPath(l.RealPath) { 174 // if we find a location for a path that matches the query (e.g. **/node_modules) but is not present in the squashed tree, skip it 175 continue 176 } 177 178 // not only should the real path to the file exist, but the way we took to get there should also exist 179 // (e.g. if we are looking for /etc/passwd, but the real path is /etc/passwd -> /etc/passwd-1, then we should 180 // make certain that /etc/passwd-1 exists) 181 if l.AccessPath != "" && !i.squashed.HasPath(l.AccessPath) { 182 continue 183 } 184 185 result = append(result, l.WithAnnotation(file.VisibleAnnotationKey, file.HiddenAnnotation)) 186 } 187 188 return result 189 } 190 191 func (i *ContainerImageDeepSquash) mergeLocationStreams(ctx context.Context, squashedLocations, allLayersLocations <-chan file.Location) <-chan file.Location { 192 result := make(chan file.Location) 193 go func() { 194 defer close(result) 195 196 // we are using a location set to deduplicate locations, but we don't use it for the returned 197 // results in order to preserve the order of the locations from the underlying filetree query 198 squashedCoords := file.NewLocationSet() 199 var isDone bool 200 for l := range squashedLocations { 201 if isDone { 202 // bleed off the rest of the results from the squashed stream and not leak a goroutine 203 continue 204 } 205 select { 206 case <-ctx.Done(): 207 isDone = true 208 default: 209 result <- l.WithAnnotation(file.VisibleAnnotationKey, file.VisibleAnnotation) 210 squashedCoords.Add(l) 211 } 212 } 213 214 for l := range allLayersLocations { 215 if isDone { 216 // bleed off the rest of the results from the squashed stream and not leak a goroutine 217 continue 218 } 219 220 if squashedCoords.Empty() { 221 // this is meant to return all files in all layers only for paths that are present in the squashed tree. 222 // If there are no results from the squashed tree, then there are no paths to raise up. 223 // That being said, we need to bleed off the rest of the results from the allLayersLocations stream 224 // and not leak a goroutine. 225 continue 226 } 227 228 if squashedCoords.Contains(l) { 229 // we've already seen this location from the squashed stream, skip it 230 continue 231 } 232 233 if !i.squashed.HasPath(l.RealPath) { 234 // if we find a location for a path that matches the query (e.g. **/node_modules) but is not present in the squashed tree, skip it 235 continue 236 } 237 238 // not only should the real path to the file exist, but the way we took to get there should also exist 239 // (e.g. if we are looking for /etc/passwd, but the real path is /etc/passwd -> /etc/passwd-1, then we should 240 // make certain that /etc/passwd-1 exists) 241 if l.AccessPath != "" && !i.squashed.HasPath(l.AccessPath) { 242 continue 243 } 244 245 select { 246 case <-ctx.Done(): 247 isDone = true 248 default: 249 result <- l.WithAnnotation(file.VisibleAnnotationKey, file.HiddenAnnotation) 250 } 251 } 252 }() 253 254 return result 255 }