github.com/anchore/syft@v1.38.2/syft/internal/fileresolver/container_image_squash_test.go (about)

     1  package fileresolver
     2  
     3  import (
     4  	"context"
     5  	"io"
     6  	"slices"
     7  	"sort"
     8  	"strings"
     9  	"testing"
    10  
    11  	"github.com/google/go-cmp/cmp"
    12  	"github.com/google/go-cmp/cmp/cmpopts"
    13  	"github.com/scylladb/go-set/strset"
    14  	"github.com/stretchr/testify/assert"
    15  	"github.com/stretchr/testify/require"
    16  
    17  	"github.com/anchore/stereoscope/pkg/imagetest"
    18  	"github.com/anchore/syft/syft/file"
    19  )
    20  
    21  func TestImageSquashResolver_FilesByPath(t *testing.T) {
    22  	cases := []struct {
    23  		name                 string
    24  		linkPath             string
    25  		resolveLayer         uint
    26  		resolvePath          string
    27  		forcePositiveHasPath bool
    28  	}{
    29  		{
    30  			name:         "link with previous data",
    31  			linkPath:     "/link-1",
    32  			resolveLayer: 1,
    33  			resolvePath:  "/file-1.txt",
    34  		},
    35  		{
    36  			name:         "link with in layer data",
    37  			linkPath:     "/link-within",
    38  			resolveLayer: 5,
    39  			resolvePath:  "/file-3.txt",
    40  		},
    41  		{
    42  			name:         "link with overridden data",
    43  			linkPath:     "/link-2",
    44  			resolveLayer: 7,
    45  			resolvePath:  "/file-2.txt",
    46  		},
    47  		{
    48  			name:         "indirect link (with overridden data)",
    49  			linkPath:     "/link-indirect",
    50  			resolveLayer: 7,
    51  			resolvePath:  "/file-2.txt",
    52  		},
    53  		{
    54  			name:         "dead link",
    55  			linkPath:     "/link-dead",
    56  			resolveLayer: 8,
    57  			resolvePath:  "",
    58  			// the path should exist, even if the link is dead
    59  			forcePositiveHasPath: true,
    60  		},
    61  		{
    62  			name:        "ignore directories",
    63  			linkPath:    "/bin",
    64  			resolvePath: "",
    65  			// the path should exist, even if we ignore it
    66  			forcePositiveHasPath: true,
    67  		},
    68  		{
    69  			name:         "parent is a link (with overridden data)",
    70  			linkPath:     "/parent-link/file-4.txt",
    71  			resolveLayer: 11,
    72  			resolvePath:  "/parent/file-4.txt",
    73  		},
    74  	}
    75  	for _, c := range cases {
    76  		t.Run(c.name, func(t *testing.T) {
    77  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
    78  
    79  			resolver, err := NewFromContainerImageSquash(img)
    80  			require.NoError(t, err)
    81  
    82  			hasPath := resolver.HasPath(c.linkPath)
    83  			if !c.forcePositiveHasPath {
    84  				if c.resolvePath != "" && !hasPath {
    85  					t.Errorf("expected HasPath() to indicate existance, but did not")
    86  				} else if c.resolvePath == "" && hasPath {
    87  					t.Errorf("expeced HasPath() to NOT indicate existance, but does")
    88  				}
    89  			} else if !hasPath {
    90  				t.Errorf("expected HasPath() to indicate existance, but did not (force path)")
    91  			}
    92  
    93  			refs, err := resolver.FilesByPath(c.linkPath)
    94  			require.NoError(t, err)
    95  
    96  			expectedRefs := 1
    97  			if c.resolvePath == "" {
    98  				expectedRefs = 0
    99  			}
   100  
   101  			if len(refs) != expectedRefs {
   102  				t.Fatalf("unexpected number of resolutions: %d", len(refs))
   103  			}
   104  
   105  			if expectedRefs == 0 {
   106  				// nothing else to assert
   107  				return
   108  			}
   109  
   110  			actual := refs[0]
   111  
   112  			if string(actual.Reference().RealPath) != c.resolvePath {
   113  				t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath)
   114  			}
   115  
   116  			if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath {
   117  				t.Errorf("we should always prefer real paths over ones with links")
   118  			}
   119  
   120  			layer := img.FileCatalog.Layer(actual.Reference())
   121  
   122  			if layer.Metadata.Index != c.resolveLayer {
   123  				t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer)
   124  			}
   125  		})
   126  	}
   127  }
   128  
   129  func TestImageSquashResolver_FilesByGlob(t *testing.T) {
   130  	cases := []struct {
   131  		name         string
   132  		glob         string
   133  		resolveLayer uint
   134  		resolvePath  string
   135  	}{
   136  		{
   137  			name:         "link with previous data",
   138  			glob:         "**/link-1",
   139  			resolveLayer: 1,
   140  			resolvePath:  "/file-1.txt",
   141  		},
   142  		{
   143  			name:         "link with in layer data",
   144  			glob:         "**/link-within",
   145  			resolveLayer: 5,
   146  			resolvePath:  "/file-3.txt",
   147  		},
   148  		{
   149  			name:         "link with overridden data",
   150  			glob:         "**/link-2",
   151  			resolveLayer: 7,
   152  			resolvePath:  "/file-2.txt",
   153  		},
   154  		{
   155  			name:         "indirect link (with overridden data)",
   156  			glob:         "**/link-indirect",
   157  			resolveLayer: 7,
   158  			resolvePath:  "/file-2.txt",
   159  		},
   160  		{
   161  			name: "dead link",
   162  			glob: "**/link-dead",
   163  			// dead links are dead! they shouldn't match on globs
   164  			resolvePath: "",
   165  		},
   166  		{
   167  			name:        "ignore directories",
   168  			glob:        "**/bin",
   169  			resolvePath: "",
   170  		},
   171  		{
   172  			name:         "parent without link",
   173  			glob:         "**/parent/*.txt",
   174  			resolveLayer: 11,
   175  			resolvePath:  "/parent/file-4.txt",
   176  		},
   177  		{
   178  			name:         "parent is a link (override)",
   179  			glob:         "**/parent-link/file-4.txt",
   180  			resolveLayer: 11,
   181  			resolvePath:  "/parent/file-4.txt",
   182  		},
   183  	}
   184  	for _, c := range cases {
   185  		t.Run(c.name, func(t *testing.T) {
   186  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   187  
   188  			resolver, err := NewFromContainerImageSquash(img)
   189  			require.NoError(t, err)
   190  
   191  			refs, err := resolver.FilesByGlob(c.glob)
   192  			require.NoError(t, err)
   193  
   194  			expectedRefs := 1
   195  			if c.resolvePath == "" {
   196  				expectedRefs = 0
   197  			}
   198  
   199  			if len(refs) != expectedRefs {
   200  				t.Fatalf("unexpected number of resolutions: %d", len(refs))
   201  			}
   202  
   203  			if expectedRefs == 0 {
   204  				// nothing else to assert
   205  				return
   206  			}
   207  
   208  			actual := refs[0]
   209  
   210  			if string(actual.Reference().RealPath) != c.resolvePath {
   211  				t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath)
   212  			}
   213  
   214  			if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath {
   215  				t.Errorf("we should always prefer real paths over ones with links")
   216  			}
   217  
   218  			layer := img.FileCatalog.Layer(actual.Reference())
   219  
   220  			if layer.Metadata.Index != c.resolveLayer {
   221  				t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer)
   222  			}
   223  		})
   224  	}
   225  }
   226  
   227  func Test_imageSquashResolver_FilesByMIMEType(t *testing.T) {
   228  
   229  	tests := []struct {
   230  		fixtureName   string
   231  		mimeType      string
   232  		expectedPaths *strset.Set
   233  	}{
   234  		{
   235  			fixtureName:   "image-simple",
   236  			mimeType:      "text/plain",
   237  			expectedPaths: strset.New("/somefile-1.txt", "/somefile-2.txt", "/really/nested/file-3.txt"),
   238  		},
   239  	}
   240  
   241  	for _, test := range tests {
   242  		t.Run(test.fixtureName, func(t *testing.T) {
   243  			img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureName)
   244  
   245  			resolver, err := NewFromContainerImageSquash(img)
   246  			assert.NoError(t, err)
   247  
   248  			locations, err := resolver.FilesByMIMEType(test.mimeType)
   249  			assert.NoError(t, err)
   250  
   251  			assert.Len(t, locations, test.expectedPaths.Size())
   252  			for _, l := range locations {
   253  				assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath)
   254  			}
   255  		})
   256  	}
   257  }
   258  
   259  func Test_imageSquashResolver_hasFilesystemIDInLocation(t *testing.T) {
   260  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-duplicate-path")
   261  
   262  	resolver, err := NewFromContainerImageSquash(img)
   263  	assert.NoError(t, err)
   264  
   265  	locations, err := resolver.FilesByMIMEType("text/plain")
   266  	assert.NoError(t, err)
   267  	assert.NotEmpty(t, locations)
   268  	for _, location := range locations {
   269  		assert.NotEmpty(t, location.FileSystemID)
   270  	}
   271  
   272  	locations, err = resolver.FilesByGlob("*.txt")
   273  	assert.NoError(t, err)
   274  	assert.NotEmpty(t, locations)
   275  	for _, location := range locations {
   276  		assert.NotEmpty(t, location.FileSystemID)
   277  	}
   278  
   279  	locations, err = resolver.FilesByPath("/somefile-1.txt")
   280  	assert.NoError(t, err)
   281  	assert.NotEmpty(t, locations)
   282  	for _, location := range locations {
   283  		assert.NotEmpty(t, location.FileSystemID)
   284  	}
   285  
   286  }
   287  
   288  func TestSquashImageResolver_FilesContents(t *testing.T) {
   289  
   290  	tests := []struct {
   291  		name     string
   292  		path     string
   293  		contents []string
   294  	}{
   295  		{
   296  			name: "one degree",
   297  			path: "link-2",
   298  			contents: []string{
   299  				"NEW file override!", // always from the squashed perspective
   300  			},
   301  		},
   302  		{
   303  			name: "two degrees",
   304  			path: "link-indirect",
   305  			contents: []string{
   306  				"NEW file override!", // always from the squashed perspective
   307  			},
   308  		},
   309  		{
   310  			name:     "dead link",
   311  			path:     "link-dead",
   312  			contents: []string{},
   313  		},
   314  	}
   315  
   316  	for _, test := range tests {
   317  		t.Run(test.name, func(t *testing.T) {
   318  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   319  
   320  			resolver, err := NewFromContainerImageSquash(img)
   321  			assert.NoError(t, err)
   322  
   323  			refs, err := resolver.FilesByPath(test.path)
   324  			require.NoError(t, err)
   325  			assert.Len(t, refs, len(test.contents))
   326  
   327  			for idx, loc := range refs {
   328  
   329  				reader, err := resolver.FileContentsByLocation(loc)
   330  				require.NoError(t, err)
   331  
   332  				actual, err := io.ReadAll(reader)
   333  				require.NoError(t, err)
   334  
   335  				assert.Equal(t, test.contents[idx], string(actual))
   336  			}
   337  		})
   338  	}
   339  }
   340  
   341  func TestSquashImageResolver_FilesContents_errorOnDirRequest(t *testing.T) {
   342  
   343  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   344  
   345  	resolver, err := NewFromContainerImageSquash(img)
   346  	assert.NoError(t, err)
   347  
   348  	var dirLoc *file.Location
   349  	ctx, cancel := context.WithCancel(context.Background())
   350  	defer cancel()
   351  	for loc := range resolver.AllLocations(ctx) {
   352  		entry, err := resolver.img.FileCatalog.Get(loc.Reference())
   353  		require.NoError(t, err)
   354  		if entry.Metadata.IsDir() {
   355  			dirLoc = &loc
   356  			break
   357  		}
   358  	}
   359  
   360  	require.NotNil(t, dirLoc)
   361  
   362  	reader, err := resolver.FileContentsByLocation(*dirLoc)
   363  	require.Error(t, err)
   364  	require.Nil(t, reader)
   365  }
   366  
   367  func Test_imageSquashResolver_resolvesLinks(t *testing.T) {
   368  	tests := []struct {
   369  		name     string
   370  		runner   func(file.Resolver) []file.Location
   371  		expected []file.Location
   372  	}{
   373  		{
   374  			name: "by mimetype",
   375  			runner: func(resolver file.Resolver) []file.Location {
   376  				// links should not show up when searching mimetype
   377  				actualLocations, err := resolver.FilesByMIMEType("text/plain")
   378  				assert.NoError(t, err)
   379  				return actualLocations
   380  			},
   381  			expected: []file.Location{
   382  				file.NewVirtualLocation("/etc/group", "/etc/group"),
   383  				file.NewVirtualLocation("/etc/passwd", "/etc/passwd"),
   384  				file.NewVirtualLocation("/etc/shadow", "/etc/shadow"),
   385  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   386  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   387  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   388  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   389  			},
   390  		},
   391  		{
   392  			name: "by glob to links",
   393  			runner: func(resolver file.Resolver) []file.Location {
   394  				// links are searched, but resolve to the real files
   395  				actualLocations, err := resolver.FilesByGlob("*ink-*")
   396  				assert.NoError(t, err)
   397  				return actualLocations
   398  			},
   399  			expected: []file.Location{
   400  				file.NewVirtualLocation("/file-1.txt", "/link-1"),
   401  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   402  
   403  				// though this is a link, and it matches to the file, the resolver de-duplicates files
   404  				// by the real path, so it is not included in the results
   405  				//file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   406  
   407  				file.NewVirtualLocation("/file-3.txt", "/link-within"),
   408  			},
   409  		},
   410  		{
   411  			name: "by basename",
   412  			runner: func(resolver file.Resolver) []file.Location {
   413  				// links are searched, but resolve to the real files
   414  				actualLocations, err := resolver.FilesByGlob("**/file-2.txt")
   415  				assert.NoError(t, err)
   416  				return actualLocations
   417  			},
   418  			expected: []file.Location{
   419  				// this has two copies in the base image, which overwrites the same location
   420  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   421  			},
   422  		},
   423  		{
   424  			name: "by basename glob",
   425  			runner: func(resolver file.Resolver) []file.Location {
   426  				// links are searched, but resolve to the real files
   427  				actualLocations, err := resolver.FilesByGlob("**/file-?.txt")
   428  				assert.NoError(t, err)
   429  				return actualLocations
   430  			},
   431  			expected: []file.Location{
   432  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   433  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   434  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   435  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   436  			},
   437  		},
   438  		{
   439  			name: "by basename glob to links",
   440  			runner: func(resolver file.Resolver) []file.Location {
   441  				actualLocations, err := resolver.FilesByGlob("**/link-*")
   442  				assert.NoError(t, err)
   443  				return actualLocations
   444  			},
   445  			expected: []file.Location{
   446  				file.NewVirtualLocation("/file-1.txt", "/link-1"),
   447  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   448  
   449  				// we already have this real file path via another link, so only one is returned
   450  				// file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   451  
   452  				file.NewVirtualLocation("/file-3.txt", "/link-within"),
   453  			},
   454  		},
   455  		{
   456  			name: "by extension",
   457  			runner: func(resolver file.Resolver) []file.Location {
   458  				// links are searched, but resolve to the real files
   459  				actualLocations, err := resolver.FilesByGlob("**/*.txt")
   460  				assert.NoError(t, err)
   461  				return actualLocations
   462  			},
   463  			expected: []file.Location{
   464  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   465  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   466  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   467  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   468  			},
   469  		},
   470  		{
   471  			name: "by path to degree 1 link",
   472  			runner: func(resolver file.Resolver) []file.Location {
   473  				// links resolve to the final file
   474  				actualLocations, err := resolver.FilesByPath("/link-2")
   475  				assert.NoError(t, err)
   476  				return actualLocations
   477  			},
   478  			expected: []file.Location{
   479  				// we have multiple copies across layers
   480  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   481  			},
   482  		},
   483  		{
   484  			name: "by path to degree 2 link",
   485  			runner: func(resolver file.Resolver) []file.Location {
   486  				// multiple links resolves to the final file
   487  				actualLocations, err := resolver.FilesByPath("/link-indirect")
   488  				assert.NoError(t, err)
   489  				return actualLocations
   490  			},
   491  			expected: []file.Location{
   492  				// we have multiple copies across layers
   493  				file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   494  			},
   495  		},
   496  	}
   497  
   498  	for _, test := range tests {
   499  		t.Run(test.name, func(t *testing.T) {
   500  
   501  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   502  
   503  			resolver, err := NewFromContainerImageSquash(img)
   504  			assert.NoError(t, err)
   505  
   506  			actual := test.runner(resolver)
   507  
   508  			compareLocations(t, test.expected, actual)
   509  		})
   510  	}
   511  
   512  }
   513  
   514  func compareLocations(t *testing.T, expected, actual []file.Location) {
   515  	t.Helper()
   516  	ignoreUnexported := cmpopts.IgnoreUnexported(file.LocationData{})
   517  	ignoreUnexportedCoord := cmpopts.IgnoreUnexported(file.Coordinates{})
   518  	ignoreMetadata := cmpopts.IgnoreFields(file.LocationMetadata{}, "Annotations")
   519  	ignoreFS := cmpopts.IgnoreFields(file.Coordinates{}, "FileSystemID")
   520  
   521  	slices.SortFunc(expected, locationSorter)
   522  	slices.SortFunc(actual, locationSorter)
   523  
   524  	if d := cmp.Diff(expected, actual,
   525  		ignoreUnexported,
   526  		ignoreUnexportedCoord,
   527  		ignoreFS,
   528  		ignoreMetadata,
   529  	); d != "" {
   530  
   531  		t.Errorf("unexpected locations (-want +got):\n%s", d)
   532  	}
   533  
   534  }
   535  
   536  // locationSorter always sorts only by path information since test fixtures here only have filesystem IDs
   537  // for one side of the comparison (expected) and not the other (actual).
   538  func locationSorter(a, b file.Location) int {
   539  	if a.AccessPath != b.AccessPath {
   540  		return strings.Compare(a.AccessPath, b.AccessPath)
   541  	}
   542  
   543  	return strings.Compare(a.RealPath, b.RealPath)
   544  }
   545  
   546  func TestSquashResolver_AllLocations(t *testing.T) {
   547  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-files-deleted")
   548  
   549  	resolver, err := NewFromContainerImageSquash(img)
   550  	assert.NoError(t, err)
   551  
   552  	paths := strset.New()
   553  	ctx, cancel := context.WithCancel(context.Background())
   554  	defer cancel()
   555  	for loc := range resolver.AllLocations(ctx) {
   556  		paths.Add(loc.RealPath)
   557  	}
   558  	expected := []string{
   559  		"/Dockerfile",
   560  		"/file-3.txt",
   561  		"/target",
   562  		"/target/file-2.txt",
   563  	}
   564  
   565  	// depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image.
   566  	// this isn't important for the test, so we remove them.
   567  	paths.Remove("/proc", "/sys", "/dev", "/etc")
   568  
   569  	// Remove cache created by Mac Rosetta when emulating different arches
   570  	paths.Remove("/.cache/rosetta", "/.cache")
   571  
   572  	pathsList := paths.List()
   573  	sort.Strings(pathsList)
   574  
   575  	assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List()))
   576  }