github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/internal/fileresolver/container_image_squash_test.go (about)

     1  package fileresolver
     2  
     3  import (
     4  	"io"
     5  	"sort"
     6  	"testing"
     7  
     8  	"github.com/google/go-cmp/cmp"
     9  	"github.com/google/go-cmp/cmp/cmpopts"
    10  	"github.com/scylladb/go-set/strset"
    11  	"github.com/stretchr/testify/assert"
    12  	"github.com/stretchr/testify/require"
    13  
    14  	"github.com/anchore/stereoscope/pkg/imagetest"
    15  	"github.com/anchore/syft/syft/file"
    16  )
    17  
    18  func TestImageSquashResolver_FilesByPath(t *testing.T) {
    19  	cases := []struct {
    20  		name                 string
    21  		linkPath             string
    22  		resolveLayer         uint
    23  		resolvePath          string
    24  		forcePositiveHasPath bool
    25  	}{
    26  		{
    27  			name:         "link with previous data",
    28  			linkPath:     "/link-1",
    29  			resolveLayer: 1,
    30  			resolvePath:  "/file-1.txt",
    31  		},
    32  		{
    33  			name:         "link with in layer data",
    34  			linkPath:     "/link-within",
    35  			resolveLayer: 5,
    36  			resolvePath:  "/file-3.txt",
    37  		},
    38  		{
    39  			name:         "link with overridden data",
    40  			linkPath:     "/link-2",
    41  			resolveLayer: 7,
    42  			resolvePath:  "/file-2.txt",
    43  		},
    44  		{
    45  			name:         "indirect link (with overridden data)",
    46  			linkPath:     "/link-indirect",
    47  			resolveLayer: 7,
    48  			resolvePath:  "/file-2.txt",
    49  		},
    50  		{
    51  			name:         "dead link",
    52  			linkPath:     "/link-dead",
    53  			resolveLayer: 8,
    54  			resolvePath:  "",
    55  			// the path should exist, even if the link is dead
    56  			forcePositiveHasPath: true,
    57  		},
    58  		{
    59  			name:        "ignore directories",
    60  			linkPath:    "/bin",
    61  			resolvePath: "",
    62  			// the path should exist, even if we ignore it
    63  			forcePositiveHasPath: true,
    64  		},
    65  		{
    66  			name:         "parent is a link (with overridden data)",
    67  			linkPath:     "/parent-link/file-4.txt",
    68  			resolveLayer: 11,
    69  			resolvePath:  "/parent/file-4.txt",
    70  		},
    71  	}
    72  	for _, c := range cases {
    73  		t.Run(c.name, func(t *testing.T) {
    74  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
    75  
    76  			resolver, err := NewFromContainerImageSquash(img)
    77  			require.NoError(t, err)
    78  
    79  			hasPath := resolver.HasPath(c.linkPath)
    80  			if !c.forcePositiveHasPath {
    81  				if c.resolvePath != "" && !hasPath {
    82  					t.Errorf("expected HasPath() to indicate existance, but did not")
    83  				} else if c.resolvePath == "" && hasPath {
    84  					t.Errorf("expeced HasPath() to NOT indicate existance, but does")
    85  				}
    86  			} else if !hasPath {
    87  				t.Errorf("expected HasPath() to indicate existance, but did not (force path)")
    88  			}
    89  
    90  			refs, err := resolver.FilesByPath(c.linkPath)
    91  			require.NoError(t, err)
    92  
    93  			expectedRefs := 1
    94  			if c.resolvePath == "" {
    95  				expectedRefs = 0
    96  			}
    97  
    98  			if len(refs) != expectedRefs {
    99  				t.Fatalf("unexpected number of resolutions: %d", len(refs))
   100  			}
   101  
   102  			if expectedRefs == 0 {
   103  				// nothing else to assert
   104  				return
   105  			}
   106  
   107  			actual := refs[0]
   108  
   109  			if string(actual.Reference().RealPath) != c.resolvePath {
   110  				t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath)
   111  			}
   112  
   113  			if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath {
   114  				t.Errorf("we should always prefer real paths over ones with links")
   115  			}
   116  
   117  			layer := img.FileCatalog.Layer(actual.Reference())
   118  
   119  			if layer.Metadata.Index != c.resolveLayer {
   120  				t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer)
   121  			}
   122  		})
   123  	}
   124  }
   125  
   126  func TestImageSquashResolver_FilesByGlob(t *testing.T) {
   127  	cases := []struct {
   128  		name         string
   129  		glob         string
   130  		resolveLayer uint
   131  		resolvePath  string
   132  	}{
   133  		{
   134  			name:         "link with previous data",
   135  			glob:         "**/link-1",
   136  			resolveLayer: 1,
   137  			resolvePath:  "/file-1.txt",
   138  		},
   139  		{
   140  			name:         "link with in layer data",
   141  			glob:         "**/link-within",
   142  			resolveLayer: 5,
   143  			resolvePath:  "/file-3.txt",
   144  		},
   145  		{
   146  			name:         "link with overridden data",
   147  			glob:         "**/link-2",
   148  			resolveLayer: 7,
   149  			resolvePath:  "/file-2.txt",
   150  		},
   151  		{
   152  			name:         "indirect link (with overridden data)",
   153  			glob:         "**/link-indirect",
   154  			resolveLayer: 7,
   155  			resolvePath:  "/file-2.txt",
   156  		},
   157  		{
   158  			name: "dead link",
   159  			glob: "**/link-dead",
   160  			// dead links are dead! they shouldn't match on globs
   161  			resolvePath: "",
   162  		},
   163  		{
   164  			name:        "ignore directories",
   165  			glob:        "**/bin",
   166  			resolvePath: "",
   167  		},
   168  		{
   169  			name:         "parent without link",
   170  			glob:         "**/parent/*.txt",
   171  			resolveLayer: 11,
   172  			resolvePath:  "/parent/file-4.txt",
   173  		},
   174  		{
   175  			name:         "parent is a link (override)",
   176  			glob:         "**/parent-link/file-4.txt",
   177  			resolveLayer: 11,
   178  			resolvePath:  "/parent/file-4.txt",
   179  		},
   180  	}
   181  	for _, c := range cases {
   182  		t.Run(c.name, func(t *testing.T) {
   183  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   184  
   185  			resolver, err := NewFromContainerImageSquash(img)
   186  			require.NoError(t, err)
   187  
   188  			refs, err := resolver.FilesByGlob(c.glob)
   189  			require.NoError(t, err)
   190  
   191  			expectedRefs := 1
   192  			if c.resolvePath == "" {
   193  				expectedRefs = 0
   194  			}
   195  
   196  			if len(refs) != expectedRefs {
   197  				t.Fatalf("unexpected number of resolutions: %d", len(refs))
   198  			}
   199  
   200  			if expectedRefs == 0 {
   201  				// nothing else to assert
   202  				return
   203  			}
   204  
   205  			actual := refs[0]
   206  
   207  			if string(actual.Reference().RealPath) != c.resolvePath {
   208  				t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath)
   209  			}
   210  
   211  			if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath {
   212  				t.Errorf("we should always prefer real paths over ones with links")
   213  			}
   214  
   215  			layer := img.FileCatalog.Layer(actual.Reference())
   216  
   217  			if layer.Metadata.Index != c.resolveLayer {
   218  				t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer)
   219  			}
   220  		})
   221  	}
   222  }
   223  
   224  func Test_imageSquashResolver_FilesByMIMEType(t *testing.T) {
   225  
   226  	tests := []struct {
   227  		fixtureName   string
   228  		mimeType      string
   229  		expectedPaths *strset.Set
   230  	}{
   231  		{
   232  			fixtureName:   "image-simple",
   233  			mimeType:      "text/plain",
   234  			expectedPaths: strset.New("/somefile-1.txt", "/somefile-2.txt", "/really/nested/file-3.txt"),
   235  		},
   236  	}
   237  
   238  	for _, test := range tests {
   239  		t.Run(test.fixtureName, func(t *testing.T) {
   240  			img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureName)
   241  
   242  			resolver, err := NewFromContainerImageSquash(img)
   243  			assert.NoError(t, err)
   244  
   245  			locations, err := resolver.FilesByMIMEType(test.mimeType)
   246  			assert.NoError(t, err)
   247  
   248  			assert.Len(t, locations, test.expectedPaths.Size())
   249  			for _, l := range locations {
   250  				assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath)
   251  			}
   252  		})
   253  	}
   254  }
   255  
   256  func Test_imageSquashResolver_hasFilesystemIDInLocation(t *testing.T) {
   257  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-duplicate-path")
   258  
   259  	resolver, err := NewFromContainerImageSquash(img)
   260  	assert.NoError(t, err)
   261  
   262  	locations, err := resolver.FilesByMIMEType("text/plain")
   263  	assert.NoError(t, err)
   264  	assert.NotEmpty(t, locations)
   265  	for _, location := range locations {
   266  		assert.NotEmpty(t, location.FileSystemID)
   267  	}
   268  
   269  	locations, err = resolver.FilesByGlob("*.txt")
   270  	assert.NoError(t, err)
   271  	assert.NotEmpty(t, locations)
   272  	for _, location := range locations {
   273  		assert.NotEmpty(t, location.FileSystemID)
   274  	}
   275  
   276  	locations, err = resolver.FilesByPath("/somefile-1.txt")
   277  	assert.NoError(t, err)
   278  	assert.NotEmpty(t, locations)
   279  	for _, location := range locations {
   280  		assert.NotEmpty(t, location.FileSystemID)
   281  	}
   282  
   283  }
   284  
   285  func TestSquashImageResolver_FilesContents(t *testing.T) {
   286  
   287  	tests := []struct {
   288  		name     string
   289  		path     string
   290  		contents []string
   291  	}{
   292  		{
   293  			name: "one degree",
   294  			path: "link-2",
   295  			contents: []string{
   296  				"NEW file override!", // always from the squashed perspective
   297  			},
   298  		},
   299  		{
   300  			name: "two degrees",
   301  			path: "link-indirect",
   302  			contents: []string{
   303  				"NEW file override!", // always from the squashed perspective
   304  			},
   305  		},
   306  		{
   307  			name:     "dead link",
   308  			path:     "link-dead",
   309  			contents: []string{},
   310  		},
   311  	}
   312  
   313  	for _, test := range tests {
   314  		t.Run(test.name, func(t *testing.T) {
   315  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   316  
   317  			resolver, err := NewFromContainerImageSquash(img)
   318  			assert.NoError(t, err)
   319  
   320  			refs, err := resolver.FilesByPath(test.path)
   321  			require.NoError(t, err)
   322  			assert.Len(t, refs, len(test.contents))
   323  
   324  			for idx, loc := range refs {
   325  
   326  				reader, err := resolver.FileContentsByLocation(loc)
   327  				require.NoError(t, err)
   328  
   329  				actual, err := io.ReadAll(reader)
   330  				require.NoError(t, err)
   331  
   332  				assert.Equal(t, test.contents[idx], string(actual))
   333  			}
   334  		})
   335  	}
   336  }
   337  
   338  func TestSquashImageResolver_FilesContents_errorOnDirRequest(t *testing.T) {
   339  
   340  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   341  
   342  	resolver, err := NewFromContainerImageSquash(img)
   343  	assert.NoError(t, err)
   344  
   345  	var dirLoc *file.Location
   346  	for loc := range resolver.AllLocations() {
   347  		entry, err := resolver.img.FileCatalog.Get(loc.Reference())
   348  		require.NoError(t, err)
   349  		if entry.Metadata.IsDir() {
   350  			dirLoc = &loc
   351  			break
   352  		}
   353  	}
   354  
   355  	require.NotNil(t, dirLoc)
   356  
   357  	reader, err := resolver.FileContentsByLocation(*dirLoc)
   358  	require.Error(t, err)
   359  	require.Nil(t, reader)
   360  }
   361  
   362  func Test_imageSquashResolver_resolvesLinks(t *testing.T) {
   363  	tests := []struct {
   364  		name     string
   365  		runner   func(file.Resolver) []file.Location
   366  		expected []file.Location
   367  	}{
   368  		{
   369  			name: "by mimetype",
   370  			runner: func(resolver file.Resolver) []file.Location {
   371  				// links should not show up when searching mimetype
   372  				actualLocations, err := resolver.FilesByMIMEType("text/plain")
   373  				assert.NoError(t, err)
   374  				return actualLocations
   375  			},
   376  			expected: []file.Location{
   377  				file.NewVirtualLocation("/etc/group", "/etc/group"),
   378  				file.NewVirtualLocation("/etc/passwd", "/etc/passwd"),
   379  				file.NewVirtualLocation("/etc/shadow", "/etc/shadow"),
   380  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   381  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   382  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   383  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   384  			},
   385  		},
   386  		{
   387  			name: "by glob to links",
   388  			runner: func(resolver file.Resolver) []file.Location {
   389  				// links are searched, but resolve to the real files
   390  				actualLocations, err := resolver.FilesByGlob("*ink-*")
   391  				assert.NoError(t, err)
   392  				return actualLocations
   393  			},
   394  			expected: []file.Location{
   395  				file.NewVirtualLocation("/file-1.txt", "/link-1"),
   396  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   397  
   398  				// though this is a link, and it matches to the file, the resolver de-duplicates files
   399  				// by the real path, so it is not included in the results
   400  				//file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   401  
   402  				file.NewVirtualLocation("/file-3.txt", "/link-within"),
   403  			},
   404  		},
   405  		{
   406  			name: "by basename",
   407  			runner: func(resolver file.Resolver) []file.Location {
   408  				// links are searched, but resolve to the real files
   409  				actualLocations, err := resolver.FilesByGlob("**/file-2.txt")
   410  				assert.NoError(t, err)
   411  				return actualLocations
   412  			},
   413  			expected: []file.Location{
   414  				// this has two copies in the base image, which overwrites the same location
   415  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   416  			},
   417  		},
   418  		{
   419  			name: "by basename glob",
   420  			runner: func(resolver file.Resolver) []file.Location {
   421  				// links are searched, but resolve to the real files
   422  				actualLocations, err := resolver.FilesByGlob("**/file-?.txt")
   423  				assert.NoError(t, err)
   424  				return actualLocations
   425  			},
   426  			expected: []file.Location{
   427  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   428  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   429  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   430  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   431  			},
   432  		},
   433  		{
   434  			name: "by basename glob to links",
   435  			runner: func(resolver file.Resolver) []file.Location {
   436  				actualLocations, err := resolver.FilesByGlob("**/link-*")
   437  				assert.NoError(t, err)
   438  				return actualLocations
   439  			},
   440  			expected: []file.Location{
   441  				file.NewVirtualLocation("/file-1.txt", "/link-1"),
   442  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   443  
   444  				// we already have this real file path via another link, so only one is returned
   445  				// file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   446  
   447  				file.NewVirtualLocation("/file-3.txt", "/link-within"),
   448  			},
   449  		},
   450  		{
   451  			name: "by extension",
   452  			runner: func(resolver file.Resolver) []file.Location {
   453  				// links are searched, but resolve to the real files
   454  				actualLocations, err := resolver.FilesByGlob("**/*.txt")
   455  				assert.NoError(t, err)
   456  				return actualLocations
   457  			},
   458  			expected: []file.Location{
   459  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   460  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   461  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   462  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   463  			},
   464  		},
   465  		{
   466  			name: "by path to degree 1 link",
   467  			runner: func(resolver file.Resolver) []file.Location {
   468  				// links resolve to the final file
   469  				actualLocations, err := resolver.FilesByPath("/link-2")
   470  				assert.NoError(t, err)
   471  				return actualLocations
   472  			},
   473  			expected: []file.Location{
   474  				// we have multiple copies across layers
   475  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   476  			},
   477  		},
   478  		{
   479  			name: "by path to degree 2 link",
   480  			runner: func(resolver file.Resolver) []file.Location {
   481  				// multiple links resolves to the final file
   482  				actualLocations, err := resolver.FilesByPath("/link-indirect")
   483  				assert.NoError(t, err)
   484  				return actualLocations
   485  			},
   486  			expected: []file.Location{
   487  				// we have multiple copies across layers
   488  				file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   489  			},
   490  		},
   491  	}
   492  
   493  	for _, test := range tests {
   494  		t.Run(test.name, func(t *testing.T) {
   495  
   496  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   497  
   498  			resolver, err := NewFromContainerImageSquash(img)
   499  			assert.NoError(t, err)
   500  
   501  			actual := test.runner(resolver)
   502  
   503  			compareLocations(t, test.expected, actual)
   504  		})
   505  	}
   506  
   507  }
   508  
   509  func compareLocations(t *testing.T, expected, actual []file.Location) {
   510  	t.Helper()
   511  	ignoreUnexported := cmpopts.IgnoreUnexported(file.LocationData{})
   512  	ignoreMetadata := cmpopts.IgnoreFields(file.LocationMetadata{}, "Annotations")
   513  	ignoreFS := cmpopts.IgnoreFields(file.Coordinates{}, "FileSystemID")
   514  
   515  	sort.Sort(file.Locations(expected))
   516  	sort.Sort(file.Locations(actual))
   517  
   518  	if d := cmp.Diff(expected, actual,
   519  		ignoreUnexported,
   520  		ignoreFS,
   521  		ignoreMetadata,
   522  	); d != "" {
   523  
   524  		t.Errorf("unexpected locations (-want +got):\n%s", d)
   525  	}
   526  
   527  }
   528  
   529  func TestSquashResolver_AllLocations(t *testing.T) {
   530  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-files-deleted")
   531  
   532  	resolver, err := NewFromContainerImageSquash(img)
   533  	assert.NoError(t, err)
   534  
   535  	paths := strset.New()
   536  	for loc := range resolver.AllLocations() {
   537  		paths.Add(loc.RealPath)
   538  	}
   539  	expected := []string{
   540  		"/Dockerfile",
   541  		"/file-3.txt",
   542  		"/target",
   543  		"/target/file-2.txt",
   544  	}
   545  
   546  	// depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image.
   547  	// this isn't important for the test, so we remove them.
   548  	paths.Remove("/proc", "/sys", "/dev", "/etc")
   549  
   550  	pathsList := paths.List()
   551  	sort.Strings(pathsList)
   552  
   553  	assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List()))
   554  }