github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/internal/fileresolver/container_image_squash_test.go (about)

     1  package fileresolver
     2  
     3  import (
     4  	"io"
     5  	"sort"
     6  	"testing"
     7  
     8  	"github.com/google/go-cmp/cmp"
     9  	"github.com/google/go-cmp/cmp/cmpopts"
    10  	"github.com/scylladb/go-set/strset"
    11  	"github.com/stretchr/testify/assert"
    12  	"github.com/stretchr/testify/require"
    13  
    14  	"github.com/anchore/stereoscope/pkg/imagetest"
    15  	"github.com/anchore/syft/syft/file"
    16  )
    17  
    18  func TestImageSquashResolver_FilesByPath(t *testing.T) {
    19  	cases := []struct {
    20  		name                 string
    21  		linkPath             string
    22  		resolveLayer         uint
    23  		resolvePath          string
    24  		forcePositiveHasPath bool
    25  	}{
    26  		{
    27  			name:         "link with previous data",
    28  			linkPath:     "/link-1",
    29  			resolveLayer: 1,
    30  			resolvePath:  "/file-1.txt",
    31  		},
    32  		{
    33  			name:         "link with in layer data",
    34  			linkPath:     "/link-within",
    35  			resolveLayer: 5,
    36  			resolvePath:  "/file-3.txt",
    37  		},
    38  		{
    39  			name:         "link with overridden data",
    40  			linkPath:     "/link-2",
    41  			resolveLayer: 7,
    42  			resolvePath:  "/file-2.txt",
    43  		},
    44  		{
    45  			name:         "indirect link (with overridden data)",
    46  			linkPath:     "/link-indirect",
    47  			resolveLayer: 7,
    48  			resolvePath:  "/file-2.txt",
    49  		},
    50  		{
    51  			name:         "dead link",
    52  			linkPath:     "/link-dead",
    53  			resolveLayer: 8,
    54  			resolvePath:  "",
    55  			// the path should exist, even if the link is dead
    56  			forcePositiveHasPath: true,
    57  		},
    58  		{
    59  			name:        "ignore directories",
    60  			linkPath:    "/bin",
    61  			resolvePath: "",
    62  			// the path should exist, even if we ignore it
    63  			forcePositiveHasPath: true,
    64  		},
    65  		{
    66  			name:         "parent is a link (with overridden data)",
    67  			linkPath:     "/parent-link/file-4.txt",
    68  			resolveLayer: 11,
    69  			resolvePath:  "/parent/file-4.txt",
    70  		},
    71  	}
    72  	for _, c := range cases {
    73  		t.Run(c.name, func(t *testing.T) {
    74  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
    75  
    76  			resolver, err := NewFromContainerImageSquash(img)
    77  			if err != nil {
    78  				t.Fatalf("could not create resolver: %+v", err)
    79  			}
    80  
    81  			hasPath := resolver.HasPath(c.linkPath)
    82  			if !c.forcePositiveHasPath {
    83  				if c.resolvePath != "" && !hasPath {
    84  					t.Errorf("expected HasPath() to indicate existance, but did not")
    85  				} else if c.resolvePath == "" && hasPath {
    86  					t.Errorf("expeced HasPath() to NOT indicate existance, but does")
    87  				}
    88  			} else if !hasPath {
    89  				t.Errorf("expected HasPath() to indicate existance, but did not (force path)")
    90  			}
    91  
    92  			refs, err := resolver.FilesByPath(c.linkPath)
    93  			if err != nil {
    94  				t.Fatalf("could not use resolver: %+v", err)
    95  			}
    96  
    97  			expectedRefs := 1
    98  			if c.resolvePath == "" {
    99  				expectedRefs = 0
   100  			}
   101  
   102  			if len(refs) != expectedRefs {
   103  				t.Fatalf("unexpected number of resolutions: %d", len(refs))
   104  			}
   105  
   106  			if expectedRefs == 0 {
   107  				// nothing else to assert
   108  				return
   109  			}
   110  
   111  			actual := refs[0]
   112  
   113  			if string(actual.Reference().RealPath) != c.resolvePath {
   114  				t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath)
   115  			}
   116  
   117  			if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath {
   118  				t.Errorf("we should always prefer real paths over ones with links")
   119  			}
   120  
   121  			layer := img.FileCatalog.Layer(actual.Reference())
   122  
   123  			if layer.Metadata.Index != c.resolveLayer {
   124  				t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer)
   125  			}
   126  		})
   127  	}
   128  }
   129  
   130  func TestImageSquashResolver_FilesByGlob(t *testing.T) {
   131  	cases := []struct {
   132  		name         string
   133  		glob         string
   134  		resolveLayer uint
   135  		resolvePath  string
   136  	}{
   137  		{
   138  			name:         "link with previous data",
   139  			glob:         "**/link-1",
   140  			resolveLayer: 1,
   141  			resolvePath:  "/file-1.txt",
   142  		},
   143  		{
   144  			name:         "link with in layer data",
   145  			glob:         "**/link-within",
   146  			resolveLayer: 5,
   147  			resolvePath:  "/file-3.txt",
   148  		},
   149  		{
   150  			name:         "link with overridden data",
   151  			glob:         "**/link-2",
   152  			resolveLayer: 7,
   153  			resolvePath:  "/file-2.txt",
   154  		},
   155  		{
   156  			name:         "indirect link (with overridden data)",
   157  			glob:         "**/link-indirect",
   158  			resolveLayer: 7,
   159  			resolvePath:  "/file-2.txt",
   160  		},
   161  		{
   162  			name: "dead link",
   163  			glob: "**/link-dead",
   164  			// dead links are dead! they shouldn't match on globs
   165  			resolvePath: "",
   166  		},
   167  		{
   168  			name:        "ignore directories",
   169  			glob:        "**/bin",
   170  			resolvePath: "",
   171  		},
   172  		{
   173  			name:         "parent without link",
   174  			glob:         "**/parent/*.txt",
   175  			resolveLayer: 11,
   176  			resolvePath:  "/parent/file-4.txt",
   177  		},
   178  		{
   179  			name:         "parent is a link (override)",
   180  			glob:         "**/parent-link/file-4.txt",
   181  			resolveLayer: 11,
   182  			resolvePath:  "/parent/file-4.txt",
   183  		},
   184  	}
   185  	for _, c := range cases {
   186  		t.Run(c.name, func(t *testing.T) {
   187  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   188  
   189  			resolver, err := NewFromContainerImageSquash(img)
   190  			if err != nil {
   191  				t.Fatalf("could not create resolver: %+v", err)
   192  			}
   193  
   194  			refs, err := resolver.FilesByGlob(c.glob)
   195  			if err != nil {
   196  				t.Fatalf("could not use resolver: %+v", err)
   197  			}
   198  
   199  			expectedRefs := 1
   200  			if c.resolvePath == "" {
   201  				expectedRefs = 0
   202  			}
   203  
   204  			if len(refs) != expectedRefs {
   205  				t.Fatalf("unexpected number of resolutions: %d", len(refs))
   206  			}
   207  
   208  			if expectedRefs == 0 {
   209  				// nothing else to assert
   210  				return
   211  			}
   212  
   213  			actual := refs[0]
   214  
   215  			if string(actual.Reference().RealPath) != c.resolvePath {
   216  				t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath)
   217  			}
   218  
   219  			if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath {
   220  				t.Errorf("we should always prefer real paths over ones with links")
   221  			}
   222  
   223  			layer := img.FileCatalog.Layer(actual.Reference())
   224  
   225  			if layer.Metadata.Index != c.resolveLayer {
   226  				t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer)
   227  			}
   228  		})
   229  	}
   230  }
   231  
   232  func Test_imageSquashResolver_FilesByMIMEType(t *testing.T) {
   233  
   234  	tests := []struct {
   235  		fixtureName   string
   236  		mimeType      string
   237  		expectedPaths *strset.Set
   238  	}{
   239  		{
   240  			fixtureName:   "image-simple",
   241  			mimeType:      "text/plain",
   242  			expectedPaths: strset.New("/somefile-1.txt", "/somefile-2.txt", "/really/nested/file-3.txt"),
   243  		},
   244  	}
   245  
   246  	for _, test := range tests {
   247  		t.Run(test.fixtureName, func(t *testing.T) {
   248  			img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureName)
   249  
   250  			resolver, err := NewFromContainerImageSquash(img)
   251  			assert.NoError(t, err)
   252  
   253  			locations, err := resolver.FilesByMIMEType(test.mimeType)
   254  			assert.NoError(t, err)
   255  
   256  			assert.Len(t, locations, test.expectedPaths.Size())
   257  			for _, l := range locations {
   258  				assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath)
   259  			}
   260  		})
   261  	}
   262  }
   263  
   264  func Test_imageSquashResolver_hasFilesystemIDInLocation(t *testing.T) {
   265  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-duplicate-path")
   266  
   267  	resolver, err := NewFromContainerImageSquash(img)
   268  	assert.NoError(t, err)
   269  
   270  	locations, err := resolver.FilesByMIMEType("text/plain")
   271  	assert.NoError(t, err)
   272  	assert.NotEmpty(t, locations)
   273  	for _, location := range locations {
   274  		assert.NotEmpty(t, location.FileSystemID)
   275  	}
   276  
   277  	locations, err = resolver.FilesByGlob("*.txt")
   278  	assert.NoError(t, err)
   279  	assert.NotEmpty(t, locations)
   280  	for _, location := range locations {
   281  		assert.NotEmpty(t, location.FileSystemID)
   282  	}
   283  
   284  	locations, err = resolver.FilesByPath("/somefile-1.txt")
   285  	assert.NoError(t, err)
   286  	assert.NotEmpty(t, locations)
   287  	for _, location := range locations {
   288  		assert.NotEmpty(t, location.FileSystemID)
   289  	}
   290  
   291  }
   292  
   293  func TestSquashImageResolver_FilesContents(t *testing.T) {
   294  
   295  	tests := []struct {
   296  		name     string
   297  		path     string
   298  		contents []string
   299  	}{
   300  		{
   301  			name: "one degree",
   302  			path: "link-2",
   303  			contents: []string{
   304  				"NEW file override!", // always from the squashed perspective
   305  			},
   306  		},
   307  		{
   308  			name: "two degrees",
   309  			path: "link-indirect",
   310  			contents: []string{
   311  				"NEW file override!", // always from the squashed perspective
   312  			},
   313  		},
   314  		{
   315  			name:     "dead link",
   316  			path:     "link-dead",
   317  			contents: []string{},
   318  		},
   319  	}
   320  
   321  	for _, test := range tests {
   322  		t.Run(test.name, func(t *testing.T) {
   323  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   324  
   325  			resolver, err := NewFromContainerImageSquash(img)
   326  			assert.NoError(t, err)
   327  
   328  			refs, err := resolver.FilesByPath(test.path)
   329  			require.NoError(t, err)
   330  			assert.Len(t, refs, len(test.contents))
   331  
   332  			for idx, loc := range refs {
   333  
   334  				reader, err := resolver.FileContentsByLocation(loc)
   335  				require.NoError(t, err)
   336  
   337  				actual, err := io.ReadAll(reader)
   338  				require.NoError(t, err)
   339  
   340  				assert.Equal(t, test.contents[idx], string(actual))
   341  			}
   342  		})
   343  	}
   344  }
   345  
   346  func TestSquashImageResolver_FilesContents_errorOnDirRequest(t *testing.T) {
   347  
   348  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   349  
   350  	resolver, err := NewFromContainerImageSquash(img)
   351  	assert.NoError(t, err)
   352  
   353  	var dirLoc *file.Location
   354  	for loc := range resolver.AllLocations() {
   355  		entry, err := resolver.img.FileCatalog.Get(loc.Reference())
   356  		require.NoError(t, err)
   357  		if entry.Metadata.IsDir() {
   358  			dirLoc = &loc
   359  			break
   360  		}
   361  	}
   362  
   363  	require.NotNil(t, dirLoc)
   364  
   365  	reader, err := resolver.FileContentsByLocation(*dirLoc)
   366  	require.Error(t, err)
   367  	require.Nil(t, reader)
   368  }
   369  
   370  func Test_imageSquashResolver_resolvesLinks(t *testing.T) {
   371  	tests := []struct {
   372  		name     string
   373  		runner   func(file.Resolver) []file.Location
   374  		expected []file.Location
   375  	}{
   376  		{
   377  			name: "by mimetype",
   378  			runner: func(resolver file.Resolver) []file.Location {
   379  				// links should not show up when searching mimetype
   380  				actualLocations, err := resolver.FilesByMIMEType("text/plain")
   381  				assert.NoError(t, err)
   382  				return actualLocations
   383  			},
   384  			expected: []file.Location{
   385  				file.NewVirtualLocation("/etc/group", "/etc/group"),
   386  				file.NewVirtualLocation("/etc/passwd", "/etc/passwd"),
   387  				file.NewVirtualLocation("/etc/shadow", "/etc/shadow"),
   388  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   389  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   390  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   391  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   392  			},
   393  		},
   394  		{
   395  			name: "by glob to links",
   396  			runner: func(resolver file.Resolver) []file.Location {
   397  				// links are searched, but resolve to the real files
   398  				actualLocations, err := resolver.FilesByGlob("*ink-*")
   399  				assert.NoError(t, err)
   400  				return actualLocations
   401  			},
   402  			expected: []file.Location{
   403  				file.NewVirtualLocation("/file-1.txt", "/link-1"),
   404  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   405  
   406  				// though this is a link, and it matches to the file, the resolver de-duplicates files
   407  				// by the real path, so it is not included in the results
   408  				//file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   409  
   410  				file.NewVirtualLocation("/file-3.txt", "/link-within"),
   411  			},
   412  		},
   413  		{
   414  			name: "by basename",
   415  			runner: func(resolver file.Resolver) []file.Location {
   416  				// links are searched, but resolve to the real files
   417  				actualLocations, err := resolver.FilesByGlob("**/file-2.txt")
   418  				assert.NoError(t, err)
   419  				return actualLocations
   420  			},
   421  			expected: []file.Location{
   422  				// this has two copies in the base image, which overwrites the same location
   423  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   424  			},
   425  		},
   426  		{
   427  			name: "by basename glob",
   428  			runner: func(resolver file.Resolver) []file.Location {
   429  				// links are searched, but resolve to the real files
   430  				actualLocations, err := resolver.FilesByGlob("**/file-?.txt")
   431  				assert.NoError(t, err)
   432  				return actualLocations
   433  			},
   434  			expected: []file.Location{
   435  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   436  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   437  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   438  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   439  			},
   440  		},
   441  		{
   442  			name: "by basename glob to links",
   443  			runner: func(resolver file.Resolver) []file.Location {
   444  				actualLocations, err := resolver.FilesByGlob("**/link-*")
   445  				assert.NoError(t, err)
   446  				return actualLocations
   447  			},
   448  			expected: []file.Location{
   449  				file.NewVirtualLocation("/file-1.txt", "/link-1"),
   450  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   451  
   452  				// we already have this real file path via another link, so only one is returned
   453  				// file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   454  
   455  				file.NewVirtualLocation("/file-3.txt", "/link-within"),
   456  			},
   457  		},
   458  		{
   459  			name: "by extension",
   460  			runner: func(resolver file.Resolver) []file.Location {
   461  				// links are searched, but resolve to the real files
   462  				actualLocations, err := resolver.FilesByGlob("**/*.txt")
   463  				assert.NoError(t, err)
   464  				return actualLocations
   465  			},
   466  			expected: []file.Location{
   467  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   468  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   469  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   470  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   471  			},
   472  		},
   473  		{
   474  			name: "by path to degree 1 link",
   475  			runner: func(resolver file.Resolver) []file.Location {
   476  				// links resolve to the final file
   477  				actualLocations, err := resolver.FilesByPath("/link-2")
   478  				assert.NoError(t, err)
   479  				return actualLocations
   480  			},
   481  			expected: []file.Location{
   482  				// we have multiple copies across layers
   483  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   484  			},
   485  		},
   486  		{
   487  			name: "by path to degree 2 link",
   488  			runner: func(resolver file.Resolver) []file.Location {
   489  				// multiple links resolves to the final file
   490  				actualLocations, err := resolver.FilesByPath("/link-indirect")
   491  				assert.NoError(t, err)
   492  				return actualLocations
   493  			},
   494  			expected: []file.Location{
   495  				// we have multiple copies across layers
   496  				file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   497  			},
   498  		},
   499  	}
   500  
   501  	for _, test := range tests {
   502  		t.Run(test.name, func(t *testing.T) {
   503  
   504  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   505  
   506  			resolver, err := NewFromContainerImageSquash(img)
   507  			assert.NoError(t, err)
   508  
   509  			actual := test.runner(resolver)
   510  
   511  			compareLocations(t, test.expected, actual)
   512  		})
   513  	}
   514  
   515  }
   516  
   517  func compareLocations(t *testing.T, expected, actual []file.Location) {
   518  	t.Helper()
   519  	ignoreUnexported := cmpopts.IgnoreUnexported(file.LocationData{})
   520  	ignoreMetadata := cmpopts.IgnoreFields(file.LocationMetadata{}, "Annotations")
   521  	ignoreFS := cmpopts.IgnoreFields(file.Coordinates{}, "FileSystemID")
   522  
   523  	sort.Sort(file.Locations(expected))
   524  	sort.Sort(file.Locations(actual))
   525  
   526  	if d := cmp.Diff(expected, actual,
   527  		ignoreUnexported,
   528  		ignoreFS,
   529  		ignoreMetadata,
   530  	); d != "" {
   531  
   532  		t.Errorf("unexpected locations (-want +got):\n%s", d)
   533  	}
   534  
   535  }
   536  
   537  func TestSquashResolver_AllLocations(t *testing.T) {
   538  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-files-deleted")
   539  
   540  	resolver, err := NewFromContainerImageSquash(img)
   541  	assert.NoError(t, err)
   542  
   543  	paths := strset.New()
   544  	for loc := range resolver.AllLocations() {
   545  		paths.Add(loc.RealPath)
   546  	}
   547  	expected := []string{
   548  		"/Dockerfile",
   549  		"/file-3.txt",
   550  		"/target",
   551  		"/target/file-2.txt",
   552  	}
   553  
   554  	// depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image.
   555  	// this isn't important for the test, so we remove them.
   556  	paths.Remove("/proc", "/sys", "/dev", "/etc")
   557  
   558  	pathsList := paths.List()
   559  	sort.Strings(pathsList)
   560  
   561  	assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List()))
   562  }