github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/internal/fileresolver/container_image_squash_test.go (about)

     1  package fileresolver
     2  
     3  import (
     4  	"io"
     5  	"sort"
     6  	"testing"
     7  
     8  	"github.com/google/go-cmp/cmp"
     9  	"github.com/nextlinux/gosbom/gosbom/file"
    10  	"github.com/scylladb/go-set/strset"
    11  	"github.com/stretchr/testify/assert"
    12  	"github.com/stretchr/testify/require"
    13  
    14  	"github.com/anchore/stereoscope/pkg/imagetest"
    15  )
    16  
    17  func TestImageSquashResolver_FilesByPath(t *testing.T) {
    18  	cases := []struct {
    19  		name                 string
    20  		linkPath             string
    21  		resolveLayer         uint
    22  		resolvePath          string
    23  		forcePositiveHasPath bool
    24  	}{
    25  		{
    26  			name:         "link with previous data",
    27  			linkPath:     "/link-1",
    28  			resolveLayer: 1,
    29  			resolvePath:  "/file-1.txt",
    30  		},
    31  		{
    32  			name:         "link with in layer data",
    33  			linkPath:     "/link-within",
    34  			resolveLayer: 5,
    35  			resolvePath:  "/file-3.txt",
    36  		},
    37  		{
    38  			name:         "link with overridden data",
    39  			linkPath:     "/link-2",
    40  			resolveLayer: 7,
    41  			resolvePath:  "/file-2.txt",
    42  		},
    43  		{
    44  			name:         "indirect link (with overridden data)",
    45  			linkPath:     "/link-indirect",
    46  			resolveLayer: 7,
    47  			resolvePath:  "/file-2.txt",
    48  		},
    49  		{
    50  			name:         "dead link",
    51  			linkPath:     "/link-dead",
    52  			resolveLayer: 8,
    53  			resolvePath:  "",
    54  			// the path should exist, even if the link is dead
    55  			forcePositiveHasPath: true,
    56  		},
    57  		{
    58  			name:        "ignore directories",
    59  			linkPath:    "/bin",
    60  			resolvePath: "",
    61  			// the path should exist, even if we ignore it
    62  			forcePositiveHasPath: true,
    63  		},
    64  		{
    65  			name:         "parent is a link (with overridden data)",
    66  			linkPath:     "/parent-link/file-4.txt",
    67  			resolveLayer: 11,
    68  			resolvePath:  "/parent/file-4.txt",
    69  		},
    70  	}
    71  	for _, c := range cases {
    72  		t.Run(c.name, func(t *testing.T) {
    73  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
    74  
    75  			resolver, err := NewFromContainerImageSquash(img)
    76  			if err != nil {
    77  				t.Fatalf("could not create resolver: %+v", err)
    78  			}
    79  
    80  			hasPath := resolver.HasPath(c.linkPath)
    81  			if !c.forcePositiveHasPath {
    82  				if c.resolvePath != "" && !hasPath {
    83  					t.Errorf("expected HasPath() to indicate existance, but did not")
    84  				} else if c.resolvePath == "" && hasPath {
    85  					t.Errorf("expeced HasPath() to NOT indicate existance, but does")
    86  				}
    87  			} else if !hasPath {
    88  				t.Errorf("expected HasPath() to indicate existance, but did not (force path)")
    89  			}
    90  
    91  			refs, err := resolver.FilesByPath(c.linkPath)
    92  			if err != nil {
    93  				t.Fatalf("could not use resolver: %+v", err)
    94  			}
    95  
    96  			expectedRefs := 1
    97  			if c.resolvePath == "" {
    98  				expectedRefs = 0
    99  			}
   100  
   101  			if len(refs) != expectedRefs {
   102  				t.Fatalf("unexpected number of resolutions: %d", len(refs))
   103  			}
   104  
   105  			if expectedRefs == 0 {
   106  				// nothing else to assert
   107  				return
   108  			}
   109  
   110  			actual := refs[0]
   111  
   112  			if string(actual.Reference().RealPath) != c.resolvePath {
   113  				t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath)
   114  			}
   115  
   116  			if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath {
   117  				t.Errorf("we should always prefer real paths over ones with links")
   118  			}
   119  
   120  			layer := img.FileCatalog.Layer(actual.Reference())
   121  
   122  			if layer.Metadata.Index != c.resolveLayer {
   123  				t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer)
   124  			}
   125  		})
   126  	}
   127  }
   128  
   129  func TestImageSquashResolver_FilesByGlob(t *testing.T) {
   130  	cases := []struct {
   131  		name         string
   132  		glob         string
   133  		resolveLayer uint
   134  		resolvePath  string
   135  	}{
   136  		{
   137  			name:         "link with previous data",
   138  			glob:         "**/link-1",
   139  			resolveLayer: 1,
   140  			resolvePath:  "/file-1.txt",
   141  		},
   142  		{
   143  			name:         "link with in layer data",
   144  			glob:         "**/link-within",
   145  			resolveLayer: 5,
   146  			resolvePath:  "/file-3.txt",
   147  		},
   148  		{
   149  			name:         "link with overridden data",
   150  			glob:         "**/link-2",
   151  			resolveLayer: 7,
   152  			resolvePath:  "/file-2.txt",
   153  		},
   154  		{
   155  			name:         "indirect link (with overridden data)",
   156  			glob:         "**/link-indirect",
   157  			resolveLayer: 7,
   158  			resolvePath:  "/file-2.txt",
   159  		},
   160  		{
   161  			name: "dead link",
   162  			glob: "**/link-dead",
   163  			// dead links are dead! they shouldn't match on globs
   164  			resolvePath: "",
   165  		},
   166  		{
   167  			name:        "ignore directories",
   168  			glob:        "**/bin",
   169  			resolvePath: "",
   170  		},
   171  		{
   172  			name:         "parent without link",
   173  			glob:         "**/parent/*.txt",
   174  			resolveLayer: 11,
   175  			resolvePath:  "/parent/file-4.txt",
   176  		},
   177  		{
   178  			name:         "parent is a link (override)",
   179  			glob:         "**/parent-link/file-4.txt",
   180  			resolveLayer: 11,
   181  			resolvePath:  "/parent/file-4.txt",
   182  		},
   183  	}
   184  	for _, c := range cases {
   185  		t.Run(c.name, func(t *testing.T) {
   186  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   187  
   188  			resolver, err := NewFromContainerImageSquash(img)
   189  			if err != nil {
   190  				t.Fatalf("could not create resolver: %+v", err)
   191  			}
   192  
   193  			refs, err := resolver.FilesByGlob(c.glob)
   194  			if err != nil {
   195  				t.Fatalf("could not use resolver: %+v", err)
   196  			}
   197  
   198  			expectedRefs := 1
   199  			if c.resolvePath == "" {
   200  				expectedRefs = 0
   201  			}
   202  
   203  			if len(refs) != expectedRefs {
   204  				t.Fatalf("unexpected number of resolutions: %d", len(refs))
   205  			}
   206  
   207  			if expectedRefs == 0 {
   208  				// nothing else to assert
   209  				return
   210  			}
   211  
   212  			actual := refs[0]
   213  
   214  			if string(actual.Reference().RealPath) != c.resolvePath {
   215  				t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath)
   216  			}
   217  
   218  			if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath {
   219  				t.Errorf("we should always prefer real paths over ones with links")
   220  			}
   221  
   222  			layer := img.FileCatalog.Layer(actual.Reference())
   223  
   224  			if layer.Metadata.Index != c.resolveLayer {
   225  				t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer)
   226  			}
   227  		})
   228  	}
   229  }
   230  
   231  func Test_imageSquashResolver_FilesByMIMEType(t *testing.T) {
   232  
   233  	tests := []struct {
   234  		fixtureName   string
   235  		mimeType      string
   236  		expectedPaths *strset.Set
   237  	}{
   238  		{
   239  			fixtureName:   "image-simple",
   240  			mimeType:      "text/plain",
   241  			expectedPaths: strset.New("/somefile-1.txt", "/somefile-2.txt", "/really/nested/file-3.txt"),
   242  		},
   243  	}
   244  
   245  	for _, test := range tests {
   246  		t.Run(test.fixtureName, func(t *testing.T) {
   247  			img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureName)
   248  
   249  			resolver, err := NewFromContainerImageSquash(img)
   250  			assert.NoError(t, err)
   251  
   252  			locations, err := resolver.FilesByMIMEType(test.mimeType)
   253  			assert.NoError(t, err)
   254  
   255  			assert.Len(t, locations, test.expectedPaths.Size())
   256  			for _, l := range locations {
   257  				assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath)
   258  			}
   259  		})
   260  	}
   261  }
   262  
   263  func Test_imageSquashResolver_hasFilesystemIDInLocation(t *testing.T) {
   264  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-duplicate-path")
   265  
   266  	resolver, err := NewFromContainerImageSquash(img)
   267  	assert.NoError(t, err)
   268  
   269  	locations, err := resolver.FilesByMIMEType("text/plain")
   270  	assert.NoError(t, err)
   271  	assert.NotEmpty(t, locations)
   272  	for _, location := range locations {
   273  		assert.NotEmpty(t, location.FileSystemID)
   274  	}
   275  
   276  	locations, err = resolver.FilesByGlob("*.txt")
   277  	assert.NoError(t, err)
   278  	assert.NotEmpty(t, locations)
   279  	for _, location := range locations {
   280  		assert.NotEmpty(t, location.FileSystemID)
   281  	}
   282  
   283  	locations, err = resolver.FilesByPath("/somefile-1.txt")
   284  	assert.NoError(t, err)
   285  	assert.NotEmpty(t, locations)
   286  	for _, location := range locations {
   287  		assert.NotEmpty(t, location.FileSystemID)
   288  	}
   289  
   290  }
   291  
   292  func TestSquashImageResolver_FilesContents(t *testing.T) {
   293  
   294  	tests := []struct {
   295  		name     string
   296  		path     string
   297  		contents []string
   298  	}{
   299  		{
   300  			name: "one degree",
   301  			path: "link-2",
   302  			contents: []string{
   303  				"NEW file override!", // always from the squashed perspective
   304  			},
   305  		},
   306  		{
   307  			name: "two degrees",
   308  			path: "link-indirect",
   309  			contents: []string{
   310  				"NEW file override!", // always from the squashed perspective
   311  			},
   312  		},
   313  		{
   314  			name:     "dead link",
   315  			path:     "link-dead",
   316  			contents: []string{},
   317  		},
   318  	}
   319  
   320  	for _, test := range tests {
   321  		t.Run(test.name, func(t *testing.T) {
   322  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   323  
   324  			resolver, err := NewFromContainerImageSquash(img)
   325  			assert.NoError(t, err)
   326  
   327  			refs, err := resolver.FilesByPath(test.path)
   328  			require.NoError(t, err)
   329  			assert.Len(t, refs, len(test.contents))
   330  
   331  			for idx, loc := range refs {
   332  
   333  				reader, err := resolver.FileContentsByLocation(loc)
   334  				require.NoError(t, err)
   335  
   336  				actual, err := io.ReadAll(reader)
   337  				require.NoError(t, err)
   338  
   339  				assert.Equal(t, test.contents[idx], string(actual))
   340  			}
   341  		})
   342  	}
   343  }
   344  
   345  func TestSquashImageResolver_FilesContents_errorOnDirRequest(t *testing.T) {
   346  
   347  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   348  
   349  	resolver, err := NewFromContainerImageSquash(img)
   350  	assert.NoError(t, err)
   351  
   352  	var dirLoc *file.Location
   353  	for loc := range resolver.AllLocations() {
   354  		entry, err := resolver.img.FileCatalog.Get(loc.Reference())
   355  		require.NoError(t, err)
   356  		if entry.Metadata.IsDir() {
   357  			dirLoc = &loc
   358  			break
   359  		}
   360  	}
   361  
   362  	require.NotNil(t, dirLoc)
   363  
   364  	reader, err := resolver.FileContentsByLocation(*dirLoc)
   365  	require.Error(t, err)
   366  	require.Nil(t, reader)
   367  }
   368  
   369  func Test_imageSquashResolver_resolvesLinks(t *testing.T) {
   370  	tests := []struct {
   371  		name     string
   372  		runner   func(file.Resolver) []file.Location
   373  		expected []file.Location
   374  	}{
   375  		{
   376  			name: "by mimetype",
   377  			runner: func(resolver file.Resolver) []file.Location {
   378  				// links should not show up when searching mimetype
   379  				actualLocations, err := resolver.FilesByMIMEType("text/plain")
   380  				assert.NoError(t, err)
   381  				return actualLocations
   382  			},
   383  			expected: []file.Location{
   384  				file.NewVirtualLocation("/etc/group", "/etc/group"),
   385  				file.NewVirtualLocation("/etc/passwd", "/etc/passwd"),
   386  				file.NewVirtualLocation("/etc/shadow", "/etc/shadow"),
   387  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   388  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   389  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   390  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   391  			},
   392  		},
   393  		{
   394  			name: "by glob to links",
   395  			runner: func(resolver file.Resolver) []file.Location {
   396  				// links are searched, but resolve to the real files
   397  				actualLocations, err := resolver.FilesByGlob("*ink-*")
   398  				assert.NoError(t, err)
   399  				return actualLocations
   400  			},
   401  			expected: []file.Location{
   402  				file.NewVirtualLocation("/file-1.txt", "/link-1"),
   403  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   404  
   405  				// though this is a link, and it matches to the file, the resolver de-duplicates files
   406  				// by the real path, so it is not included in the results
   407  				//file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   408  
   409  				file.NewVirtualLocation("/file-3.txt", "/link-within"),
   410  			},
   411  		},
   412  		{
   413  			name: "by basename",
   414  			runner: func(resolver file.Resolver) []file.Location {
   415  				// links are searched, but resolve to the real files
   416  				actualLocations, err := resolver.FilesByGlob("**/file-2.txt")
   417  				assert.NoError(t, err)
   418  				return actualLocations
   419  			},
   420  			expected: []file.Location{
   421  				// this has two copies in the base image, which overwrites the same location
   422  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   423  			},
   424  		},
   425  		{
   426  			name: "by basename glob",
   427  			runner: func(resolver file.Resolver) []file.Location {
   428  				// links are searched, but resolve to the real files
   429  				actualLocations, err := resolver.FilesByGlob("**/file-?.txt")
   430  				assert.NoError(t, err)
   431  				return actualLocations
   432  			},
   433  			expected: []file.Location{
   434  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   435  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   436  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   437  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   438  			},
   439  		},
   440  		{
   441  			name: "by basename glob to links",
   442  			runner: func(resolver file.Resolver) []file.Location {
   443  				actualLocations, err := resolver.FilesByGlob("**/link-*")
   444  				assert.NoError(t, err)
   445  				return actualLocations
   446  			},
   447  			expected: []file.Location{
   448  				file.NewVirtualLocation("/file-1.txt", "/link-1"),
   449  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   450  
   451  				// we already have this real file path via another link, so only one is returned
   452  				// file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   453  
   454  				file.NewVirtualLocation("/file-3.txt", "/link-within"),
   455  			},
   456  		},
   457  		{
   458  			name: "by extension",
   459  			runner: func(resolver file.Resolver) []file.Location {
   460  				// links are searched, but resolve to the real files
   461  				actualLocations, err := resolver.FilesByGlob("**/*.txt")
   462  				assert.NoError(t, err)
   463  				return actualLocations
   464  			},
   465  			expected: []file.Location{
   466  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   467  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   468  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   469  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   470  			},
   471  		},
   472  		{
   473  			name: "by path to degree 1 link",
   474  			runner: func(resolver file.Resolver) []file.Location {
   475  				// links resolve to the final file
   476  				actualLocations, err := resolver.FilesByPath("/link-2")
   477  				assert.NoError(t, err)
   478  				return actualLocations
   479  			},
   480  			expected: []file.Location{
   481  				// we have multiple copies across layers
   482  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   483  			},
   484  		},
   485  		{
   486  			name: "by path to degree 2 link",
   487  			runner: func(resolver file.Resolver) []file.Location {
   488  				// multiple links resolves to the final file
   489  				actualLocations, err := resolver.FilesByPath("/link-indirect")
   490  				assert.NoError(t, err)
   491  				return actualLocations
   492  			},
   493  			expected: []file.Location{
   494  				// we have multiple copies across layers
   495  				file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   496  			},
   497  		},
   498  	}
   499  
   500  	for _, test := range tests {
   501  		t.Run(test.name, func(t *testing.T) {
   502  
   503  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   504  
   505  			resolver, err := NewFromContainerImageSquash(img)
   506  			assert.NoError(t, err)
   507  
   508  			actual := test.runner(resolver)
   509  
   510  			compareLocations(t, test.expected, actual)
   511  		})
   512  	}
   513  
   514  }
   515  
   516  func TestSquashResolver_AllLocations(t *testing.T) {
   517  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-files-deleted")
   518  
   519  	resolver, err := NewFromContainerImageSquash(img)
   520  	assert.NoError(t, err)
   521  
   522  	paths := strset.New()
   523  	for loc := range resolver.AllLocations() {
   524  		paths.Add(loc.RealPath)
   525  	}
   526  	expected := []string{
   527  		"/Dockerfile",
   528  		"/file-3.txt",
   529  		"/target",
   530  		"/target/file-2.txt",
   531  	}
   532  
   533  	// depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image.
   534  	// this isn't important for the test, so we remove them.
   535  	paths.Remove("/proc", "/sys", "/dev", "/etc")
   536  
   537  	pathsList := paths.List()
   538  	sort.Strings(pathsList)
   539  
   540  	assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List()))
   541  }