github.com/kastenhq/syft@v0.0.0-20230821225854-0710af25cdbe/syft/internal/fileresolver/container_image_all_layers_test.go (about)

     1  package fileresolver
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"runtime"
     7  	"sort"
     8  	"testing"
     9  
    10  	"github.com/google/go-cmp/cmp"
    11  	"github.com/scylladb/go-set/strset"
    12  	"github.com/stretchr/testify/assert"
    13  	"github.com/stretchr/testify/require"
    14  
    15  	"github.com/anchore/stereoscope/pkg/imagetest"
    16  	"github.com/kastenhq/syft/syft/file"
    17  )
    18  
// resolution describes one expected result of resolving a path or glob in the
// tests below: the real path that should be reported and the index of the
// image layer the resolved file should belong to.
type resolution struct {
	layer uint   // compared against layer.Metadata.Index of the resolved reference
	path  string // compared against the RealPath of the resolved reference
}
    23  
    24  func TestAllLayersResolver_FilesByPath(t *testing.T) {
    25  	cases := []struct {
    26  		name                 string
    27  		linkPath             string
    28  		resolutions          []resolution
    29  		forcePositiveHasPath bool
    30  	}{
    31  		{
    32  			name:     "link with previous data",
    33  			linkPath: "/link-1",
    34  			resolutions: []resolution{
    35  				{
    36  					layer: 1,
    37  					path:  "/file-1.txt",
    38  				},
    39  			},
    40  		},
    41  		{
    42  			name:     "link with in layer data",
    43  			linkPath: "/link-within",
    44  			resolutions: []resolution{
    45  				{
    46  					layer: 5,
    47  					path:  "/file-3.txt",
    48  				},
    49  			},
    50  		},
    51  		{
    52  			name:     "link with overridden data",
    53  			linkPath: "/link-2",
    54  			resolutions: []resolution{
    55  				{
    56  					layer: 4,
    57  					path:  "/file-2.txt",
    58  				},
    59  				{
    60  					layer: 7,
    61  					path:  "/file-2.txt",
    62  				},
    63  			},
    64  		},
    65  		{
    66  			name:     "indirect link (with overridden data)",
    67  			linkPath: "/link-indirect",
    68  			resolutions: []resolution{
    69  				{
    70  					layer: 4,
    71  					path:  "/file-2.txt",
    72  				},
    73  				{
    74  					layer: 7,
    75  					path:  "/file-2.txt",
    76  				},
    77  			},
    78  		},
    79  		{
    80  			name:                 "dead link",
    81  			linkPath:             "/link-dead",
    82  			resolutions:          []resolution{},
    83  			forcePositiveHasPath: true,
    84  		},
    85  		{
    86  			name:        "ignore directories",
    87  			linkPath:    "/bin",
    88  			resolutions: []resolution{},
    89  			// directories don't resolve BUT do exist
    90  			forcePositiveHasPath: true,
    91  		},
    92  	}
    93  	for _, c := range cases {
    94  		t.Run(c.name, func(t *testing.T) {
    95  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
    96  
    97  			resolver, err := NewFromContainerImageAllLayers(img)
    98  			if err != nil {
    99  				t.Fatalf("could not create resolver: %+v", err)
   100  			}
   101  
   102  			hasPath := resolver.HasPath(c.linkPath)
   103  			if !c.forcePositiveHasPath {
   104  				if len(c.resolutions) > 0 && !hasPath {
   105  					t.Errorf("expected HasPath() to indicate existance, but did not")
   106  				} else if len(c.resolutions) == 0 && hasPath {
   107  					t.Errorf("expeced HasPath() to NOT indicate existance, but does")
   108  				}
   109  			} else if !hasPath {
   110  				t.Errorf("expected HasPath() to indicate existance, but did not (force path)")
   111  			}
   112  
   113  			refs, err := resolver.FilesByPath(c.linkPath)
   114  			if err != nil {
   115  				t.Fatalf("could not use resolver: %+v", err)
   116  			}
   117  
   118  			if len(refs) != len(c.resolutions) {
   119  				t.Fatalf("unexpected number of resolutions: %d", len(refs))
   120  			}
   121  
   122  			for idx, actual := range refs {
   123  				expected := c.resolutions[idx]
   124  
   125  				if string(actual.Reference().RealPath) != expected.path {
   126  					t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), expected.path)
   127  				}
   128  
   129  				if expected.path != "" && string(actual.Reference().RealPath) != actual.RealPath {
   130  					t.Errorf("we should always prefer real paths over ones with links")
   131  				}
   132  
   133  				layer := img.FileCatalog.Layer(actual.Reference())
   134  				if layer.Metadata.Index != expected.layer {
   135  					t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, expected.layer)
   136  				}
   137  			}
   138  		})
   139  	}
   140  }
   141  
   142  func TestAllLayersResolver_FilesByGlob(t *testing.T) {
   143  	cases := []struct {
   144  		name        string
   145  		glob        string
   146  		resolutions []resolution
   147  	}{
   148  		{
   149  			name: "link with previous data",
   150  			glob: "**/*ink-1",
   151  			resolutions: []resolution{
   152  				{
   153  					layer: 1,
   154  					path:  "/file-1.txt",
   155  				},
   156  			},
   157  		},
   158  		{
   159  			name: "link with in layer data",
   160  			glob: "**/*nk-within",
   161  			resolutions: []resolution{
   162  				{
   163  					layer: 5,
   164  					path:  "/file-3.txt",
   165  				},
   166  			},
   167  		},
   168  		{
   169  			name: "link with overridden data",
   170  			glob: "**/*ink-2",
   171  			resolutions: []resolution{
   172  				{
   173  					layer: 4,
   174  					path:  "/file-2.txt",
   175  				},
   176  				{
   177  					layer: 7,
   178  					path:  "/file-2.txt",
   179  				},
   180  			},
   181  		},
   182  		{
   183  			name: "indirect link (with overridden data)",
   184  			glob: "**/*nk-indirect",
   185  			resolutions: []resolution{
   186  				{
   187  					layer: 4,
   188  					path:  "/file-2.txt",
   189  				},
   190  				{
   191  					layer: 7,
   192  					path:  "/file-2.txt",
   193  				},
   194  			},
   195  		},
   196  		{
   197  			name:        "dead link",
   198  			glob:        "**/*k-dead",
   199  			resolutions: []resolution{},
   200  		},
   201  		{
   202  			name:        "ignore directories",
   203  			glob:        "**/bin",
   204  			resolutions: []resolution{},
   205  		},
   206  	}
   207  	for _, c := range cases {
   208  		t.Run(c.name, func(t *testing.T) {
   209  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   210  
   211  			resolver, err := NewFromContainerImageAllLayers(img)
   212  			if err != nil {
   213  				t.Fatalf("could not create resolver: %+v", err)
   214  			}
   215  
   216  			refs, err := resolver.FilesByGlob(c.glob)
   217  			if err != nil {
   218  				t.Fatalf("could not use resolver: %+v", err)
   219  			}
   220  
   221  			if len(refs) != len(c.resolutions) {
   222  				t.Fatalf("unexpected number of resolutions: %d", len(refs))
   223  			}
   224  
   225  			for idx, actual := range refs {
   226  				expected := c.resolutions[idx]
   227  
   228  				if string(actual.Reference().RealPath) != expected.path {
   229  					t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), expected.path)
   230  				}
   231  
   232  				if expected.path != "" && string(actual.Reference().RealPath) != actual.RealPath {
   233  					t.Errorf("we should always prefer real paths over ones with links")
   234  				}
   235  
   236  				layer := img.FileCatalog.Layer(actual.Reference())
   237  
   238  				if layer.Metadata.Index != expected.layer {
   239  					t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, expected.layer)
   240  				}
   241  			}
   242  		})
   243  	}
   244  }
   245  
   246  func Test_imageAllLayersResolver_FilesByMIMEType(t *testing.T) {
   247  
   248  	tests := []struct {
   249  		fixtureName   string
   250  		mimeType      string
   251  		expectedPaths []string
   252  	}{
   253  		{
   254  			fixtureName:   "image-duplicate-path",
   255  			mimeType:      "text/plain",
   256  			expectedPaths: []string{"/somefile-1.txt", "/somefile-1.txt"},
   257  		},
   258  	}
   259  	for _, test := range tests {
   260  		t.Run(test.fixtureName, func(t *testing.T) {
   261  			img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureName)
   262  
   263  			resolver, err := NewFromContainerImageAllLayers(img)
   264  			assert.NoError(t, err)
   265  
   266  			locations, err := resolver.FilesByMIMEType(test.mimeType)
   267  			assert.NoError(t, err)
   268  
   269  			assert.Len(t, test.expectedPaths, len(locations))
   270  			for idx, l := range locations {
   271  				assert.Equal(t, test.expectedPaths[idx], l.RealPath, "does not have path %q", l.RealPath)
   272  			}
   273  		})
   274  	}
   275  }
   276  
   277  func Test_imageAllLayersResolver_hasFilesystemIDInLocation(t *testing.T) {
   278  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-duplicate-path")
   279  
   280  	resolver, err := NewFromContainerImageAllLayers(img)
   281  	assert.NoError(t, err)
   282  
   283  	locations, err := resolver.FilesByMIMEType("text/plain")
   284  	assert.NoError(t, err)
   285  	assert.NotEmpty(t, locations)
   286  	for _, location := range locations {
   287  		assert.NotEmpty(t, location.FileSystemID)
   288  	}
   289  
   290  	locations, err = resolver.FilesByGlob("*.txt")
   291  	assert.NoError(t, err)
   292  	assert.NotEmpty(t, locations)
   293  	for _, location := range locations {
   294  		assert.NotEmpty(t, location.FileSystemID)
   295  	}
   296  
   297  	locations, err = resolver.FilesByPath("/somefile-1.txt")
   298  	assert.NoError(t, err)
   299  	assert.NotEmpty(t, locations)
   300  	for _, location := range locations {
   301  		assert.NotEmpty(t, location.FileSystemID)
   302  	}
   303  
   304  }
   305  
   306  func TestAllLayersImageResolver_FilesContents(t *testing.T) {
   307  
   308  	tests := []struct {
   309  		name     string
   310  		fixture  string
   311  		contents []string
   312  	}{
   313  		{
   314  			name:    "one degree",
   315  			fixture: "link-2",
   316  			contents: []string{
   317  				"file 2!",            // from the first resolved layer's perspective
   318  				"NEW file override!", // from the second resolved layers perspective
   319  			},
   320  		},
   321  		{
   322  			name:    "two degrees",
   323  			fixture: "link-indirect",
   324  			contents: []string{
   325  				"file 2!",
   326  				"NEW file override!",
   327  			},
   328  		},
   329  		{
   330  			name:     "dead link",
   331  			fixture:  "link-dead",
   332  			contents: []string{},
   333  		},
   334  	}
   335  
   336  	for _, test := range tests {
   337  		t.Run(test.name, func(t *testing.T) {
   338  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   339  
   340  			resolver, err := NewFromContainerImageAllLayers(img)
   341  			assert.NoError(t, err)
   342  
   343  			refs, err := resolver.FilesByPath(test.fixture)
   344  			require.NoError(t, err)
   345  
   346  			// the given path should have an overridden file
   347  			require.Len(t, refs, len(test.contents))
   348  
   349  			for idx, loc := range refs {
   350  				reader, err := resolver.FileContentsByLocation(loc)
   351  				require.NoError(t, err)
   352  
   353  				actual, err := io.ReadAll(reader)
   354  				require.NoError(t, err)
   355  
   356  				assert.Equal(t, test.contents[idx], string(actual))
   357  			}
   358  
   359  		})
   360  	}
   361  }
   362  
   363  func TestAllLayersImageResolver_FilesContents_errorOnDirRequest(t *testing.T) {
   364  
   365  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   366  
   367  	resolver, err := NewFromContainerImageAllLayers(img)
   368  	assert.NoError(t, err)
   369  
   370  	var dirLoc *file.Location
   371  	for loc := range resolver.AllLocations() {
   372  		entry, err := resolver.img.FileCatalog.Get(loc.Reference())
   373  		require.NoError(t, err)
   374  		if entry.Metadata.IsDir() {
   375  			dirLoc = &loc
   376  			break
   377  		}
   378  	}
   379  
   380  	require.NotNil(t, dirLoc)
   381  
   382  	reader, err := resolver.FileContentsByLocation(*dirLoc)
   383  	require.Error(t, err)
   384  	require.Nil(t, reader)
   385  }
   386  
   387  func Test_imageAllLayersResolver_resolvesLinks(t *testing.T) {
   388  	tests := []struct {
   389  		name     string
   390  		runner   func(file.Resolver) []file.Location
   391  		expected []file.Location
   392  	}{
   393  		{
   394  			name: "by mimetype",
   395  			runner: func(resolver file.Resolver) []file.Location {
   396  				// links should not show up when searching mimetype
   397  				actualLocations, err := resolver.FilesByMIMEType("text/plain")
   398  				assert.NoError(t, err)
   399  				return actualLocations
   400  			},
   401  			expected: []file.Location{
   402  				file.NewVirtualLocation("/etc/group", "/etc/group"),
   403  				file.NewVirtualLocation("/etc/passwd", "/etc/passwd"),
   404  				file.NewVirtualLocation("/etc/shadow", "/etc/shadow"),
   405  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   406  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), // copy 1
   407  				// note: we're de-duping the redundant access to file-3.txt
   408  				// ... (there would usually be two copies)
   409  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   410  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),               // copy 2
   411  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), // copy 1
   412  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), // copy 2
   413  			},
   414  		},
   415  		{
   416  			name: "by glob to links",
   417  			runner: func(resolver file.Resolver) []file.Location {
   418  				// links are searched, but resolve to the real files
   419  				actualLocations, err := resolver.FilesByGlob("*ink-*")
   420  				assert.NoError(t, err)
   421  				return actualLocations
   422  			},
   423  			expected: []file.Location{
   424  				file.NewVirtualLocation("/file-1.txt", "/link-1"),
   425  				file.NewVirtualLocation("/file-2.txt", "/link-2"), // copy 1
   426  				file.NewVirtualLocation("/file-2.txt", "/link-2"), // copy 2
   427  				file.NewVirtualLocation("/file-3.txt", "/link-within"),
   428  			},
   429  		},
   430  		{
   431  			name: "by basename",
   432  			runner: func(resolver file.Resolver) []file.Location {
   433  				// links are searched, but resolve to the real files
   434  				actualLocations, err := resolver.FilesByGlob("**/file-2.txt")
   435  				assert.NoError(t, err)
   436  				return actualLocations
   437  			},
   438  			expected: []file.Location{
   439  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), // copy 1
   440  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), // copy 2
   441  			},
   442  		},
   443  		{
   444  			name: "by basename glob",
   445  			runner: func(resolver file.Resolver) []file.Location {
   446  				// links are searched, but resolve to the real files
   447  				actualLocations, err := resolver.FilesByGlob("**/file-?.txt")
   448  				assert.NoError(t, err)
   449  				return actualLocations
   450  			},
   451  			expected: []file.Location{
   452  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   453  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), // copy 1
   454  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), // copy 2
   455  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   456  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   457  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), // when we copy into the link path, the same file-4.txt is copied
   458  			},
   459  		},
   460  		{
   461  			name: "by extension",
   462  			runner: func(resolver file.Resolver) []file.Location {
   463  				// links are searched, but resolve to the real files
   464  				actualLocations, err := resolver.FilesByGlob("**/*.txt")
   465  				assert.NoError(t, err)
   466  				return actualLocations
   467  			},
   468  			expected: []file.Location{
   469  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   470  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), // copy 1
   471  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), // copy 2
   472  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   473  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   474  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), // when we copy into the link path, the same file-4.txt is copied
   475  			},
   476  		},
   477  		{
   478  			name: "by path to degree 1 link",
   479  			runner: func(resolver file.Resolver) []file.Location {
   480  				// links resolve to the final file
   481  				actualLocations, err := resolver.FilesByPath("/link-2")
   482  				assert.NoError(t, err)
   483  				return actualLocations
   484  			},
   485  			expected: []file.Location{
   486  				// we have multiple copies across layers
   487  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   488  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   489  			},
   490  		},
   491  		{
   492  			name: "by path to degree 2 link",
   493  			runner: func(resolver file.Resolver) []file.Location {
   494  				// multiple links resolves to the final file
   495  				actualLocations, err := resolver.FilesByPath("/link-indirect")
   496  				assert.NoError(t, err)
   497  				return actualLocations
   498  			},
   499  			expected: []file.Location{
   500  				// we have multiple copies across layers
   501  				file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   502  				file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   503  			},
   504  		},
   505  	}
   506  
   507  	for _, test := range tests {
   508  		t.Run(test.name, func(t *testing.T) {
   509  
   510  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   511  
   512  			resolver, err := NewFromContainerImageAllLayers(img)
   513  			assert.NoError(t, err)
   514  
   515  			actual := test.runner(resolver)
   516  
   517  			compareLocations(t, test.expected, actual)
   518  		})
   519  	}
   520  
   521  }
   522  
   523  func TestAllLayersResolver_AllLocations(t *testing.T) {
   524  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-files-deleted")
   525  
   526  	arch := "x86_64"
   527  	if runtime.GOARCH == "arm64" {
   528  		arch = "aarch64"
   529  	}
   530  
   531  	resolver, err := NewFromContainerImageAllLayers(img)
   532  	assert.NoError(t, err)
   533  
   534  	paths := strset.New()
   535  	for loc := range resolver.AllLocations() {
   536  		paths.Add(loc.RealPath)
   537  	}
   538  	expected := []string{
   539  		"/Dockerfile",
   540  		"/file-1.txt",
   541  		"/file-3.txt",
   542  		"/target",
   543  		"/target/file-2.txt",
   544  
   545  		"/.wh.bin",
   546  		"/.wh.file-1.txt",
   547  		"/.wh.lib",
   548  		"/bin",
   549  		"/bin/arch",
   550  		"/bin/ash",
   551  		"/bin/base64",
   552  		"/bin/bbconfig",
   553  		"/bin/busybox",
   554  		"/bin/cat",
   555  		"/bin/chattr",
   556  		"/bin/chgrp",
   557  		"/bin/chmod",
   558  		"/bin/chown",
   559  		"/bin/cp",
   560  		"/bin/date",
   561  		"/bin/dd",
   562  		"/bin/df",
   563  		"/bin/dmesg",
   564  		"/bin/dnsdomainname",
   565  		"/bin/dumpkmap",
   566  		"/bin/echo",
   567  		"/bin/ed",
   568  		"/bin/egrep",
   569  		"/bin/false",
   570  		"/bin/fatattr",
   571  		"/bin/fdflush",
   572  		"/bin/fgrep",
   573  		"/bin/fsync",
   574  		"/bin/getopt",
   575  		"/bin/grep",
   576  		"/bin/gunzip",
   577  		"/bin/gzip",
   578  		"/bin/hostname",
   579  		"/bin/ionice",
   580  		"/bin/iostat",
   581  		"/bin/ipcalc",
   582  		"/bin/kbd_mode",
   583  		"/bin/kill",
   584  		"/bin/link",
   585  		"/bin/linux32",
   586  		"/bin/linux64",
   587  		"/bin/ln",
   588  		"/bin/login",
   589  		"/bin/ls",
   590  		"/bin/lsattr",
   591  		"/bin/lzop",
   592  		"/bin/makemime",
   593  		"/bin/mkdir",
   594  		"/bin/mknod",
   595  		"/bin/mktemp",
   596  		"/bin/more",
   597  		"/bin/mount",
   598  		"/bin/mountpoint",
   599  		"/bin/mpstat",
   600  		"/bin/mv",
   601  		"/bin/netstat",
   602  		"/bin/nice",
   603  		"/bin/pidof",
   604  		"/bin/ping",
   605  		"/bin/ping6",
   606  		"/bin/pipe_progress",
   607  		"/bin/printenv",
   608  		"/bin/ps",
   609  		"/bin/pwd",
   610  		"/bin/reformime",
   611  		"/bin/rev",
   612  		"/bin/rm",
   613  		"/bin/rmdir",
   614  		"/bin/run-parts",
   615  		"/bin/sed",
   616  		"/bin/setpriv",
   617  		"/bin/setserial",
   618  		"/bin/sh",
   619  		"/bin/sleep",
   620  		"/bin/stat",
   621  		"/bin/stty",
   622  		"/bin/su",
   623  		"/bin/sync",
   624  		"/bin/tar",
   625  		"/bin/touch",
   626  		"/bin/true",
   627  		"/bin/umount",
   628  		"/bin/uname",
   629  		"/bin/usleep",
   630  		"/bin/watch",
   631  		"/bin/zcat",
   632  		"/lib",
   633  		"/lib/apk",
   634  		"/lib/apk/db",
   635  		"/lib/apk/db/installed",
   636  		"/lib/apk/db/lock",
   637  		"/lib/apk/db/scripts.tar",
   638  		"/lib/apk/db/triggers",
   639  		"/lib/apk/exec",
   640  		"/lib/firmware",
   641  		fmt.Sprintf("/lib/ld-musl-%s.so.1", arch),
   642  		"/lib/libapk.so.3.12.0",
   643  		fmt.Sprintf("/lib/libc.musl-%s.so.1", arch),
   644  		"/lib/libcrypto.so.3",
   645  		"/lib/libssl.so.3",
   646  		"/lib/libz.so.1",
   647  		"/lib/libz.so.1.2.13",
   648  		"/lib/mdev",
   649  		"/lib/modules-load.d",
   650  		"/lib/sysctl.d",
   651  		"/lib/sysctl.d/00-alpine.conf",
   652  	}
   653  
   654  	// depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image.
   655  	// this isn't important for the test, so we remove them.
   656  	paths.Remove("/proc", "/sys", "/dev", "/etc")
   657  
   658  	pathsList := paths.List()
   659  	sort.Strings(pathsList)
   660  
   661  	assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List()))
   662  }