github.com/anchore/syft@v1.38.2/syft/internal/fileresolver/container_image_deep_squash_test.go (about)

     1  package fileresolver
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"sort"
     8  	"sync"
     9  	"testing"
    10  	"time"
    11  
    12  	"github.com/google/go-cmp/cmp"
    13  	"github.com/scylladb/go-set/strset"
    14  	"github.com/stretchr/testify/assert"
    15  	"github.com/stretchr/testify/require"
    16  	"go.uber.org/goleak"
    17  
    18  	"github.com/anchore/stereoscope/pkg/imagetest"
    19  	"github.com/anchore/syft/syft/file"
    20  )
    21  
// mockSimpleResolver is a small in-memory test double for file.Resolver. It
// keeps a set of known paths and a lookup of locations keyed by real path;
// only the methods defined on it in this file are backed by that data, every
// other method is promoted from the embedded interface value.
type mockSimpleResolver struct {
	file.Resolver // embed to fulfill the interface, panics for stuff not implemented
	paths         *strset.Set
	locations     map[string][]file.Location
}
    27  
    28  func newMockSimpleResolver(locations []file.Location) *mockSimpleResolver {
    29  	paths := strset.New()
    30  	locationMap := make(map[string][]file.Location)
    31  	for _, loc := range locations {
    32  		paths.Add(loc.RealPath)
    33  		paths.Add(loc.AccessPath)
    34  		locationMap[loc.RealPath] = append(locationMap[loc.RealPath], loc)
    35  	}
    36  	return &mockSimpleResolver{
    37  		paths:     paths,
    38  		locations: locationMap,
    39  	}
    40  }
    41  
// HasPath reports whether p is a member of the mock's path set (which
// newMockSimpleResolver fills with each location's real and access path).
func (m *mockSimpleResolver) HasPath(p string) bool {
	return m.paths.Has(p)
}
    45  
    46  func (m *mockSimpleResolver) FilesByPath(paths ...string) ([]file.Location, error) {
    47  	var results []file.Location
    48  	for _, path := range paths {
    49  		if locs, exists := m.locations[path]; exists {
    50  			results = append(results, locs...)
    51  		}
    52  	}
    53  	return results, nil
    54  }
    55  
    56  func Test_ContainerImageDeepSquash_FilesByPath(t *testing.T) {
    57  	cases := []struct {
    58  		name                 string
    59  		linkPath             string
    60  		resolveLayer         uint
    61  		resolvePath          string
    62  		forcePositiveHasPath bool
    63  		expectedRefs         int
    64  	}{
    65  		{
    66  			name:         "link with previous data",
    67  			linkPath:     "/link-1",
    68  			resolveLayer: 1,
    69  			resolvePath:  "/file-1.txt",
    70  			expectedRefs: 1,
    71  		},
    72  		{
    73  			name:         "link with in layer data",
    74  			linkPath:     "/link-within",
    75  			resolveLayer: 5,
    76  			resolvePath:  "/file-3.txt",
    77  			expectedRefs: 1,
    78  		},
    79  		{
    80  			name:         "link with overridden data",
    81  			linkPath:     "/link-2",
    82  			resolveLayer: 7,
    83  			resolvePath:  "/file-2.txt",
    84  			expectedRefs: 2,
    85  		},
    86  		{
    87  			name:         "indirect link (with overridden data)",
    88  			linkPath:     "/link-indirect",
    89  			resolveLayer: 7,
    90  			resolvePath:  "/file-2.txt",
    91  			expectedRefs: 2,
    92  		},
    93  		{
    94  			name:         "dead link",
    95  			linkPath:     "/link-dead",
    96  			resolveLayer: 8,
    97  			resolvePath:  "",
    98  			// the path should exist, even if the link is dead
    99  			forcePositiveHasPath: true,
   100  		},
   101  		{
   102  			name:        "ignore directories",
   103  			linkPath:    "/bin",
   104  			resolvePath: "",
   105  			// the path should exist, even if we ignore it
   106  			forcePositiveHasPath: true,
   107  		},
   108  		{
   109  			name:         "parent is a link (with overridden data)",
   110  			linkPath:     "/parent-link/file-4.txt",
   111  			resolveLayer: 11,
   112  			resolvePath:  "/parent/file-4.txt",
   113  			expectedRefs: 1,
   114  		},
   115  	}
   116  	for _, c := range cases {
   117  		t.Run(c.name, func(t *testing.T) {
   118  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   119  
   120  			resolver, err := NewFromContainerImageDeepSquash(img)
   121  			require.NoError(t, err)
   122  
   123  			hasPath := resolver.HasPath(c.linkPath)
   124  			if !c.forcePositiveHasPath {
   125  				if c.resolvePath != "" && !hasPath {
   126  					t.Errorf("expected HasPath() to indicate existance, but did not")
   127  				} else if c.resolvePath == "" && hasPath {
   128  					t.Errorf("expeced HasPath() to NOT indicate existance, but does")
   129  				}
   130  			} else if !hasPath {
   131  				t.Errorf("expected HasPath() to indicate existance, but did not (force path)")
   132  			}
   133  
   134  			refs, err := resolver.FilesByPath(c.linkPath)
   135  			require.NoError(t, err)
   136  
   137  			expectedRefs := c.expectedRefs
   138  			if c.resolvePath == "" {
   139  				expectedRefs = 0
   140  			}
   141  
   142  			if len(refs) != expectedRefs {
   143  				t.Fatalf("unexpected number of resolutions: %d", len(refs))
   144  			}
   145  
   146  			if expectedRefs == 0 {
   147  				// nothing else to assert
   148  				return
   149  			}
   150  
   151  			actual := refs[0]
   152  
   153  			if string(actual.Reference().RealPath) != c.resolvePath {
   154  				t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath)
   155  			}
   156  
   157  			if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath {
   158  				t.Errorf("we should always prefer real paths over ones with links")
   159  			}
   160  
   161  			layer := img.FileCatalog.Layer(actual.Reference())
   162  
   163  			if layer.Metadata.Index != c.resolveLayer {
   164  				t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer)
   165  			}
   166  		})
   167  	}
   168  }
   169  
   170  func Test_ContainerImageDeepSquash_FilesByGlob(t *testing.T) {
   171  	cases := []struct {
   172  		name         string
   173  		glob         string
   174  		resolveLayer uint
   175  		resolvePath  string
   176  		expectedRefs int
   177  	}{
   178  		{
   179  			name:         "link with previous data",
   180  			glob:         "**/link-1",
   181  			resolveLayer: 1,
   182  			resolvePath:  "/file-1.txt",
   183  			expectedRefs: 1,
   184  		},
   185  		{
   186  			name:         "link with in layer data",
   187  			glob:         "**/link-within",
   188  			resolveLayer: 5,
   189  			resolvePath:  "/file-3.txt",
   190  			expectedRefs: 1,
   191  		},
   192  		{
   193  			name:         "link with overridden data",
   194  			glob:         "**/link-2",
   195  			resolveLayer: 7,
   196  			resolvePath:  "/file-2.txt",
   197  			expectedRefs: 2,
   198  		},
   199  		{
   200  			name:         "indirect link (with overridden data)",
   201  			glob:         "**/link-indirect",
   202  			resolveLayer: 7,
   203  			resolvePath:  "/file-2.txt",
   204  			expectedRefs: 2,
   205  		},
   206  		{
   207  			name: "dead link",
   208  			glob: "**/link-dead",
   209  			// dead links are dead! they shouldn't match on globs
   210  			resolvePath: "",
   211  		},
   212  		{
   213  			name:        "ignore directories",
   214  			glob:        "**/bin",
   215  			resolvePath: "",
   216  		},
   217  		{
   218  			name:         "parent without link",
   219  			glob:         "**/parent/*.txt",
   220  			resolveLayer: 11,
   221  			resolvePath:  "/parent/file-4.txt",
   222  			expectedRefs: 2,
   223  		},
   224  		{
   225  			name:         "parent is a link (override)",
   226  			glob:         "**/parent-link/file-4.txt",
   227  			resolveLayer: 11,
   228  			resolvePath:  "/parent/file-4.txt",
   229  			expectedRefs: 2,
   230  		},
   231  	}
   232  	for _, c := range cases {
   233  		t.Run(c.name, func(t *testing.T) {
   234  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   235  
   236  			resolver, err := NewFromContainerImageDeepSquash(img)
   237  			require.NoError(t, err)
   238  
   239  			refs, err := resolver.FilesByGlob(c.glob)
   240  			require.NoError(t, err)
   241  
   242  			expectedRefs := c.expectedRefs
   243  			if c.resolvePath == "" {
   244  				expectedRefs = 0
   245  			}
   246  
   247  			if len(refs) != expectedRefs {
   248  				t.Fatalf("unexpected number of resolutions: %d", len(refs))
   249  			}
   250  
   251  			if expectedRefs == 0 {
   252  				// nothing else to assert
   253  				return
   254  			}
   255  
   256  			actual := refs[0]
   257  
   258  			if string(actual.Reference().RealPath) != c.resolvePath {
   259  				t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath)
   260  			}
   261  
   262  			if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath {
   263  				t.Errorf("we should always prefer real paths over ones with links")
   264  			}
   265  
   266  			layer := img.FileCatalog.Layer(actual.Reference())
   267  
   268  			if layer.Metadata.Index != c.resolveLayer {
   269  				t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer)
   270  			}
   271  		})
   272  	}
   273  }
   274  
   275  func Test_ContainerImageDeepSquash_FilesByMIMEType(t *testing.T) {
   276  
   277  	tests := []struct {
   278  		fixtureName   string
   279  		mimeType      string
   280  		expectedPaths *strset.Set
   281  	}{
   282  		{
   283  			fixtureName:   "image-simple",
   284  			mimeType:      "text/plain",
   285  			expectedPaths: strset.New("/somefile-1.txt", "/somefile-2.txt", "/really/nested/file-3.txt"),
   286  		},
   287  	}
   288  
   289  	for _, test := range tests {
   290  		t.Run(test.fixtureName, func(t *testing.T) {
   291  			img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureName)
   292  
   293  			resolver, err := NewFromContainerImageDeepSquash(img)
   294  			assert.NoError(t, err)
   295  
   296  			locations, err := resolver.FilesByMIMEType(test.mimeType)
   297  			assert.NoError(t, err)
   298  
   299  			assert.Len(t, locations, test.expectedPaths.Size())
   300  			for _, l := range locations {
   301  				assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath)
   302  			}
   303  		})
   304  	}
   305  }
   306  
   307  func Test_ContainerImageDeepSquash_hasFilesystemIDInLocation(t *testing.T) {
   308  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-duplicate-path")
   309  
   310  	resolver, err := NewFromContainerImageDeepSquash(img)
   311  	assert.NoError(t, err)
   312  
   313  	locations, err := resolver.FilesByMIMEType("text/plain")
   314  	assert.NoError(t, err)
   315  	assert.NotEmpty(t, locations)
   316  	for _, location := range locations {
   317  		assert.NotEmpty(t, location.FileSystemID)
   318  	}
   319  
   320  	locations, err = resolver.FilesByGlob("*.txt")
   321  	assert.NoError(t, err)
   322  	assert.NotEmpty(t, locations)
   323  	for _, location := range locations {
   324  		assert.NotEmpty(t, location.FileSystemID)
   325  	}
   326  
   327  	locations, err = resolver.FilesByPath("/somefile-1.txt")
   328  	assert.NoError(t, err)
   329  	assert.NotEmpty(t, locations)
   330  	for _, location := range locations {
   331  		assert.NotEmpty(t, location.FileSystemID)
   332  	}
   333  
   334  }
   335  
   336  func Test_ContainerImageDeepSquash_FilesContents(t *testing.T) {
   337  
   338  	tests := []struct {
   339  		name     string
   340  		path     string
   341  		contents []string
   342  	}{
   343  		{
   344  			name: "one degree",
   345  			path: "link-2",
   346  			contents: []string{
   347  				"NEW file override!",
   348  				"file 2!",
   349  			},
   350  		},
   351  		{
   352  			name: "two degrees",
   353  			path: "link-indirect",
   354  			contents: []string{
   355  				"NEW file override!",
   356  				"file 2!",
   357  			},
   358  		},
   359  		{
   360  			name:     "dead link",
   361  			path:     "link-dead",
   362  			contents: []string{},
   363  		},
   364  	}
   365  
   366  	for _, test := range tests {
   367  		t.Run(test.name, func(t *testing.T) {
   368  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   369  
   370  			resolver, err := NewFromContainerImageDeepSquash(img)
   371  			assert.NoError(t, err)
   372  
   373  			refs, err := resolver.FilesByPath(test.path)
   374  			require.NoError(t, err)
   375  			assert.Len(t, refs, len(test.contents))
   376  
   377  			for idx, loc := range refs {
   378  
   379  				reader, err := resolver.FileContentsByLocation(loc)
   380  				require.NoError(t, err)
   381  
   382  				actual, err := io.ReadAll(reader)
   383  				require.NoError(t, err)
   384  
   385  				assert.Equal(t, test.contents[idx], string(actual))
   386  			}
   387  		})
   388  	}
   389  }
   390  
   391  func Test_ContainerImageDeepSquash_FilesContents_errorOnDirRequest(t *testing.T) {
   392  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   393  
   394  	resolver, err := NewFromContainerImageDeepSquash(img)
   395  	assert.NoError(t, err)
   396  
   397  	var dirLoc *file.Location
   398  	ctx, cancel := context.WithCancel(context.Background())
   399  	defer cancel()
   400  
   401  	for loc := range resolver.AllLocations(ctx) {
   402  		// this is known to be a directory in the test fixture
   403  		if dirLoc == nil && loc.RealPath == "/parent" {
   404  			dirLoc = &loc
   405  		}
   406  	}
   407  
   408  	require.NotNil(t, dirLoc)
   409  
   410  	reader, err := resolver.FileContentsByLocation(*dirLoc)
   411  	require.Error(t, err)
   412  	require.Nil(t, reader)
   413  }
   414  
   415  func Test_ContainerImageDeepSquash_resolvesLinks(t *testing.T) {
   416  	tests := []struct {
   417  		name     string
   418  		runner   func(file.Resolver) []file.Location
   419  		expected []file.Location
   420  	}{
   421  		{
   422  			name: "by mimetype",
   423  			runner: func(resolver file.Resolver) []file.Location {
   424  				// links should not show up when searching mimetype
   425  				actualLocations, err := resolver.FilesByMIMEType("text/plain")
   426  				assert.NoError(t, err)
   427  				return actualLocations
   428  			},
   429  			expected: []file.Location{
   430  				file.NewVirtualLocation("/etc/group", "/etc/group"),
   431  				file.NewVirtualLocation("/etc/passwd", "/etc/passwd"),
   432  				file.NewVirtualLocation("/etc/shadow", "/etc/shadow"),
   433  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   434  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   435  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   436  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   437  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   438  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   439  			},
   440  		},
   441  		{
   442  			name: "by glob to links",
   443  			runner: func(resolver file.Resolver) []file.Location {
   444  				// links are searched, but resolve to the real files
   445  				actualLocations, err := resolver.FilesByGlob("*ink-*")
   446  				assert.NoError(t, err)
   447  				return actualLocations
   448  			},
   449  			expected: []file.Location{
   450  				file.NewVirtualLocation("/file-1.txt", "/link-1"),
   451  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   452  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   453  
   454  				// though this is a link, and it matches to the file, the resolver de-duplicates files
   455  				// by the real path, so it is not included in the results
   456  				//file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   457  
   458  				file.NewVirtualLocation("/file-3.txt", "/link-within"),
   459  			},
   460  		},
   461  		{
   462  			name: "by basename",
   463  			runner: func(resolver file.Resolver) []file.Location {
   464  				// links are searched, but resolve to the real files
   465  				actualLocations, err := resolver.FilesByGlob("**/file-2.txt")
   466  				assert.NoError(t, err)
   467  				return actualLocations
   468  			},
   469  			expected: []file.Location{
   470  				// this has two copies in the base image, which overwrites the same location
   471  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   472  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   473  			},
   474  		},
   475  		{
   476  			name: "by basename glob",
   477  			runner: func(resolver file.Resolver) []file.Location {
   478  				// links are searched, but resolve to the real files
   479  				actualLocations, err := resolver.FilesByGlob("**/file-?.txt")
   480  				assert.NoError(t, err)
   481  				return actualLocations
   482  			},
   483  			expected: []file.Location{
   484  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   485  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   486  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   487  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   488  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   489  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   490  			},
   491  		},
   492  		{
   493  			name: "by basename glob to links",
   494  			runner: func(resolver file.Resolver) []file.Location {
   495  				actualLocations, err := resolver.FilesByGlob("**/link-*")
   496  				assert.NoError(t, err)
   497  				return actualLocations
   498  			},
   499  			expected: []file.Location{
   500  				file.NewVirtualLocation("/file-1.txt", "/link-1"),
   501  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   502  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   503  
   504  				// we already have this real file path via another link, so only one is returned
   505  				// file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   506  
   507  				file.NewVirtualLocation("/file-3.txt", "/link-within"),
   508  			},
   509  		},
   510  		{
   511  			name: "by extension",
   512  			runner: func(resolver file.Resolver) []file.Location {
   513  				// links are searched, but resolve to the real files
   514  				actualLocations, err := resolver.FilesByGlob("**/*.txt")
   515  				assert.NoError(t, err)
   516  				return actualLocations
   517  			},
   518  			expected: []file.Location{
   519  				file.NewVirtualLocation("/file-1.txt", "/file-1.txt"),
   520  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   521  				file.NewVirtualLocation("/file-2.txt", "/file-2.txt"),
   522  				file.NewVirtualLocation("/file-3.txt", "/file-3.txt"),
   523  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   524  				file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"),
   525  			},
   526  		},
   527  		{
   528  			name: "by path to degree 1 link",
   529  			runner: func(resolver file.Resolver) []file.Location {
   530  				// links resolve to the final file
   531  				actualLocations, err := resolver.FilesByPath("/link-2")
   532  				assert.NoError(t, err)
   533  				return actualLocations
   534  			},
   535  			expected: []file.Location{
   536  				// we have multiple copies across layers
   537  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   538  				file.NewVirtualLocation("/file-2.txt", "/link-2"),
   539  			},
   540  		},
   541  		{
   542  			name: "by path to degree 2 link",
   543  			runner: func(resolver file.Resolver) []file.Location {
   544  				// multiple links resolves to the final file
   545  				actualLocations, err := resolver.FilesByPath("/link-indirect")
   546  				assert.NoError(t, err)
   547  				return actualLocations
   548  			},
   549  			expected: []file.Location{
   550  				// we have multiple copies across layers
   551  				file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   552  				file.NewVirtualLocation("/file-2.txt", "/link-indirect"),
   553  			},
   554  		},
   555  	}
   556  
   557  	for _, test := range tests {
   558  		t.Run(test.name, func(t *testing.T) {
   559  
   560  			img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks")
   561  
   562  			resolver, err := NewFromContainerImageDeepSquash(img)
   563  			assert.NoError(t, err)
   564  
   565  			actual := test.runner(resolver)
   566  
   567  			compareLocations(t, test.expected, actual)
   568  		})
   569  	}
   570  
   571  }
   572  
   573  func Test_ContainerImageDeepSquash_AllLocations(t *testing.T) {
   574  	img := imagetest.GetFixtureImage(t, "docker-archive", "image-files-deleted")
   575  
   576  	resolver, err := NewFromContainerImageDeepSquash(img)
   577  	assert.NoError(t, err)
   578  
   579  	paths := strset.New()
   580  	for loc := range resolver.AllLocations(context.Background()) {
   581  		paths.Add(loc.RealPath)
   582  	}
   583  	expected := []string{
   584  		"/Dockerfile",
   585  		"/file-3.txt",
   586  		"/target",
   587  		"/target/file-2.txt",
   588  	}
   589  
   590  	// depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image.
   591  	// this isn't important for the test, so we remove them.
   592  	paths.Remove("/proc", "/sys", "/dev", "/etc")
   593  
   594  	// remove cache created by Mac Rosetta when emulating different arches
   595  	paths.Remove("/.cache/rosetta", "/.cache")
   596  
   597  	pathsList := paths.List()
   598  	sort.Strings(pathsList)
   599  
   600  	assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List()))
   601  }
   602  
   603  func TestContainerImageDeepSquash_MergeLocations(t *testing.T) {
   604  	tests := []struct {
   605  		name                string
   606  		squashedLocations   file.LocationSet
   607  		allLayersLocations  file.LocationSet
   608  		expectedLocations   int
   609  		expectedVisibleOnly bool
   610  	}{
   611  		{
   612  			name:                "empty squashed locations returns empty",
   613  			squashedLocations:   file.NewLocationSet(),
   614  			allLayersLocations:  file.NewLocationSet(makeLocation("/some/path", 1)),
   615  			expectedLocations:   0,
   616  			expectedVisibleOnly: false,
   617  		},
   618  		{
   619  			name: "only squashed locations returns all as visible",
   620  			squashedLocations: file.NewLocationSet(
   621  				makeLocation("/path/one", 1),
   622  				makeLocation("/path/two", 1),
   623  			),
   624  			allLayersLocations:  file.NewLocationSet(),
   625  			expectedLocations:   2,
   626  			expectedVisibleOnly: true,
   627  		},
   628  		{
   629  			name:                "deduplicates matching locations between squashed and all layers + additional hidden locations",
   630  			squashedLocations:   file.NewLocationSet(makeLocation("/path/one", 2)),
   631  			allLayersLocations:  file.NewLocationSet(makeLocation("/path/one", 2), makeLocation("/path/one", 1)),
   632  			expectedLocations:   2,
   633  			expectedVisibleOnly: false,
   634  		},
   635  		{
   636  			name:                "deduplicates matching locations between squashed and all layers",
   637  			squashedLocations:   file.NewLocationSet(makeLocation("/path/one", 1)),
   638  			allLayersLocations:  file.NewLocationSet(makeLocation("/path/one", 1)),
   639  			expectedLocations:   1,
   640  			expectedVisibleOnly: true,
   641  		},
   642  		{
   643  			name:              "all layers locations with paths not in squashed tree are excluded",
   644  			squashedLocations: file.NewLocationSet(makeLocation("/path/one", 1)),
   645  			allLayersLocations: file.NewLocationSet(
   646  				makeLocation("/path/one", 1),             // layer 2 version will be skipped (deduped)
   647  				makeLocation("/path/not/in/squashed", 2), // will be excluded due to path not in squashed
   648  			),
   649  			expectedLocations:   1,
   650  			expectedVisibleOnly: true,
   651  		},
   652  		{
   653  			name:              "includes hidden locations from all layers when path in squashed tree",
   654  			squashedLocations: file.NewLocationSet(makeLocation("/path/one", 1), makeLocation("/path/two", 2)),
   655  			allLayersLocations: file.NewLocationSet(
   656  				makeLocation("/path/one", 1), // will be deduped
   657  				makeLocation("/path/one", 2), // will be included as hidden
   658  				makeLocation("/path/two", 2), // will be deduped
   659  				makeLocation("/path/two", 3), // will be included as hidden
   660  				makeLocation("/path/two", 4), // will be included as hidden
   661  			),
   662  			expectedLocations:   5, // 2 from squashed + 3 from layers for path/two
   663  			expectedVisibleOnly: false,
   664  		},
   665  		{
   666  			name: "complex scenario with multiple paths and layers",
   667  			squashedLocations: file.NewLocationSet(
   668  				makeLocation("/bin/bash", 1),
   669  				makeLocation("/etc/passwd", 2),
   670  				makeLocation("/var/log/syslog", 3),
   671  			),
   672  			allLayersLocations: file.NewLocationSet(
   673  				makeLocation("/bin/bash", 1),          // will be deduped
   674  				makeLocation("/bin/bash", 0),          // will be included as hidden
   675  				makeLocation("/etc/passwd", 2),        // will be deduped
   676  				makeLocation("/etc/passwd", 0),        // will be included as hidden
   677  				makeLocation("/var/log/syslog", 3),    // will be deduped
   678  				makeLocation("/var/log/syslog", 0),    // will be included as hidden
   679  				makeLocation("/tmp/not-in-squash", 4), // will be excluded - not in squashed
   680  			),
   681  			expectedLocations:   6, // 3 from squashed + 3 hidden from all layers
   682  			expectedVisibleOnly: false,
   683  		},
   684  		{
   685  			name: "include virtual locations",
   686  			squashedLocations: file.NewLocationSet(
   687  				makeLocation("/path/one", 1),
   688  				makeLocation("/path/two", 2),
   689  				makeLocation("/path/to-one", 2), // a symlink
   690  			),
   691  			allLayersLocations: file.NewLocationSet(
   692  				makeLocation("/path/one", 1), // will be deduped
   693  				makeVirtualLocation("/path/one", "/path/to-one", 2),
   694  			),
   695  			expectedLocations:   4,
   696  			expectedVisibleOnly: false,
   697  		},
   698  		{
   699  			name: "don't include hidden virtual locations",
   700  			squashedLocations: file.NewLocationSet(
   701  				makeLocation("/path/one", 1),
   702  			),
   703  			allLayersLocations: file.NewLocationSet(
   704  				makeLocation("/path/one", 1),                        // will be deduped
   705  				makeVirtualLocation("/path/one", "/path/to-one", 2), // would have been included if /path/to-one was in the squash tree
   706  			),
   707  			expectedLocations:   1,
   708  			expectedVisibleOnly: true,
   709  		},
   710  	}
   711  
   712  	for _, tt := range tests {
   713  		t.Run(tt.name, func(t *testing.T) {
   714  
   715  			resolver := &ContainerImageDeepSquash{
   716  				squashed:  newMockSimpleResolver(tt.squashedLocations.ToSlice()),
   717  				allLayers: newMockSimpleResolver(tt.allLayersLocations.ToSlice()),
   718  			}
   719  
   720  			squashedLocations := tt.squashedLocations.ToSlice()
   721  			allLayersLocations := tt.allLayersLocations.ToSlice()
   722  
   723  			mergedLocations := resolver.mergeLocations(squashedLocations, allLayersLocations)
   724  
   725  			require.Len(t, mergedLocations, tt.expectedLocations, "incorrect number of merged locations (expected %d, found %d)", tt.expectedLocations, len(mergedLocations))
   726  
   727  			if tt.expectedLocations > 0 {
   728  				onlyVisible := true
   729  				for _, loc := range mergedLocations {
   730  					if annotation, ok := loc.Annotations[file.VisibleAnnotationKey]; ok {
   731  						if annotation != file.VisibleAnnotation {
   732  							onlyVisible = false
   733  							break
   734  						}
   735  					}
   736  				}
   737  				assert.Equal(t, tt.expectedVisibleOnly, onlyVisible, "visibility annotation check failed")
   738  
   739  			}
   740  
   741  			visibleCount := 0
   742  			hiddenCount := 0
   743  			for _, loc := range mergedLocations {
   744  				if annotation, ok := loc.Annotations[file.VisibleAnnotationKey]; ok {
   745  					if annotation == file.VisibleAnnotation {
   746  						visibleCount++
   747  					} else if annotation == file.HiddenAnnotation {
   748  						hiddenCount++
   749  					}
   750  				}
   751  			}
   752  
   753  			// for test cases where we expect some hidden annotations...
   754  			if !tt.expectedVisibleOnly && tt.expectedLocations > 0 {
   755  				assert.Greater(t, hiddenCount, 0, "expected some hidden locations but found none")
   756  				assert.Greater(t, visibleCount, 0, "expected some visible locations but found none")
   757  			}
   758  
   759  			// for test cases where we expect only visible annotations...
   760  			if tt.expectedVisibleOnly && tt.expectedLocations > 0 {
   761  				assert.Equal(t, tt.expectedLocations, visibleCount, "incorrect number of visible locations")
   762  				assert.Equal(t, 0, hiddenCount, "found hidden locations when expecting only visible")
   763  			}
   764  		})
   765  	}
   766  }
   767  
   768  func TestContainerImageDeepSquash_MergeLocationStreams(t *testing.T) {
   769  	tests := []struct {
   770  		name                string
   771  		squashedLocations   []file.Location
   772  		allLayersLocations  []file.Location
   773  		expectedLocations   int
   774  		expectedVisibleOnly bool
   775  	}{
   776  		{
   777  			name:                "empty squashed locations returns empty",
   778  			squashedLocations:   []file.Location{},
   779  			allLayersLocations:  []file.Location{makeLocation("/some/path", 1)},
   780  			expectedLocations:   0,
   781  			expectedVisibleOnly: false,
   782  		},
   783  		{
   784  			name: "only squashed locations returns all as visible",
   785  			squashedLocations: []file.Location{
   786  				makeLocation("/path/one", 1),
   787  				makeLocation("/path/two", 1),
   788  			},
   789  			allLayersLocations:  []file.Location{},
   790  			expectedLocations:   2,
   791  			expectedVisibleOnly: true,
   792  		},
   793  		{
   794  			name:                "exact match locations are deduped",
   795  			squashedLocations:   []file.Location{makeLocation("/path/one", 1)},
   796  			allLayersLocations:  []file.Location{makeLocation("/path/one", 1)},
   797  			expectedLocations:   1,
   798  			expectedVisibleOnly: true,
   799  		},
   800  		{
   801  			name:                "different layers same path not deduped",
   802  			squashedLocations:   []file.Location{makeLocation("/path/one", 2)},
   803  			allLayersLocations:  []file.Location{makeLocation("/path/one", 1)},
   804  			expectedLocations:   2, // 1 visible from squashed + 1 hidden from all layers
   805  			expectedVisibleOnly: false,
   806  		},
   807  		{
   808  			name:              "all layers with path not in squashed are excluded",
   809  			squashedLocations: []file.Location{makeLocation("/path/one", 1)},
   810  			allLayersLocations: []file.Location{
   811  				makeLocation("/path/one", 2),
   812  				makeLocation("/not/in/squashed", 3),
   813  			},
   814  			expectedLocations:   2, // 1 from squashed + 1 from all layers (path/one)
   815  			expectedVisibleOnly: false,
   816  		},
   817  		{
   818  			name: "includes all layer versions for paths in squashed",
   819  			squashedLocations: []file.Location{
   820  				makeLocation("/path/one", 3),
   821  				makeLocation("/path/two", 2),
   822  			},
   823  			allLayersLocations: []file.Location{
   824  				makeLocation("/path/one", 1),
   825  				makeLocation("/path/one", 2),
   826  				makeLocation("/path/two", 2), // will be deduped
   827  				makeLocation("/path/two", 3),
   828  				makeLocation("/path/two", 4),
   829  			},
   830  			expectedLocations:   6, // 2 from squashed + 4 from all layers
   831  			expectedVisibleOnly: false,
   832  		},
   833  		{
   834  			name: "complex scenario with multiple paths and layers",
   835  			squashedLocations: []file.Location{
   836  				makeLocation("/bin/bash", 5),
   837  				makeLocation("/etc/passwd", 3),
   838  				makeLocation("/var/log/syslog", 2),
   839  			},
   840  			allLayersLocations: []file.Location{
   841  				makeLocation("/bin/bash", 1),
   842  				makeLocation("/bin/bash", 2),
   843  				makeLocation("/bin/bash", 3),
   844  				makeLocation("/bin/bash", 4),
   845  				makeLocation("/bin/bash", 5), // will be deduped
   846  				makeLocation("/etc/passwd", 1),
   847  				makeLocation("/etc/passwd", 2),
   848  				makeLocation("/etc/passwd", 3), // will be deduped
   849  				makeLocation("/var/log/syslog", 1),
   850  				makeLocation("/var/log/syslog", 2),    // will be deduped
   851  				makeLocation("/tmp/not-in-squash", 1), // not included
   852  			},
   853  			expectedLocations:   10, // 3 from squashed + 7 from all layers (3 excluded due to dedup/path)
   854  			expectedVisibleOnly: false,
   855  		},
   856  		{
   857  			name: "include virtual locations",
   858  			squashedLocations: []file.Location{
   859  				makeLocation("/path/one", 1),
   860  				makeLocation("/path/two", 2),
   861  				makeLocation("/path/to-one", 2), // a symlink
   862  			},
   863  			allLayersLocations: []file.Location{
   864  				makeLocation("/path/one", 1), // will be deduped
   865  				makeVirtualLocation("/path/one", "/path/to-one", 2),
   866  			},
   867  			expectedLocations:   4,
   868  			expectedVisibleOnly: false,
   869  		},
   870  		{
   871  			name: "don't include hidden virtual locations",
   872  			squashedLocations: []file.Location{
   873  				makeLocation("/path/one", 1),
   874  			},
   875  			allLayersLocations: []file.Location{
   876  				makeLocation("/path/one", 1),                        // will be deduped
   877  				makeVirtualLocation("/path/one", "/path/to-one", 2), // would have been included if /path/to-one was in the squash tree
   878  			},
   879  			expectedLocations:   1,
   880  			expectedVisibleOnly: true,
   881  		},
   882  	}
   883  
   884  	for _, tt := range tests {
   885  		t.Run(tt.name, func(t *testing.T) {
   886  			ctx, cancel := context.WithCancel(context.Background())
   887  			defer cancel()
   888  
   889  			resolver := &ContainerImageDeepSquash{
   890  				squashed: newMockSimpleResolver(tt.squashedLocations),
   891  			}
   892  
   893  			squashedChan := make(chan file.Location)
   894  			allLayersChan := make(chan file.Location)
   895  
   896  			wg := &sync.WaitGroup{}
   897  			wg.Add(2)
   898  
   899  			go func() {
   900  				defer wg.Done()
   901  				defer close(squashedChan)
   902  				for _, loc := range tt.squashedLocations {
   903  					squashedChan <- loc
   904  				}
   905  			}()
   906  
   907  			go func() {
   908  				defer wg.Done()
   909  				defer close(allLayersChan)
   910  				for _, loc := range tt.allLayersLocations {
   911  					allLayersChan <- loc
   912  				}
   913  			}()
   914  
   915  			mergedChan := resolver.mergeLocationStreams(ctx, squashedChan, allLayersChan)
   916  
   917  			var mergedLocations []file.Location
   918  			for loc := range mergedChan {
   919  				mergedLocations = append(mergedLocations, loc)
   920  			}
   921  
   922  			assert.Equal(t, tt.expectedLocations, len(mergedLocations), "incorrect number of merged locations")
   923  
   924  			visibleCount := 0
   925  			hiddenCount := 0
   926  			duplicateFound := false
   927  
   928  			// track seen locations to verify deduplication
   929  			seenLocations := make(map[file.LocationData]int)
   930  
   931  			for _, loc := range mergedLocations {
   932  				// check for duplicates
   933  				seenLocations[loc.LocationData]++
   934  				if seenLocations[loc.LocationData] > 1 {
   935  					duplicateFound = true
   936  				}
   937  
   938  				// count annotations
   939  				if annotation, ok := loc.Annotations[file.VisibleAnnotationKey]; ok {
   940  					if annotation == file.VisibleAnnotation {
   941  						visibleCount++
   942  					} else if annotation == file.HiddenAnnotation {
   943  						hiddenCount++
   944  					}
   945  				}
   946  			}
   947  
   948  			assert.False(t, duplicateFound, "found duplicate locations when none expected")
   949  
   950  			// check visibility annotations
   951  			if tt.expectedVisibleOnly && len(mergedLocations) > 0 {
   952  				assert.Equal(t, len(mergedLocations), visibleCount,
   953  					"incorrect number of visible locations")
   954  				assert.Equal(t, 0, hiddenCount,
   955  					"found hidden locations when expecting only visible")
   956  			}
   957  
   958  			if !tt.expectedVisibleOnly && len(mergedLocations) > 0 {
   959  				assert.Greater(t, hiddenCount, 0,
   960  					"expected some hidden locations but found none")
   961  				assert.Greater(t, visibleCount, 0,
   962  					"expected some visible locations but found none")
   963  			}
   964  
   965  			wg.Wait()
   966  
   967  			goleak.VerifyNone(t)
   968  		})
   969  	}
   970  }
   971  
   972  func TestContainerImageDeepSquash_MergeLocationStreams_FunCases(t *testing.T) {
   973  
   974  	t.Run("concurrent context cancellation", func(t *testing.T) {
   975  		upstreamCtx, upstreamCancel := context.WithCancel(context.Background())
   976  
   977  		ctx, cancel := context.WithCancel(context.Background())
   978  
   979  		resolver := &ContainerImageDeepSquash{
   980  			squashed: newMockSimpleResolver(nil),
   981  		}
   982  
   983  		squashedChan := make(chan file.Location)
   984  		allLayersChan := make(chan file.Location)
   985  
   986  		wg := &sync.WaitGroup{}
   987  		wg.Add(2)
   988  
   989  		go func() {
   990  			defer wg.Done()
   991  			defer close(squashedChan)
   992  
   993  			count := 0
   994  			for {
   995  				count++
   996  				loc := makeLocation(fmt.Sprintf("/path/%d", count), 2)
   997  				select {
   998  				case <-upstreamCtx.Done():
   999  					return
  1000  				case squashedChan <- loc:
  1001  				}
  1002  			}
  1003  		}()
  1004  
  1005  		go func() {
  1006  			defer wg.Done()
  1007  			defer close(allLayersChan)
  1008  
  1009  			count := 0
  1010  			for {
  1011  				count++
  1012  				loc := makeLocation(fmt.Sprintf("/path/%d", count), 2)
  1013  				select {
  1014  				case <-upstreamCtx.Done():
  1015  					return
  1016  				case allLayersChan <- loc:
  1017  				}
  1018  			}
  1019  		}()
  1020  
  1021  		mergedChan := resolver.mergeLocationStreams(ctx, squashedChan, allLayersChan)
  1022  
  1023  		go func() {
  1024  			<-time.After(5 * time.Millisecond)
  1025  			cancel()
  1026  			time.Sleep(10 * time.Millisecond)
  1027  			upstreamCancel()
  1028  		}()
  1029  
  1030  		for range mergedChan {
  1031  			// drain
  1032  		}
  1033  		wg.Wait()
  1034  
  1035  		goleak.VerifyNone(t)
  1036  	})
  1037  
  1038  	t.Run("empty streams", func(t *testing.T) {
  1039  		ctx, cancel := context.WithCancel(context.Background())
  1040  		defer cancel()
  1041  
  1042  		resolver := &ContainerImageDeepSquash{
  1043  			squashed: newMockSimpleResolver([]file.Location{}),
  1044  		}
  1045  
  1046  		squashedChan := make(chan file.Location)
  1047  		allLayersChan := make(chan file.Location)
  1048  		close(squashedChan)
  1049  		close(allLayersChan)
  1050  
  1051  		mergedChan := resolver.mergeLocationStreams(ctx, squashedChan, allLayersChan)
  1052  
  1053  		var count int
  1054  		// should return immediately with no results (not block)
  1055  		for range mergedChan {
  1056  			count++
  1057  		}
  1058  		assert.Equal(t, 0, count, "expected no results from empty streams")
  1059  	})
  1060  
  1061  	t.Run("squashed empty but all layers has data", func(t *testing.T) {
  1062  		ctx, cancel := context.WithCancel(context.Background())
  1063  		defer cancel()
  1064  
  1065  		resolver := &ContainerImageDeepSquash{
  1066  			squashed: newMockSimpleResolver([]file.Location{}),
  1067  		}
  1068  
  1069  		squashedChan := make(chan file.Location)
  1070  		allLayersChan := make(chan file.Location)
  1071  		close(squashedChan)
  1072  
  1073  		wg := &sync.WaitGroup{}
  1074  		wg.Add(1)
  1075  
  1076  		go func() {
  1077  			defer close(allLayersChan)
  1078  			defer wg.Done()
  1079  
  1080  			allLayersChan <- makeLocation("/path/one", 1)
  1081  		}()
  1082  
  1083  		mergedChan := resolver.mergeLocationStreams(ctx, squashedChan, allLayersChan)
  1084  
  1085  		// should return no results since squashed is empty
  1086  		var count int
  1087  		for range mergedChan {
  1088  			count++
  1089  		}
  1090  
  1091  		wg.Wait()
  1092  
  1093  		assert.Equal(t, 0, count, "expected no results when squashed is empty")
  1094  	})
  1095  }
  1096  
  1097  func makeLocation(path string, layer int) file.Location {
  1098  	return file.NewLocationFromCoordinates(file.Coordinates{
  1099  		RealPath:     path,
  1100  		FileSystemID: fmt.Sprintf("layer-%d", layer),
  1101  	})
  1102  }
  1103  
  1104  func makeVirtualLocation(path, access string, layer int) file.Location {
  1105  	return file.NewVirtualLocationFromCoordinates(file.Coordinates{
  1106  		RealPath:     path,
  1107  		FileSystemID: fmt.Sprintf("layer-%d", layer),
  1108  	}, access)
  1109  }