github.com/anchore/syft@v1.38.2/syft/internal/fileresolver/container_image_squash_test.go (about) 1 package fileresolver 2 3 import ( 4 "context" 5 "io" 6 "slices" 7 "sort" 8 "strings" 9 "testing" 10 11 "github.com/google/go-cmp/cmp" 12 "github.com/google/go-cmp/cmp/cmpopts" 13 "github.com/scylladb/go-set/strset" 14 "github.com/stretchr/testify/assert" 15 "github.com/stretchr/testify/require" 16 17 "github.com/anchore/stereoscope/pkg/imagetest" 18 "github.com/anchore/syft/syft/file" 19 ) 20 21 func TestImageSquashResolver_FilesByPath(t *testing.T) { 22 cases := []struct { 23 name string 24 linkPath string 25 resolveLayer uint 26 resolvePath string 27 forcePositiveHasPath bool 28 }{ 29 { 30 name: "link with previous data", 31 linkPath: "/link-1", 32 resolveLayer: 1, 33 resolvePath: "/file-1.txt", 34 }, 35 { 36 name: "link with in layer data", 37 linkPath: "/link-within", 38 resolveLayer: 5, 39 resolvePath: "/file-3.txt", 40 }, 41 { 42 name: "link with overridden data", 43 linkPath: "/link-2", 44 resolveLayer: 7, 45 resolvePath: "/file-2.txt", 46 }, 47 { 48 name: "indirect link (with overridden data)", 49 linkPath: "/link-indirect", 50 resolveLayer: 7, 51 resolvePath: "/file-2.txt", 52 }, 53 { 54 name: "dead link", 55 linkPath: "/link-dead", 56 resolveLayer: 8, 57 resolvePath: "", 58 // the path should exist, even if the link is dead 59 forcePositiveHasPath: true, 60 }, 61 { 62 name: "ignore directories", 63 linkPath: "/bin", 64 resolvePath: "", 65 // the path should exist, even if we ignore it 66 forcePositiveHasPath: true, 67 }, 68 { 69 name: "parent is a link (with overridden data)", 70 linkPath: "/parent-link/file-4.txt", 71 resolveLayer: 11, 72 resolvePath: "/parent/file-4.txt", 73 }, 74 } 75 for _, c := range cases { 76 t.Run(c.name, func(t *testing.T) { 77 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 78 79 resolver, err := NewFromContainerImageSquash(img) 80 require.NoError(t, err) 81 82 hasPath := resolver.HasPath(c.linkPath) 83 if !c.forcePositiveHasPath { 84 if c.resolvePath != "" && !hasPath { 85 t.Errorf("expected HasPath() to indicate existance, but did not") 86 } else if c.resolvePath == "" && hasPath { 87 t.Errorf("expeced HasPath() to NOT indicate existance, but does") 88 } 89 } else if !hasPath { 90 t.Errorf("expected HasPath() to indicate existance, but did not (force path)") 91 } 92 93 refs, err := resolver.FilesByPath(c.linkPath) 94 require.NoError(t, err) 95 96 expectedRefs := 1 97 if c.resolvePath == "" { 98 expectedRefs = 0 99 } 100 101 if len(refs) != expectedRefs { 102 t.Fatalf("unexpected number of resolutions: %d", len(refs)) 103 } 104 105 if expectedRefs == 0 { 106 // nothing else to assert 107 return 108 } 109 110 actual := refs[0] 111 112 if string(actual.Reference().RealPath) != c.resolvePath { 113 t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath) 114 } 115 116 if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath { 117 t.Errorf("we should always prefer real paths over ones with links") 118 } 119 120 layer := img.FileCatalog.Layer(actual.Reference()) 121 122 if layer.Metadata.Index != c.resolveLayer { 123 t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer) 124 } 125 }) 126 } 127 } 128 129 func TestImageSquashResolver_FilesByGlob(t *testing.T) { 130 cases := []struct { 131 name string 132 glob string 133 resolveLayer uint 134 resolvePath string 135 }{ 136 { 137 name: "link with previous data", 138 glob: "**/link-1", 139 resolveLayer: 1, 140 resolvePath: "/file-1.txt", 141 }, 142 { 143 name: "link with in layer data", 144 glob: "**/link-within", 145 resolveLayer: 5, 146 resolvePath: "/file-3.txt", 147 }, 148 { 149 name: "link with overridden data", 150 glob: "**/link-2", 151 resolveLayer: 7, 152 resolvePath: "/file-2.txt", 153 }, 154 { 155 name: "indirect link (with overridden data)", 156 glob: "**/link-indirect", 157 resolveLayer: 7, 158 resolvePath: "/file-2.txt", 159 }, 160 { 161 name: "dead link", 162 glob: "**/link-dead", 163 // dead links are dead! they shouldn't match on globs 164 resolvePath: "", 165 }, 166 { 167 name: "ignore directories", 168 glob: "**/bin", 169 resolvePath: "", 170 }, 171 { 172 name: "parent without link", 173 glob: "**/parent/*.txt", 174 resolveLayer: 11, 175 resolvePath: "/parent/file-4.txt", 176 }, 177 { 178 name: "parent is a link (override)", 179 glob: "**/parent-link/file-4.txt", 180 resolveLayer: 11, 181 resolvePath: "/parent/file-4.txt", 182 }, 183 } 184 for _, c := range cases { 185 t.Run(c.name, func(t *testing.T) { 186 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 187 188 resolver, err := NewFromContainerImageSquash(img) 189 require.NoError(t, err) 190 191 refs, err := resolver.FilesByGlob(c.glob) 192 require.NoError(t, err) 193 194 expectedRefs := 1 195 if c.resolvePath == "" { 196 expectedRefs = 0 197 } 198 199 if len(refs) != expectedRefs { 200 t.Fatalf("unexpected number of resolutions: %d", len(refs)) 201 } 202 203 if expectedRefs == 0 { 204 // nothing else to assert 205 return 206 } 207 208 actual := refs[0] 209 210 if string(actual.Reference().RealPath) != c.resolvePath { 211 t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath) 212 } 213 214 if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath { 215 t.Errorf("we should always prefer real paths over ones with links") 216 } 217 218 layer := img.FileCatalog.Layer(actual.Reference()) 219 220 if layer.Metadata.Index != c.resolveLayer { 221 t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer) 222 } 223 }) 224 } 225 } 226 227 func Test_imageSquashResolver_FilesByMIMEType(t *testing.T) { 228 229 tests := []struct { 230 fixtureName string 231 mimeType string 232 expectedPaths *strset.Set 233 }{ 234 { 235 fixtureName: "image-simple", 236 mimeType: "text/plain", 237 expectedPaths: strset.New("/somefile-1.txt", "/somefile-2.txt", "/really/nested/file-3.txt"), 238 }, 239 } 240 241 for _, test := range tests { 242 t.Run(test.fixtureName, func(t *testing.T) { 243 img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureName) 244 245 resolver, err := NewFromContainerImageSquash(img) 246 assert.NoError(t, err) 247 248 locations, err := resolver.FilesByMIMEType(test.mimeType) 249 assert.NoError(t, err) 250 251 assert.Len(t, locations, test.expectedPaths.Size()) 252 for _, l := range locations { 253 assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath) 254 } 255 }) 256 } 257 } 258 259 func Test_imageSquashResolver_hasFilesystemIDInLocation(t *testing.T) { 260 img := imagetest.GetFixtureImage(t, "docker-archive", "image-duplicate-path") 261 262 resolver, err := NewFromContainerImageSquash(img) 263 assert.NoError(t, err) 264 265 locations, err := resolver.FilesByMIMEType("text/plain") 266 assert.NoError(t, err) 267 assert.NotEmpty(t, locations) 268 for _, location := range locations { 269 assert.NotEmpty(t, location.FileSystemID) 270 } 271 272 locations, err = resolver.FilesByGlob("*.txt") 273 assert.NoError(t, err) 274 assert.NotEmpty(t, locations) 275 for _, location := range locations { 276 assert.NotEmpty(t, location.FileSystemID) 277 } 278 279 locations, err = resolver.FilesByPath("/somefile-1.txt") 280 assert.NoError(t, err) 281 assert.NotEmpty(t, locations) 282 for _, location := range locations { 283 assert.NotEmpty(t, location.FileSystemID) 284 } 285 286 } 287 288 func TestSquashImageResolver_FilesContents(t *testing.T) { 289 290 tests := []struct { 291 name string 292 path string 293 contents []string 294 }{ 295 { 296 name: "one degree", 297 path: "link-2", 298 contents: []string{ 299 "NEW file override!", // always from the squashed perspective 300 }, 301 }, 302 { 303 name: "two degrees", 304 path: "link-indirect", 305 contents: []string{ 306 "NEW file override!", // always from the squashed perspective 307 }, 308 }, 309 { 310 name: "dead link", 311 path: "link-dead", 312 contents: []string{}, 313 }, 314 } 315 316 for _, test := range tests { 317 t.Run(test.name, func(t *testing.T) { 318 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 319 320 resolver, err := NewFromContainerImageSquash(img) 321 assert.NoError(t, err) 322 323 refs, err := resolver.FilesByPath(test.path) 324 require.NoError(t, err) 325 assert.Len(t, refs, len(test.contents)) 326 327 for idx, loc := range refs { 328 329 reader, err := resolver.FileContentsByLocation(loc) 330 require.NoError(t, err) 331 332 actual, err := io.ReadAll(reader) 333 require.NoError(t, err) 334 335 assert.Equal(t, test.contents[idx], string(actual)) 336 } 337 }) 338 } 339 } 340 341 func TestSquashImageResolver_FilesContents_errorOnDirRequest(t *testing.T) { 342 343 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 344 345 resolver, err := NewFromContainerImageSquash(img) 346 assert.NoError(t, err) 347 348 var dirLoc *file.Location 349 ctx, cancel := context.WithCancel(context.Background()) 350 defer cancel() 351 for loc := range resolver.AllLocations(ctx) { 352 entry, err := resolver.img.FileCatalog.Get(loc.Reference()) 353 require.NoError(t, err) 354 if entry.Metadata.IsDir() { 355 dirLoc = &loc 356 break 357 } 358 } 359 360 require.NotNil(t, dirLoc) 361 362 reader, err := resolver.FileContentsByLocation(*dirLoc) 363 require.Error(t, err) 364 require.Nil(t, reader) 365 } 366 367 func Test_imageSquashResolver_resolvesLinks(t *testing.T) { 368 tests := []struct { 369 name string 370 runner func(file.Resolver) []file.Location 371 expected []file.Location 372 }{ 373 { 374 name: "by mimetype", 375 runner: func(resolver file.Resolver) []file.Location { 376 // links should not show up when searching mimetype 377 actualLocations, err := resolver.FilesByMIMEType("text/plain") 378 assert.NoError(t, err) 379 return actualLocations 380 }, 381 expected: []file.Location{ 382 file.NewVirtualLocation("/etc/group", "/etc/group"), 383 file.NewVirtualLocation("/etc/passwd", "/etc/passwd"), 384 file.NewVirtualLocation("/etc/shadow", "/etc/shadow"), 385 file.NewVirtualLocation("/file-1.txt", "/file-1.txt"), 386 file.NewVirtualLocation("/file-3.txt", "/file-3.txt"), 387 file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), 388 file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), 389 }, 390 }, 391 { 392 name: "by glob to links", 393 runner: func(resolver file.Resolver) []file.Location { 394 // links are searched, but resolve to the real files 395 actualLocations, err := resolver.FilesByGlob("*ink-*") 396 assert.NoError(t, err) 397 return actualLocations 398 }, 399 expected: []file.Location{ 400 file.NewVirtualLocation("/file-1.txt", "/link-1"), 401 file.NewVirtualLocation("/file-2.txt", "/link-2"), 402 403 // though this is a link, and it matches to the file, the resolver de-duplicates files 404 // by the real path, so it is not included in the results 405 //file.NewVirtualLocation("/file-2.txt", "/link-indirect"), 406 407 file.NewVirtualLocation("/file-3.txt", "/link-within"), 408 }, 409 }, 410 { 411 name: "by basename", 412 runner: func(resolver file.Resolver) []file.Location { 413 // links are searched, but resolve to the real files 414 actualLocations, err := resolver.FilesByGlob("**/file-2.txt") 415 assert.NoError(t, err) 416 return actualLocations 417 }, 418 expected: []file.Location{ 419 // this has two copies in the base image, which overwrites the same location 420 file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), 421 }, 422 }, 423 { 424 name: "by basename glob", 425 runner: func(resolver file.Resolver) []file.Location { 426 // links are searched, but resolve to the real files 427 actualLocations, err := resolver.FilesByGlob("**/file-?.txt") 428 assert.NoError(t, err) 429 return actualLocations 430 }, 431 expected: []file.Location{ 432 file.NewVirtualLocation("/file-1.txt", "/file-1.txt"), 433 file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), 434 file.NewVirtualLocation("/file-3.txt", "/file-3.txt"), 435 file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), 436 }, 437 }, 438 { 439 name: "by basename glob to links", 440 runner: func(resolver file.Resolver) []file.Location { 441 actualLocations, err := resolver.FilesByGlob("**/link-*") 442 assert.NoError(t, err) 443 return actualLocations 444 }, 445 expected: []file.Location{ 446 file.NewVirtualLocation("/file-1.txt", "/link-1"), 447 file.NewVirtualLocation("/file-2.txt", "/link-2"), 448 449 // we already have this real file path via another link, so only one is returned 450 // file.NewVirtualLocation("/file-2.txt", "/link-indirect"), 451 452 file.NewVirtualLocation("/file-3.txt", "/link-within"), 453 }, 454 }, 455 { 456 name: "by extension", 457 runner: func(resolver file.Resolver) []file.Location { 458 // links are searched, but resolve to the real files 459 actualLocations, err := resolver.FilesByGlob("**/*.txt") 460 assert.NoError(t, err) 461 return actualLocations 462 }, 463 expected: []file.Location{ 464 file.NewVirtualLocation("/file-1.txt", "/file-1.txt"), 465 file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), 466 file.NewVirtualLocation("/file-3.txt", "/file-3.txt"), 467 file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), 468 }, 469 }, 470 { 471 name: "by path to degree 1 link", 472 runner: func(resolver file.Resolver) []file.Location { 473 // links resolve to the final file 474 actualLocations, err := resolver.FilesByPath("/link-2") 475 assert.NoError(t, err) 476 return actualLocations 477 }, 478 expected: []file.Location{ 479 // we have multiple copies across layers 480 file.NewVirtualLocation("/file-2.txt", "/link-2"), 481 }, 482 }, 483 { 484 name: "by path to degree 2 link", 485 runner: func(resolver file.Resolver) []file.Location { 486 // multiple links resolves to the final file 487 actualLocations, err := resolver.FilesByPath("/link-indirect") 488 assert.NoError(t, err) 489 return actualLocations 490 }, 491 expected: []file.Location{ 492 // we have multiple copies across layers 493 file.NewVirtualLocation("/file-2.txt", "/link-indirect"), 494 }, 495 }, 496 } 497 498 for _, test := range tests { 499 t.Run(test.name, func(t *testing.T) { 500 501 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 502 503 resolver, err := NewFromContainerImageSquash(img) 504 assert.NoError(t, err) 505 506 actual := test.runner(resolver) 507 508 compareLocations(t, test.expected, actual) 509 }) 510 } 511 512 } 513 514 func compareLocations(t *testing.T, expected, actual []file.Location) { 515 t.Helper() 516 ignoreUnexported := cmpopts.IgnoreUnexported(file.LocationData{}) 517 ignoreUnexportedCoord := cmpopts.IgnoreUnexported(file.Coordinates{}) 518 ignoreMetadata := cmpopts.IgnoreFields(file.LocationMetadata{}, "Annotations") 519 ignoreFS := cmpopts.IgnoreFields(file.Coordinates{}, "FileSystemID") 520 521 slices.SortFunc(expected, locationSorter) 522 slices.SortFunc(actual, locationSorter) 523 524 if d := cmp.Diff(expected, actual, 525 ignoreUnexported, 526 ignoreUnexportedCoord, 527 ignoreFS, 528 ignoreMetadata, 529 ); d != "" { 530 531 t.Errorf("unexpected locations (-want +got):\n%s", d) 532 } 533 534 } 535 536 // locationSorter always sorts only by path information since test fixtures here only have filesystem IDs 537 // for one side of the comparison (expected) and not the other (actual). 538 func locationSorter(a, b file.Location) int { 539 if a.AccessPath != b.AccessPath { 540 return strings.Compare(a.AccessPath, b.AccessPath) 541 } 542 543 return strings.Compare(a.RealPath, b.RealPath) 544 } 545 546 func TestSquashResolver_AllLocations(t *testing.T) { 547 img := imagetest.GetFixtureImage(t, "docker-archive", "image-files-deleted") 548 549 resolver, err := NewFromContainerImageSquash(img) 550 assert.NoError(t, err) 551 552 paths := strset.New() 553 ctx, cancel := context.WithCancel(context.Background()) 554 defer cancel() 555 for loc := range resolver.AllLocations(ctx) { 556 paths.Add(loc.RealPath) 557 } 558 expected := []string{ 559 "/Dockerfile", 560 "/file-3.txt", 561 "/target", 562 "/target/file-2.txt", 563 } 564 565 // depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image. 566 // this isn't important for the test, so we remove them. 567 paths.Remove("/proc", "/sys", "/dev", "/etc") 568 569 // Remove cache created by Mac Rosetta when emulating different arches 570 paths.Remove("/.cache/rosetta", "/.cache") 571 572 pathsList := paths.List() 573 sort.Strings(pathsList) 574 575 assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List())) 576 }