github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/internal/fileresolver/container_image_squash_test.go (about) 1 package fileresolver 2 3 import ( 4 "io" 5 "sort" 6 "testing" 7 8 "github.com/google/go-cmp/cmp" 9 "github.com/google/go-cmp/cmp/cmpopts" 10 "github.com/scylladb/go-set/strset" 11 "github.com/stretchr/testify/assert" 12 "github.com/stretchr/testify/require" 13 14 "github.com/anchore/stereoscope/pkg/imagetest" 15 "github.com/anchore/syft/syft/file" 16 ) 17 18 func TestImageSquashResolver_FilesByPath(t *testing.T) { 19 cases := []struct { 20 name string 21 linkPath string 22 resolveLayer uint 23 resolvePath string 24 forcePositiveHasPath bool 25 }{ 26 { 27 name: "link with previous data", 28 linkPath: "/link-1", 29 resolveLayer: 1, 30 resolvePath: "/file-1.txt", 31 }, 32 { 33 name: "link with in layer data", 34 linkPath: "/link-within", 35 resolveLayer: 5, 36 resolvePath: "/file-3.txt", 37 }, 38 { 39 name: "link with overridden data", 40 linkPath: "/link-2", 41 resolveLayer: 7, 42 resolvePath: "/file-2.txt", 43 }, 44 { 45 name: "indirect link (with overridden data)", 46 linkPath: "/link-indirect", 47 resolveLayer: 7, 48 resolvePath: "/file-2.txt", 49 }, 50 { 51 name: "dead link", 52 linkPath: "/link-dead", 53 resolveLayer: 8, 54 resolvePath: "", 55 // the path should exist, even if the link is dead 56 forcePositiveHasPath: true, 57 }, 58 { 59 name: "ignore directories", 60 linkPath: "/bin", 61 resolvePath: "", 62 // the path should exist, even if we ignore it 63 forcePositiveHasPath: true, 64 }, 65 { 66 name: "parent is a link (with overridden data)", 67 linkPath: "/parent-link/file-4.txt", 68 resolveLayer: 11, 69 resolvePath: "/parent/file-4.txt", 70 }, 71 } 72 for _, c := range cases { 73 t.Run(c.name, func(t *testing.T) { 74 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 75 76 resolver, err := NewFromContainerImageSquash(img) 77 require.NoError(t, err) 78 79 hasPath := resolver.HasPath(c.linkPath) 80 if !c.forcePositiveHasPath { 81 if c.resolvePath != "" && !hasPath { 82 t.Errorf("expected HasPath() to indicate existance, but did not") 83 } else if c.resolvePath == "" && hasPath { 84 t.Errorf("expeced HasPath() to NOT indicate existance, but does") 85 } 86 } else if !hasPath { 87 t.Errorf("expected HasPath() to indicate existance, but did not (force path)") 88 } 89 90 refs, err := resolver.FilesByPath(c.linkPath) 91 require.NoError(t, err) 92 93 expectedRefs := 1 94 if c.resolvePath == "" { 95 expectedRefs = 0 96 } 97 98 if len(refs) != expectedRefs { 99 t.Fatalf("unexpected number of resolutions: %d", len(refs)) 100 } 101 102 if expectedRefs == 0 { 103 // nothing else to assert 104 return 105 } 106 107 actual := refs[0] 108 109 if string(actual.Reference().RealPath) != c.resolvePath { 110 t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath) 111 } 112 113 if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath { 114 t.Errorf("we should always prefer real paths over ones with links") 115 } 116 117 layer := img.FileCatalog.Layer(actual.Reference()) 118 119 if layer.Metadata.Index != c.resolveLayer { 120 t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer) 121 } 122 }) 123 } 124 } 125 126 func TestImageSquashResolver_FilesByGlob(t *testing.T) { 127 cases := []struct { 128 name string 129 glob string 130 resolveLayer uint 131 resolvePath string 132 }{ 133 { 134 name: "link with previous data", 135 glob: "**/link-1", 136 resolveLayer: 1, 137 resolvePath: "/file-1.txt", 138 }, 139 { 140 name: "link with in layer data", 141 glob: "**/link-within", 142 resolveLayer: 5, 143 resolvePath: "/file-3.txt", 144 }, 145 { 146 name: "link with overridden data", 147 glob: "**/link-2", 148 resolveLayer: 7, 149 resolvePath: "/file-2.txt", 150 }, 151 { 152 name: "indirect link (with overridden data)", 153 glob: "**/link-indirect", 154 resolveLayer: 7, 155 resolvePath: "/file-2.txt", 156 }, 157 { 158 name: "dead link", 159 glob: "**/link-dead", 160 // dead links are dead! they shouldn't match on globs 161 resolvePath: "", 162 }, 163 { 164 name: "ignore directories", 165 glob: "**/bin", 166 resolvePath: "", 167 }, 168 { 169 name: "parent without link", 170 glob: "**/parent/*.txt", 171 resolveLayer: 11, 172 resolvePath: "/parent/file-4.txt", 173 }, 174 { 175 name: "parent is a link (override)", 176 glob: "**/parent-link/file-4.txt", 177 resolveLayer: 11, 178 resolvePath: "/parent/file-4.txt", 179 }, 180 } 181 for _, c := range cases { 182 t.Run(c.name, func(t *testing.T) { 183 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 184 185 resolver, err := NewFromContainerImageSquash(img) 186 require.NoError(t, err) 187 188 refs, err := resolver.FilesByGlob(c.glob) 189 require.NoError(t, err) 190 191 expectedRefs := 1 192 if c.resolvePath == "" { 193 expectedRefs = 0 194 } 195 196 if len(refs) != expectedRefs { 197 t.Fatalf("unexpected number of resolutions: %d", len(refs)) 198 } 199 200 if expectedRefs == 0 { 201 // nothing else to assert 202 return 203 } 204 205 actual := refs[0] 206 207 if string(actual.Reference().RealPath) != c.resolvePath { 208 t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath) 209 } 210 211 if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath { 212 t.Errorf("we should always prefer real paths over ones with links") 213 } 214 215 layer := img.FileCatalog.Layer(actual.Reference()) 216 217 if layer.Metadata.Index != c.resolveLayer { 218 t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer) 219 } 220 }) 221 } 222 } 223 224 func Test_imageSquashResolver_FilesByMIMEType(t *testing.T) { 225 226 tests := []struct { 227 fixtureName string 228 mimeType string 229 expectedPaths *strset.Set 230 }{ 231 { 232 fixtureName: "image-simple", 233 mimeType: "text/plain", 234 expectedPaths: strset.New("/somefile-1.txt", "/somefile-2.txt", "/really/nested/file-3.txt"), 235 }, 236 } 237 238 for _, test := range tests { 239 t.Run(test.fixtureName, func(t *testing.T) { 240 img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureName) 241 242 resolver, err := NewFromContainerImageSquash(img) 243 assert.NoError(t, err) 244 245 locations, err := resolver.FilesByMIMEType(test.mimeType) 246 assert.NoError(t, err) 247 248 assert.Len(t, locations, test.expectedPaths.Size()) 249 for _, l := range locations { 250 assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath) 251 } 252 }) 253 } 254 } 255 256 func Test_imageSquashResolver_hasFilesystemIDInLocation(t *testing.T) { 257 img := imagetest.GetFixtureImage(t, "docker-archive", "image-duplicate-path") 258 259 resolver, err := NewFromContainerImageSquash(img) 260 assert.NoError(t, err) 261 262 locations, err := resolver.FilesByMIMEType("text/plain") 263 assert.NoError(t, err) 264 assert.NotEmpty(t, locations) 265 for _, location := range locations { 266 assert.NotEmpty(t, location.FileSystemID) 267 } 268 269 locations, err = resolver.FilesByGlob("*.txt") 270 assert.NoError(t, err) 271 assert.NotEmpty(t, locations) 272 for _, location := range locations { 273 assert.NotEmpty(t, location.FileSystemID) 274 } 275 276 locations, err = resolver.FilesByPath("/somefile-1.txt") 277 assert.NoError(t, err) 278 assert.NotEmpty(t, locations) 279 for _, location := range locations { 280 assert.NotEmpty(t, location.FileSystemID) 281 } 282 283 } 284 285 func TestSquashImageResolver_FilesContents(t *testing.T) { 286 287 tests := []struct { 288 name string 289 path string 290 contents []string 291 }{ 292 { 293 name: "one degree", 294 path: "link-2", 295 contents: []string{ 296 "NEW file override!", // always from the squashed perspective 297 }, 298 }, 299 { 300 name: "two degrees", 301 path: "link-indirect", 302 contents: []string{ 303 "NEW file override!", // always from the squashed perspective 304 }, 305 }, 306 { 307 name: "dead link", 308 path: "link-dead", 309 contents: []string{}, 310 }, 311 } 312 313 for _, test := range tests { 314 t.Run(test.name, func(t *testing.T) { 315 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 316 317 resolver, err := NewFromContainerImageSquash(img) 318 assert.NoError(t, err) 319 320 refs, err := resolver.FilesByPath(test.path) 321 require.NoError(t, err) 322 assert.Len(t, refs, len(test.contents)) 323 324 for idx, loc := range refs { 325 326 reader, err := resolver.FileContentsByLocation(loc) 327 require.NoError(t, err) 328 329 actual, err := io.ReadAll(reader) 330 require.NoError(t, err) 331 332 assert.Equal(t, test.contents[idx], string(actual)) 333 } 334 }) 335 } 336 } 337 338 func TestSquashImageResolver_FilesContents_errorOnDirRequest(t *testing.T) { 339 340 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 341 342 resolver, err := NewFromContainerImageSquash(img) 343 assert.NoError(t, err) 344 345 var dirLoc *file.Location 346 for loc := range resolver.AllLocations() { 347 entry, err := resolver.img.FileCatalog.Get(loc.Reference()) 348 require.NoError(t, err) 349 if entry.Metadata.IsDir() { 350 dirLoc = &loc 351 break 352 } 353 } 354 355 require.NotNil(t, dirLoc) 356 357 reader, err := resolver.FileContentsByLocation(*dirLoc) 358 require.Error(t, err) 359 require.Nil(t, reader) 360 } 361 362 func Test_imageSquashResolver_resolvesLinks(t *testing.T) { 363 tests := []struct { 364 name string 365 runner func(file.Resolver) []file.Location 366 expected []file.Location 367 }{ 368 { 369 name: "by mimetype", 370 runner: func(resolver file.Resolver) []file.Location { 371 // links should not show up when searching mimetype 372 actualLocations, err := resolver.FilesByMIMEType("text/plain") 373 assert.NoError(t, err) 374 return actualLocations 375 }, 376 expected: []file.Location{ 377 file.NewVirtualLocation("/etc/group", "/etc/group"), 378 file.NewVirtualLocation("/etc/passwd", "/etc/passwd"), 379 file.NewVirtualLocation("/etc/shadow", "/etc/shadow"), 380 file.NewVirtualLocation("/file-1.txt", "/file-1.txt"), 381 file.NewVirtualLocation("/file-3.txt", "/file-3.txt"), 382 file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), 383 file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), 384 }, 385 }, 386 { 387 name: "by glob to links", 388 runner: func(resolver file.Resolver) []file.Location { 389 // links are searched, but resolve to the real files 390 actualLocations, err := resolver.FilesByGlob("*ink-*") 391 assert.NoError(t, err) 392 return actualLocations 393 }, 394 expected: []file.Location{ 395 file.NewVirtualLocation("/file-1.txt", "/link-1"), 396 file.NewVirtualLocation("/file-2.txt", "/link-2"), 397 398 // though this is a link, and it matches to the file, the resolver de-duplicates files 399 // by the real path, so it is not included in the results 400 //file.NewVirtualLocation("/file-2.txt", "/link-indirect"), 401 402 file.NewVirtualLocation("/file-3.txt", "/link-within"), 403 }, 404 }, 405 { 406 name: "by basename", 407 runner: func(resolver file.Resolver) []file.Location { 408 // links are searched, but resolve to the real files 409 actualLocations, err := resolver.FilesByGlob("**/file-2.txt") 410 assert.NoError(t, err) 411 return actualLocations 412 }, 413 expected: []file.Location{ 414 // this has two copies in the base image, which overwrites the same location 415 file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), 416 }, 417 }, 418 { 419 name: "by basename glob", 420 runner: func(resolver file.Resolver) []file.Location { 421 // links are searched, but resolve to the real files 422 actualLocations, err := resolver.FilesByGlob("**/file-?.txt") 423 assert.NoError(t, err) 424 return actualLocations 425 }, 426 expected: []file.Location{ 427 file.NewVirtualLocation("/file-1.txt", "/file-1.txt"), 428 file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), 429 file.NewVirtualLocation("/file-3.txt", "/file-3.txt"), 430 file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), 431 }, 432 }, 433 { 434 name: "by basename glob to links", 435 runner: func(resolver file.Resolver) []file.Location { 436 actualLocations, err := resolver.FilesByGlob("**/link-*") 437 assert.NoError(t, err) 438 return actualLocations 439 }, 440 expected: []file.Location{ 441 file.NewVirtualLocation("/file-1.txt", "/link-1"), 442 file.NewVirtualLocation("/file-2.txt", "/link-2"), 443 444 // we already have this real file path via another link, so only one is returned 445 // file.NewVirtualLocation("/file-2.txt", "/link-indirect"), 446 447 file.NewVirtualLocation("/file-3.txt", "/link-within"), 448 }, 449 }, 450 { 451 name: "by extension", 452 runner: func(resolver file.Resolver) []file.Location { 453 // links are searched, but resolve to the real files 454 actualLocations, err := resolver.FilesByGlob("**/*.txt") 455 assert.NoError(t, err) 456 return actualLocations 457 }, 458 expected: []file.Location{ 459 file.NewVirtualLocation("/file-1.txt", "/file-1.txt"), 460 file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), 461 file.NewVirtualLocation("/file-3.txt", "/file-3.txt"), 462 file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), 463 }, 464 }, 465 { 466 name: "by path to degree 1 link", 467 runner: func(resolver file.Resolver) []file.Location { 468 // links resolve to the final file 469 actualLocations, err := resolver.FilesByPath("/link-2") 470 assert.NoError(t, err) 471 return actualLocations 472 }, 473 expected: []file.Location{ 474 // we have multiple copies across layers 475 file.NewVirtualLocation("/file-2.txt", "/link-2"), 476 }, 477 }, 478 { 479 name: "by path to degree 2 link", 480 runner: func(resolver file.Resolver) []file.Location { 481 // multiple links resolves to the final file 482 actualLocations, err := resolver.FilesByPath("/link-indirect") 483 assert.NoError(t, err) 484 return actualLocations 485 }, 486 expected: []file.Location{ 487 // we have multiple copies across layers 488 file.NewVirtualLocation("/file-2.txt", "/link-indirect"), 489 }, 490 }, 491 } 492 493 for _, test := range tests { 494 t.Run(test.name, func(t *testing.T) { 495 496 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 497 498 resolver, err := NewFromContainerImageSquash(img) 499 assert.NoError(t, err) 500 501 actual := test.runner(resolver) 502 503 compareLocations(t, test.expected, actual) 504 }) 505 } 506 507 } 508 509 func compareLocations(t *testing.T, expected, actual []file.Location) { 510 t.Helper() 511 ignoreUnexported := cmpopts.IgnoreUnexported(file.LocationData{}) 512 ignoreMetadata := cmpopts.IgnoreFields(file.LocationMetadata{}, "Annotations") 513 ignoreFS := cmpopts.IgnoreFields(file.Coordinates{}, "FileSystemID") 514 515 sort.Sort(file.Locations(expected)) 516 sort.Sort(file.Locations(actual)) 517 518 if d := cmp.Diff(expected, actual, 519 ignoreUnexported, 520 ignoreFS, 521 ignoreMetadata, 522 ); d != "" { 523 524 t.Errorf("unexpected locations (-want +got):\n%s", d) 525 } 526 527 } 528 529 func TestSquashResolver_AllLocations(t *testing.T) { 530 img := imagetest.GetFixtureImage(t, "docker-archive", "image-files-deleted") 531 532 resolver, err := NewFromContainerImageSquash(img) 533 assert.NoError(t, err) 534 535 paths := strset.New() 536 for loc := range resolver.AllLocations() { 537 paths.Add(loc.RealPath) 538 } 539 expected := []string{ 540 "/Dockerfile", 541 "/file-3.txt", 542 "/target", 543 "/target/file-2.txt", 544 } 545 546 // depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image. 547 // this isn't important for the test, so we remove them. 548 paths.Remove("/proc", "/sys", "/dev", "/etc") 549 550 pathsList := paths.List() 551 sort.Strings(pathsList) 552 553 assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List())) 554 }