github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/internal/fileresolver/container_image_squash_test.go (about) 1 package fileresolver 2 3 import ( 4 "io" 5 "sort" 6 "testing" 7 8 "github.com/google/go-cmp/cmp" 9 "github.com/google/go-cmp/cmp/cmpopts" 10 "github.com/scylladb/go-set/strset" 11 "github.com/stretchr/testify/assert" 12 "github.com/stretchr/testify/require" 13 14 "github.com/anchore/stereoscope/pkg/imagetest" 15 "github.com/anchore/syft/syft/file" 16 ) 17 18 func TestImageSquashResolver_FilesByPath(t *testing.T) { 19 cases := []struct { 20 name string 21 linkPath string 22 resolveLayer uint 23 resolvePath string 24 forcePositiveHasPath bool 25 }{ 26 { 27 name: "link with previous data", 28 linkPath: "/link-1", 29 resolveLayer: 1, 30 resolvePath: "/file-1.txt", 31 }, 32 { 33 name: "link with in layer data", 34 linkPath: "/link-within", 35 resolveLayer: 5, 36 resolvePath: "/file-3.txt", 37 }, 38 { 39 name: "link with overridden data", 40 linkPath: "/link-2", 41 resolveLayer: 7, 42 resolvePath: "/file-2.txt", 43 }, 44 { 45 name: "indirect link (with overridden data)", 46 linkPath: "/link-indirect", 47 resolveLayer: 7, 48 resolvePath: "/file-2.txt", 49 }, 50 { 51 name: "dead link", 52 linkPath: "/link-dead", 53 resolveLayer: 8, 54 resolvePath: "", 55 // the path should exist, even if the link is dead 56 forcePositiveHasPath: true, 57 }, 58 { 59 name: "ignore directories", 60 linkPath: "/bin", 61 resolvePath: "", 62 // the path should exist, even if we ignore it 63 forcePositiveHasPath: true, 64 }, 65 { 66 name: "parent is a link (with overridden data)", 67 linkPath: "/parent-link/file-4.txt", 68 resolveLayer: 11, 69 resolvePath: "/parent/file-4.txt", 70 }, 71 } 72 for _, c := range cases { 73 t.Run(c.name, func(t *testing.T) { 74 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 75 76 resolver, err := NewFromContainerImageSquash(img) 77 if err != nil { 78 t.Fatalf("could not create resolver: %+v", err) 79 } 80 81 hasPath := resolver.HasPath(c.linkPath) 82 if !c.forcePositiveHasPath { 83 if c.resolvePath != "" && !hasPath { 84 t.Errorf("expected HasPath() to indicate existance, but did not") 85 } else if c.resolvePath == "" && hasPath { 86 t.Errorf("expeced HasPath() to NOT indicate existance, but does") 87 } 88 } else if !hasPath { 89 t.Errorf("expected HasPath() to indicate existance, but did not (force path)") 90 } 91 92 refs, err := resolver.FilesByPath(c.linkPath) 93 if err != nil { 94 t.Fatalf("could not use resolver: %+v", err) 95 } 96 97 expectedRefs := 1 98 if c.resolvePath == "" { 99 expectedRefs = 0 100 } 101 102 if len(refs) != expectedRefs { 103 t.Fatalf("unexpected number of resolutions: %d", len(refs)) 104 } 105 106 if expectedRefs == 0 { 107 // nothing else to assert 108 return 109 } 110 111 actual := refs[0] 112 113 if string(actual.Reference().RealPath) != c.resolvePath { 114 t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath) 115 } 116 117 if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath { 118 t.Errorf("we should always prefer real paths over ones with links") 119 } 120 121 layer := img.FileCatalog.Layer(actual.Reference()) 122 123 if layer.Metadata.Index != c.resolveLayer { 124 t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer) 125 } 126 }) 127 } 128 } 129 130 func TestImageSquashResolver_FilesByGlob(t *testing.T) { 131 cases := []struct { 132 name string 133 glob string 134 resolveLayer uint 135 resolvePath string 136 }{ 137 { 138 name: "link with previous data", 139 glob: "**/link-1", 140 resolveLayer: 1, 141 resolvePath: "/file-1.txt", 142 }, 143 { 144 name: "link with in layer data", 145 glob: "**/link-within", 146 resolveLayer: 5, 147 resolvePath: "/file-3.txt", 148 }, 149 { 150 name: "link with overridden data", 151 glob: "**/link-2", 152 resolveLayer: 7, 153 resolvePath: "/file-2.txt", 154 }, 155 { 156 name: "indirect link (with overridden data)", 157 glob: "**/link-indirect", 158 resolveLayer: 7, 159 resolvePath: "/file-2.txt", 160 }, 161 { 162 name: "dead link", 163 glob: "**/link-dead", 164 // dead links are dead! they shouldn't match on globs 165 resolvePath: "", 166 }, 167 { 168 name: "ignore directories", 169 glob: "**/bin", 170 resolvePath: "", 171 }, 172 { 173 name: "parent without link", 174 glob: "**/parent/*.txt", 175 resolveLayer: 11, 176 resolvePath: "/parent/file-4.txt", 177 }, 178 { 179 name: "parent is a link (override)", 180 glob: "**/parent-link/file-4.txt", 181 resolveLayer: 11, 182 resolvePath: "/parent/file-4.txt", 183 }, 184 } 185 for _, c := range cases { 186 t.Run(c.name, func(t *testing.T) { 187 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 188 189 resolver, err := NewFromContainerImageSquash(img) 190 if err != nil { 191 t.Fatalf("could not create resolver: %+v", err) 192 } 193 194 refs, err := resolver.FilesByGlob(c.glob) 195 if err != nil { 196 t.Fatalf("could not use resolver: %+v", err) 197 } 198 199 expectedRefs := 1 200 if c.resolvePath == "" { 201 expectedRefs = 0 202 } 203 204 if len(refs) != expectedRefs { 205 t.Fatalf("unexpected number of resolutions: %d", len(refs)) 206 } 207 208 if expectedRefs == 0 { 209 // nothing else to assert 210 return 211 } 212 213 actual := refs[0] 214 215 if string(actual.Reference().RealPath) != c.resolvePath { 216 t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath) 217 } 218 219 if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath { 220 t.Errorf("we should always prefer real paths over ones with links") 221 } 222 223 layer := img.FileCatalog.Layer(actual.Reference()) 224 225 if layer.Metadata.Index != c.resolveLayer { 226 t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer) 227 } 228 }) 229 } 230 } 231 232 func Test_imageSquashResolver_FilesByMIMEType(t *testing.T) { 233 234 tests := []struct { 235 fixtureName string 236 mimeType string 237 expectedPaths *strset.Set 238 }{ 239 { 240 fixtureName: "image-simple", 241 mimeType: "text/plain", 242 expectedPaths: strset.New("/somefile-1.txt", "/somefile-2.txt", "/really/nested/file-3.txt"), 243 }, 244 } 245 246 for _, test := range tests { 247 t.Run(test.fixtureName, func(t *testing.T) { 248 img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureName) 249 250 resolver, err := NewFromContainerImageSquash(img) 251 assert.NoError(t, err) 252 253 locations, err := resolver.FilesByMIMEType(test.mimeType) 254 assert.NoError(t, err) 255 256 assert.Len(t, locations, test.expectedPaths.Size()) 257 for _, l := range locations { 258 assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath) 259 } 260 }) 261 } 262 } 263 264 func Test_imageSquashResolver_hasFilesystemIDInLocation(t *testing.T) { 265 img := imagetest.GetFixtureImage(t, "docker-archive", "image-duplicate-path") 266 267 resolver, err := NewFromContainerImageSquash(img) 268 assert.NoError(t, err) 269 270 locations, err := resolver.FilesByMIMEType("text/plain") 271 assert.NoError(t, err) 272 assert.NotEmpty(t, locations) 273 for _, location := range locations { 274 assert.NotEmpty(t, location.FileSystemID) 275 } 276 277 locations, err = resolver.FilesByGlob("*.txt") 278 assert.NoError(t, err) 279 assert.NotEmpty(t, locations) 280 for _, location := range locations { 281 assert.NotEmpty(t, location.FileSystemID) 282 } 283 284 locations, err = resolver.FilesByPath("/somefile-1.txt") 285 assert.NoError(t, err) 286 assert.NotEmpty(t, locations) 287 for _, location := range locations { 288 assert.NotEmpty(t, location.FileSystemID) 289 } 290 291 } 292 293 func TestSquashImageResolver_FilesContents(t *testing.T) { 294 295 tests := []struct { 296 name string 297 path string 298 contents []string 299 }{ 300 { 301 name: "one degree", 302 path: "link-2", 303 contents: []string{ 304 "NEW file override!", // always from the squashed perspective 305 }, 306 }, 307 { 308 name: "two degrees", 309 path: "link-indirect", 310 contents: []string{ 311 "NEW file override!", // always from the squashed perspective 312 }, 313 }, 314 { 315 name: "dead link", 316 path: "link-dead", 317 contents: []string{}, 318 }, 319 } 320 321 for _, test := range tests { 322 t.Run(test.name, func(t *testing.T) { 323 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 324 325 resolver, err := NewFromContainerImageSquash(img) 326 assert.NoError(t, err) 327 328 refs, err := resolver.FilesByPath(test.path) 329 require.NoError(t, err) 330 assert.Len(t, refs, len(test.contents)) 331 332 for idx, loc := range refs { 333 334 reader, err := resolver.FileContentsByLocation(loc) 335 require.NoError(t, err) 336 337 actual, err := io.ReadAll(reader) 338 require.NoError(t, err) 339 340 assert.Equal(t, test.contents[idx], string(actual)) 341 } 342 }) 343 } 344 } 345 346 func TestSquashImageResolver_FilesContents_errorOnDirRequest(t *testing.T) { 347 348 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 349 350 resolver, err := NewFromContainerImageSquash(img) 351 assert.NoError(t, err) 352 353 var dirLoc *file.Location 354 for loc := range resolver.AllLocations() { 355 entry, err := resolver.img.FileCatalog.Get(loc.Reference()) 356 require.NoError(t, err) 357 if entry.Metadata.IsDir() { 358 dirLoc = &loc 359 break 360 } 361 } 362 363 require.NotNil(t, dirLoc) 364 365 reader, err := resolver.FileContentsByLocation(*dirLoc) 366 require.Error(t, err) 367 require.Nil(t, reader) 368 } 369 370 func Test_imageSquashResolver_resolvesLinks(t *testing.T) { 371 tests := []struct { 372 name string 373 runner func(file.Resolver) []file.Location 374 expected []file.Location 375 }{ 376 { 377 name: "by mimetype", 378 runner: func(resolver file.Resolver) []file.Location { 379 // links should not show up when searching mimetype 380 actualLocations, err := resolver.FilesByMIMEType("text/plain") 381 assert.NoError(t, err) 382 return actualLocations 383 }, 384 expected: []file.Location{ 385 file.NewVirtualLocation("/etc/group", "/etc/group"), 386 file.NewVirtualLocation("/etc/passwd", "/etc/passwd"), 387 file.NewVirtualLocation("/etc/shadow", "/etc/shadow"), 388 file.NewVirtualLocation("/file-1.txt", "/file-1.txt"), 389 file.NewVirtualLocation("/file-3.txt", "/file-3.txt"), 390 file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), 391 file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), 392 }, 393 }, 394 { 395 name: "by glob to links", 396 runner: func(resolver file.Resolver) []file.Location { 397 // links are searched, but resolve to the real files 398 actualLocations, err := resolver.FilesByGlob("*ink-*") 399 assert.NoError(t, err) 400 return actualLocations 401 }, 402 expected: []file.Location{ 403 file.NewVirtualLocation("/file-1.txt", "/link-1"), 404 file.NewVirtualLocation("/file-2.txt", "/link-2"), 405 406 // though this is a link, and it matches to the file, the resolver de-duplicates files 407 // by the real path, so it is not included in the results 408 //file.NewVirtualLocation("/file-2.txt", "/link-indirect"), 409 410 file.NewVirtualLocation("/file-3.txt", "/link-within"), 411 }, 412 }, 413 { 414 name: "by basename", 415 runner: func(resolver file.Resolver) []file.Location { 416 // links are searched, but resolve to the real files 417 actualLocations, err := resolver.FilesByGlob("**/file-2.txt") 418 assert.NoError(t, err) 419 return actualLocations 420 }, 421 expected: []file.Location{ 422 // this has two copies in the base image, which overwrites the same location 423 file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), 424 }, 425 }, 426 { 427 name: "by basename glob", 428 runner: func(resolver file.Resolver) []file.Location { 429 // links are searched, but resolve to the real files 430 actualLocations, err := resolver.FilesByGlob("**/file-?.txt") 431 assert.NoError(t, err) 432 return actualLocations 433 }, 434 expected: []file.Location{ 435 file.NewVirtualLocation("/file-1.txt", "/file-1.txt"), 436 file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), 437 file.NewVirtualLocation("/file-3.txt", "/file-3.txt"), 438 file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), 439 }, 440 }, 441 { 442 name: "by basename glob to links", 443 runner: func(resolver file.Resolver) []file.Location { 444 actualLocations, err := resolver.FilesByGlob("**/link-*") 445 assert.NoError(t, err) 446 return actualLocations 447 }, 448 expected: []file.Location{ 449 file.NewVirtualLocation("/file-1.txt", "/link-1"), 450 file.NewVirtualLocation("/file-2.txt", "/link-2"), 451 452 // we already have this real file path via another link, so only one is returned 453 // file.NewVirtualLocation("/file-2.txt", "/link-indirect"), 454 455 file.NewVirtualLocation("/file-3.txt", "/link-within"), 456 }, 457 }, 458 { 459 name: "by extension", 460 runner: func(resolver file.Resolver) []file.Location { 461 // links are searched, but resolve to the real files 462 actualLocations, err := resolver.FilesByGlob("**/*.txt") 463 assert.NoError(t, err) 464 return actualLocations 465 }, 466 expected: []file.Location{ 467 file.NewVirtualLocation("/file-1.txt", "/file-1.txt"), 468 file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), 469 file.NewVirtualLocation("/file-3.txt", "/file-3.txt"), 470 file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), 471 }, 472 }, 473 { 474 name: "by path to degree 1 link", 475 runner: func(resolver file.Resolver) []file.Location { 476 // links resolve to the final file 477 actualLocations, err := resolver.FilesByPath("/link-2") 478 assert.NoError(t, err) 479 return actualLocations 480 }, 481 expected: []file.Location{ 482 // we have multiple copies across layers 483 file.NewVirtualLocation("/file-2.txt", "/link-2"), 484 }, 485 }, 486 { 487 name: "by path to degree 2 link", 488 runner: func(resolver file.Resolver) []file.Location { 489 // multiple links resolves to the final file 490 actualLocations, err := resolver.FilesByPath("/link-indirect") 491 assert.NoError(t, err) 492 return actualLocations 493 }, 494 expected: []file.Location{ 495 // we have multiple copies across layers 496 file.NewVirtualLocation("/file-2.txt", "/link-indirect"), 497 }, 498 }, 499 } 500 501 for _, test := range tests { 502 t.Run(test.name, func(t *testing.T) { 503 504 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 505 506 resolver, err := NewFromContainerImageSquash(img) 507 assert.NoError(t, err) 508 509 actual := test.runner(resolver) 510 511 compareLocations(t, test.expected, actual) 512 }) 513 } 514 515 } 516 517 func compareLocations(t *testing.T, expected, actual []file.Location) { 518 t.Helper() 519 ignoreUnexported := cmpopts.IgnoreUnexported(file.LocationData{}) 520 ignoreMetadata := cmpopts.IgnoreFields(file.LocationMetadata{}, "Annotations") 521 ignoreFS := cmpopts.IgnoreFields(file.Coordinates{}, "FileSystemID") 522 523 sort.Sort(file.Locations(expected)) 524 sort.Sort(file.Locations(actual)) 525 526 if d := cmp.Diff(expected, actual, 527 ignoreUnexported, 528 ignoreFS, 529 ignoreMetadata, 530 ); d != "" { 531 532 t.Errorf("unexpected locations (-want +got):\n%s", d) 533 } 534 535 } 536 537 func TestSquashResolver_AllLocations(t *testing.T) { 538 img := imagetest.GetFixtureImage(t, "docker-archive", "image-files-deleted") 539 540 resolver, err := NewFromContainerImageSquash(img) 541 assert.NoError(t, err) 542 543 paths := strset.New() 544 for loc := range resolver.AllLocations() { 545 paths.Add(loc.RealPath) 546 } 547 expected := []string{ 548 "/Dockerfile", 549 "/file-3.txt", 550 "/target", 551 "/target/file-2.txt", 552 } 553 554 // depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image. 555 // this isn't important for the test, so we remove them. 556 paths.Remove("/proc", "/sys", "/dev", "/etc") 557 558 pathsList := paths.List() 559 sort.Strings(pathsList) 560 561 assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List())) 562 }