github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/internal/fileresolver/container_image_squash_test.go (about) 1 package fileresolver 2 3 import ( 4 "io" 5 "sort" 6 "testing" 7 8 "github.com/google/go-cmp/cmp" 9 "github.com/nextlinux/gosbom/gosbom/file" 10 "github.com/scylladb/go-set/strset" 11 "github.com/stretchr/testify/assert" 12 "github.com/stretchr/testify/require" 13 14 "github.com/anchore/stereoscope/pkg/imagetest" 15 ) 16 17 func TestImageSquashResolver_FilesByPath(t *testing.T) { 18 cases := []struct { 19 name string 20 linkPath string 21 resolveLayer uint 22 resolvePath string 23 forcePositiveHasPath bool 24 }{ 25 { 26 name: "link with previous data", 27 linkPath: "/link-1", 28 resolveLayer: 1, 29 resolvePath: "/file-1.txt", 30 }, 31 { 32 name: "link with in layer data", 33 linkPath: "/link-within", 34 resolveLayer: 5, 35 resolvePath: "/file-3.txt", 36 }, 37 { 38 name: "link with overridden data", 39 linkPath: "/link-2", 40 resolveLayer: 7, 41 resolvePath: "/file-2.txt", 42 }, 43 { 44 name: "indirect link (with overridden data)", 45 linkPath: "/link-indirect", 46 resolveLayer: 7, 47 resolvePath: "/file-2.txt", 48 }, 49 { 50 name: "dead link", 51 linkPath: "/link-dead", 52 resolveLayer: 8, 53 resolvePath: "", 54 // the path should exist, even if the link is dead 55 forcePositiveHasPath: true, 56 }, 57 { 58 name: "ignore directories", 59 linkPath: "/bin", 60 resolvePath: "", 61 // the path should exist, even if we ignore it 62 forcePositiveHasPath: true, 63 }, 64 { 65 name: "parent is a link (with overridden data)", 66 linkPath: "/parent-link/file-4.txt", 67 resolveLayer: 11, 68 resolvePath: "/parent/file-4.txt", 69 }, 70 } 71 for _, c := range cases { 72 t.Run(c.name, func(t *testing.T) { 73 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 74 75 resolver, err := NewFromContainerImageSquash(img) 76 if err != nil { 77 t.Fatalf("could not create resolver: %+v", err) 78 } 79 80 hasPath := resolver.HasPath(c.linkPath) 81 if !c.forcePositiveHasPath { 82 if c.resolvePath != "" && !hasPath { 83 t.Errorf("expected HasPath() to indicate existance, but did not") 84 } else if c.resolvePath == "" && hasPath { 85 t.Errorf("expeced HasPath() to NOT indicate existance, but does") 86 } 87 } else if !hasPath { 88 t.Errorf("expected HasPath() to indicate existance, but did not (force path)") 89 } 90 91 refs, err := resolver.FilesByPath(c.linkPath) 92 if err != nil { 93 t.Fatalf("could not use resolver: %+v", err) 94 } 95 96 expectedRefs := 1 97 if c.resolvePath == "" { 98 expectedRefs = 0 99 } 100 101 if len(refs) != expectedRefs { 102 t.Fatalf("unexpected number of resolutions: %d", len(refs)) 103 } 104 105 if expectedRefs == 0 { 106 // nothing else to assert 107 return 108 } 109 110 actual := refs[0] 111 112 if string(actual.Reference().RealPath) != c.resolvePath { 113 t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath) 114 } 115 116 if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath { 117 t.Errorf("we should always prefer real paths over ones with links") 118 } 119 120 layer := img.FileCatalog.Layer(actual.Reference()) 121 122 if layer.Metadata.Index != c.resolveLayer { 123 t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer) 124 } 125 }) 126 } 127 } 128 129 func TestImageSquashResolver_FilesByGlob(t *testing.T) { 130 cases := []struct { 131 name string 132 glob string 133 resolveLayer uint 134 resolvePath string 135 }{ 136 { 137 name: "link with previous data", 138 glob: "**/link-1", 139 resolveLayer: 1, 140 resolvePath: "/file-1.txt", 141 }, 142 { 143 name: "link with in layer data", 144 glob: "**/link-within", 145 resolveLayer: 5, 146 resolvePath: "/file-3.txt", 147 }, 148 { 149 name: "link with overridden data", 150 glob: "**/link-2", 151 resolveLayer: 7, 152 resolvePath: "/file-2.txt", 153 }, 154 { 155 name: "indirect link (with overridden data)", 156 glob: "**/link-indirect", 157 resolveLayer: 7, 158 resolvePath: "/file-2.txt", 159 }, 160 { 161 name: "dead link", 162 glob: "**/link-dead", 163 // dead links are dead! they shouldn't match on globs 164 resolvePath: "", 165 }, 166 { 167 name: "ignore directories", 168 glob: "**/bin", 169 resolvePath: "", 170 }, 171 { 172 name: "parent without link", 173 glob: "**/parent/*.txt", 174 resolveLayer: 11, 175 resolvePath: "/parent/file-4.txt", 176 }, 177 { 178 name: "parent is a link (override)", 179 glob: "**/parent-link/file-4.txt", 180 resolveLayer: 11, 181 resolvePath: "/parent/file-4.txt", 182 }, 183 } 184 for _, c := range cases { 185 t.Run(c.name, func(t *testing.T) { 186 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 187 188 resolver, err := NewFromContainerImageSquash(img) 189 if err != nil { 190 t.Fatalf("could not create resolver: %+v", err) 191 } 192 193 refs, err := resolver.FilesByGlob(c.glob) 194 if err != nil { 195 t.Fatalf("could not use resolver: %+v", err) 196 } 197 198 expectedRefs := 1 199 if c.resolvePath == "" { 200 expectedRefs = 0 201 } 202 203 if len(refs) != expectedRefs { 204 t.Fatalf("unexpected number of resolutions: %d", len(refs)) 205 } 206 207 if expectedRefs == 0 { 208 // nothing else to assert 209 return 210 } 211 212 actual := refs[0] 213 214 if string(actual.Reference().RealPath) != c.resolvePath { 215 t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath) 216 } 217 218 if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath { 219 t.Errorf("we should always prefer real paths over ones with links") 220 } 221 222 layer := img.FileCatalog.Layer(actual.Reference()) 223 224 if layer.Metadata.Index != c.resolveLayer { 225 t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer) 226 } 227 }) 228 } 229 } 230 231 func Test_imageSquashResolver_FilesByMIMEType(t *testing.T) { 232 233 tests := []struct { 234 fixtureName string 235 mimeType string 236 expectedPaths *strset.Set 237 }{ 238 { 239 fixtureName: "image-simple", 240 mimeType: "text/plain", 241 expectedPaths: strset.New("/somefile-1.txt", "/somefile-2.txt", "/really/nested/file-3.txt"), 242 }, 243 } 244 245 for _, test := range tests { 246 t.Run(test.fixtureName, func(t *testing.T) { 247 img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureName) 248 249 resolver, err := NewFromContainerImageSquash(img) 250 assert.NoError(t, err) 251 252 locations, err := resolver.FilesByMIMEType(test.mimeType) 253 assert.NoError(t, err) 254 255 assert.Len(t, locations, test.expectedPaths.Size()) 256 for _, l := range locations { 257 assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath) 258 } 259 }) 260 } 261 } 262 263 func Test_imageSquashResolver_hasFilesystemIDInLocation(t *testing.T) { 264 img := imagetest.GetFixtureImage(t, "docker-archive", "image-duplicate-path") 265 266 resolver, err := NewFromContainerImageSquash(img) 267 assert.NoError(t, err) 268 269 locations, err := resolver.FilesByMIMEType("text/plain") 270 assert.NoError(t, err) 271 assert.NotEmpty(t, locations) 272 for _, location := range locations { 273 assert.NotEmpty(t, location.FileSystemID) 274 } 275 276 locations, err = resolver.FilesByGlob("*.txt") 277 assert.NoError(t, err) 278 assert.NotEmpty(t, locations) 279 for _, location := range locations { 280 assert.NotEmpty(t, location.FileSystemID) 281 } 282 283 locations, err = resolver.FilesByPath("/somefile-1.txt") 284 assert.NoError(t, err) 285 assert.NotEmpty(t, locations) 286 for _, location := range locations { 287 assert.NotEmpty(t, location.FileSystemID) 288 } 289 290 } 291 292 func TestSquashImageResolver_FilesContents(t *testing.T) { 293 294 tests := []struct { 295 name string 296 path string 297 contents []string 298 }{ 299 { 300 name: "one degree", 301 path: "link-2", 302 contents: []string{ 303 "NEW file override!", // always from the squashed perspective 304 }, 305 }, 306 { 307 name: "two degrees", 308 path: "link-indirect", 309 contents: []string{ 310 "NEW file override!", // always from the squashed perspective 311 }, 312 }, 313 { 314 name: "dead link", 315 path: "link-dead", 316 contents: []string{}, 317 }, 318 } 319 320 for _, test := range tests { 321 t.Run(test.name, func(t *testing.T) { 322 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 323 324 resolver, err := NewFromContainerImageSquash(img) 325 assert.NoError(t, err) 326 327 refs, err := resolver.FilesByPath(test.path) 328 require.NoError(t, err) 329 assert.Len(t, refs, len(test.contents)) 330 331 for idx, loc := range refs { 332 333 reader, err := resolver.FileContentsByLocation(loc) 334 require.NoError(t, err) 335 336 actual, err := io.ReadAll(reader) 337 require.NoError(t, err) 338 339 assert.Equal(t, test.contents[idx], string(actual)) 340 } 341 }) 342 } 343 } 344 345 func TestSquashImageResolver_FilesContents_errorOnDirRequest(t *testing.T) { 346 347 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 348 349 resolver, err := NewFromContainerImageSquash(img) 350 assert.NoError(t, err) 351 352 var dirLoc *file.Location 353 for loc := range resolver.AllLocations() { 354 entry, err := resolver.img.FileCatalog.Get(loc.Reference()) 355 require.NoError(t, err) 356 if entry.Metadata.IsDir() { 357 dirLoc = &loc 358 break 359 } 360 } 361 362 require.NotNil(t, dirLoc) 363 364 reader, err := resolver.FileContentsByLocation(*dirLoc) 365 require.Error(t, err) 366 require.Nil(t, reader) 367 } 368 369 func Test_imageSquashResolver_resolvesLinks(t *testing.T) { 370 tests := []struct { 371 name string 372 runner func(file.Resolver) []file.Location 373 expected []file.Location 374 }{ 375 { 376 name: "by mimetype", 377 runner: func(resolver file.Resolver) []file.Location { 378 // links should not show up when searching mimetype 379 actualLocations, err := resolver.FilesByMIMEType("text/plain") 380 assert.NoError(t, err) 381 return actualLocations 382 }, 383 expected: []file.Location{ 384 file.NewVirtualLocation("/etc/group", "/etc/group"), 385 file.NewVirtualLocation("/etc/passwd", "/etc/passwd"), 386 file.NewVirtualLocation("/etc/shadow", "/etc/shadow"), 387 file.NewVirtualLocation("/file-1.txt", "/file-1.txt"), 388 file.NewVirtualLocation("/file-3.txt", "/file-3.txt"), 389 file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), 390 file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), 391 }, 392 }, 393 { 394 name: "by glob to links", 395 runner: func(resolver file.Resolver) []file.Location { 396 // links are searched, but resolve to the real files 397 actualLocations, err := resolver.FilesByGlob("*ink-*") 398 assert.NoError(t, err) 399 return actualLocations 400 }, 401 expected: []file.Location{ 402 file.NewVirtualLocation("/file-1.txt", "/link-1"), 403 file.NewVirtualLocation("/file-2.txt", "/link-2"), 404 405 // though this is a link, and it matches to the file, the resolver de-duplicates files 406 // by the real path, so it is not included in the results 407 //file.NewVirtualLocation("/file-2.txt", "/link-indirect"), 408 409 file.NewVirtualLocation("/file-3.txt", "/link-within"), 410 }, 411 }, 412 { 413 name: "by basename", 414 runner: func(resolver file.Resolver) []file.Location { 415 // links are searched, but resolve to the real files 416 actualLocations, err := resolver.FilesByGlob("**/file-2.txt") 417 assert.NoError(t, err) 418 return actualLocations 419 }, 420 expected: []file.Location{ 421 // this has two copies in the base image, which overwrites the same location 422 file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), 423 }, 424 }, 425 { 426 name: "by basename glob", 427 runner: func(resolver file.Resolver) []file.Location { 428 // links are searched, but resolve to the real files 429 actualLocations, err := resolver.FilesByGlob("**/file-?.txt") 430 assert.NoError(t, err) 431 return actualLocations 432 }, 433 expected: []file.Location{ 434 file.NewVirtualLocation("/file-1.txt", "/file-1.txt"), 435 file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), 436 file.NewVirtualLocation("/file-3.txt", "/file-3.txt"), 437 file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), 438 }, 439 }, 440 { 441 name: "by basename glob to links", 442 runner: func(resolver file.Resolver) []file.Location { 443 actualLocations, err := resolver.FilesByGlob("**/link-*") 444 assert.NoError(t, err) 445 return actualLocations 446 }, 447 expected: []file.Location{ 448 file.NewVirtualLocation("/file-1.txt", "/link-1"), 449 file.NewVirtualLocation("/file-2.txt", "/link-2"), 450 451 // we already have this real file path via another link, so only one is returned 452 // file.NewVirtualLocation("/file-2.txt", "/link-indirect"), 453 454 file.NewVirtualLocation("/file-3.txt", "/link-within"), 455 }, 456 }, 457 { 458 name: "by extension", 459 runner: func(resolver file.Resolver) []file.Location { 460 // links are searched, but resolve to the real files 461 actualLocations, err := resolver.FilesByGlob("**/*.txt") 462 assert.NoError(t, err) 463 return actualLocations 464 }, 465 expected: []file.Location{ 466 file.NewVirtualLocation("/file-1.txt", "/file-1.txt"), 467 file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), 468 file.NewVirtualLocation("/file-3.txt", "/file-3.txt"), 469 file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), 470 }, 471 }, 472 { 473 name: "by path to degree 1 link", 474 runner: func(resolver file.Resolver) []file.Location { 475 // links resolve to the final file 476 actualLocations, err := resolver.FilesByPath("/link-2") 477 assert.NoError(t, err) 478 return actualLocations 479 }, 480 expected: []file.Location{ 481 // we have multiple copies across layers 482 file.NewVirtualLocation("/file-2.txt", "/link-2"), 483 }, 484 }, 485 { 486 name: "by path to degree 2 link", 487 runner: func(resolver file.Resolver) []file.Location { 488 // multiple links resolves to the final file 489 actualLocations, err := resolver.FilesByPath("/link-indirect") 490 assert.NoError(t, err) 491 return actualLocations 492 }, 493 expected: []file.Location{ 494 // we have multiple copies across layers 495 file.NewVirtualLocation("/file-2.txt", "/link-indirect"), 496 }, 497 }, 498 } 499 500 for _, test := range tests { 501 t.Run(test.name, func(t *testing.T) { 502 503 img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") 504 505 resolver, err := NewFromContainerImageSquash(img) 506 assert.NoError(t, err) 507 508 actual := test.runner(resolver) 509 510 compareLocations(t, test.expected, actual) 511 }) 512 } 513 514 } 515 516 func TestSquashResolver_AllLocations(t *testing.T) { 517 img := imagetest.GetFixtureImage(t, "docker-archive", "image-files-deleted") 518 519 resolver, err := NewFromContainerImageSquash(img) 520 assert.NoError(t, err) 521 522 paths := strset.New() 523 for loc := range resolver.AllLocations() { 524 paths.Add(loc.RealPath) 525 } 526 expected := []string{ 527 "/Dockerfile", 528 "/file-3.txt", 529 "/target", 530 "/target/file-2.txt", 531 } 532 533 // depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image. 534 // this isn't important for the test, so we remove them. 535 paths.Remove("/proc", "/sys", "/dev", "/etc") 536 537 pathsList := paths.List() 538 sort.Strings(pathsList) 539 540 assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List())) 541 }