github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/source/source.go (about) 1 /* 2 Package source provides an abstraction to allow a user to loosely define a data source to catalog and expose a common interface that 3 catalogers and use explore and analyze data from the data source. All valid (cataloggable) data sources are defined 4 within this package. 5 */ 6 package source 7 8 import ( 9 "context" 10 "fmt" 11 "os" 12 "path/filepath" 13 "strings" 14 "sync" 15 16 "github.com/bmatcuk/doublestar/v4" 17 "github.com/mholt/archiver/v3" 18 "github.com/nextlinux/gosbom/gosbom/artifact" 19 "github.com/nextlinux/gosbom/gosbom/file" 20 "github.com/nextlinux/gosbom/gosbom/internal/fileresolver" 21 "github.com/nextlinux/gosbom/internal/log" 22 digest "github.com/opencontainers/go-digest" 23 "github.com/spf13/afero" 24 25 "github.com/anchore/stereoscope" 26 "github.com/anchore/stereoscope/pkg/image" 27 ) 28 29 // Source is an object that captures the data source to be cataloged, configuration, and a specific resolver used 30 // in cataloging (based on the data source and configuration) 31 type Source struct { 32 id artifact.ID `hash:"ignore"` 33 Image *image.Image `hash:"ignore"` // the image object to be cataloged (image only) 34 Metadata Metadata 35 directoryResolver *fileresolver.Directory `hash:"ignore"` 36 path string 37 base string 38 mutex *sync.Mutex 39 Exclusions []string `hash:"ignore"` 40 } 41 42 // Input is an object that captures the detected user input regarding source location, scheme, and provider type. 43 // It acts as a struct input for some source constructors. 44 type Input struct { 45 UserInput string 46 Scheme Scheme 47 ImageSource image.Source 48 Location string 49 Platform string 50 Name string 51 Version string 52 } 53 54 // ParseInput generates a source Input that can be used as an argument to generate a new source 55 // from specific providers including a registry. 56 func ParseInput(userInput string, platform string) (*Input, error) { 57 return ParseInputWithName(userInput, platform, "", "") 58 } 59 60 // ParseInputWithName generates a source Input that can be used as an argument to generate a new source 61 // from specific providers including a registry, with an explicit name. 62 func ParseInputWithName(userInput string, platform, name, defaultImageSource string) (*Input, error) { 63 return ParseInputWithNameVersion(userInput, platform, name, "", defaultImageSource) 64 } 65 66 // ParseInputWithNameVersion generates a source Input that can be used as an argument to generate a new source 67 // from specific providers including a registry, with an explicit name and version. 68 func ParseInputWithNameVersion(userInput, platform, name, version, defaultImageSource string) (*Input, error) { 69 fs := afero.NewOsFs() 70 scheme, source, location, err := DetectScheme(fs, image.DetectSource, userInput) 71 if err != nil { 72 return nil, err 73 } 74 75 if source == image.UnknownSource { 76 // only run for these two scheme 77 // only check on packages command, attest we automatically try to pull from userInput 78 switch scheme { 79 case ImageScheme, UnknownScheme: 80 scheme = ImageScheme 81 location = userInput 82 if defaultImageSource != "" { 83 source = parseDefaultImageSource(defaultImageSource) 84 } else { 85 imagePullSource := image.DetermineDefaultImagePullSource(userInput) 86 source = imagePullSource 87 } 88 if location == "" { 89 location = userInput 90 } 91 default: 92 } 93 } 94 95 if scheme != ImageScheme && platform != "" { 96 return nil, fmt.Errorf("cannot specify a platform for a non-image source") 97 } 98 99 // collect user input for downstream consumption 100 return &Input{ 101 UserInput: userInput, 102 Scheme: scheme, 103 ImageSource: source, 104 Location: location, 105 Platform: platform, 106 Name: name, 107 Version: version, 108 }, nil 109 } 110 111 func parseDefaultImageSource(defaultImageSource string) image.Source { 112 switch defaultImageSource { 113 case "registry": 114 return image.OciRegistrySource 115 case "docker": 116 return image.DockerDaemonSource 117 case "podman": 118 return image.PodmanDaemonSource 119 default: 120 return image.UnknownSource 121 } 122 } 123 124 type sourceDetector func(string) (image.Source, string, error) 125 126 func NewFromRegistry(in Input, registryOptions *image.RegistryOptions, exclusions []string) (*Source, func(), error) { 127 source, cleanupFn, err := generateImageSource(in, registryOptions) 128 if source != nil { 129 source.Exclusions = exclusions 130 } 131 return source, cleanupFn, err 132 } 133 134 // New produces a Source based on userInput like dir: or image:tag 135 func New(in Input, registryOptions *image.RegistryOptions, exclusions []string) (*Source, func(), error) { 136 var err error 137 fs := afero.NewOsFs() 138 var source *Source 139 cleanupFn := func() {} 140 141 switch in.Scheme { 142 case FileScheme: 143 source, cleanupFn, err = generateFileSource(fs, in) 144 case DirectoryScheme: 145 source, cleanupFn, err = generateDirectorySource(fs, in) 146 case ImageScheme: 147 source, cleanupFn, err = generateImageSource(in, registryOptions) 148 default: 149 err = fmt.Errorf("unable to process input for scanning: %q", in.UserInput) 150 } 151 152 if err == nil { 153 source.Exclusions = exclusions 154 } 155 156 return source, cleanupFn, err 157 } 158 159 func generateImageSource(in Input, registryOptions *image.RegistryOptions) (*Source, func(), error) { 160 img, cleanup, err := getImageWithRetryStrategy(in, registryOptions) 161 if err != nil || img == nil { 162 return nil, cleanup, fmt.Errorf("could not fetch image %q: %w", in.Location, err) 163 } 164 165 s, err := NewFromImageWithNameVersion(img, in.Location, in.Name, in.Version) 166 if err != nil { 167 return nil, cleanup, fmt.Errorf("could not populate source with image: %w", err) 168 } 169 170 return &s, cleanup, nil 171 } 172 173 func parseScheme(userInput string) string { 174 parts := strings.SplitN(userInput, ":", 2) 175 if len(parts) < 2 { 176 return "" 177 } 178 179 return parts[0] 180 } 181 182 func getImageWithRetryStrategy(in Input, registryOptions *image.RegistryOptions) (*image.Image, func(), error) { 183 ctx := context.TODO() 184 185 var opts []stereoscope.Option 186 if registryOptions != nil { 187 opts = append(opts, stereoscope.WithRegistryOptions(*registryOptions)) 188 } 189 190 if in.Platform != "" { 191 opts = append(opts, stereoscope.WithPlatform(in.Platform)) 192 } 193 194 img, err := stereoscope.GetImageFromSource(ctx, in.Location, in.ImageSource, opts...) 195 cleanup := func() { 196 if err := img.Cleanup(); err != nil { 197 log.Warnf("unable to cleanup image=%q: %w", in.UserInput, err) 198 } 199 } 200 if err == nil { 201 // Success on the first try! 202 return img, cleanup, nil 203 } 204 205 scheme := parseScheme(in.UserInput) 206 if !(scheme == "docker" || scheme == "registry") { 207 // Image retrieval failed, and we shouldn't retry it. It's most likely that the 208 // user _did_ intend the parsed scheme, but there was a legitimate failure with 209 // using the scheme to load the image. Alert the user to this failure, so they 210 // can fix the problem. 211 return nil, nil, err 212 } 213 214 // Maybe the user wanted "docker" or "registry" to refer to an _image name_ 215 // (e.g. "docker:latest"), not a scheme. We'll retry image retrieval with this 216 // alternative interpretation, in an attempt to avoid unnecessary user friction. 217 218 log.Warnf( 219 "scheme %q specified, but it coincides with a common image name; re-examining user input %q"+ 220 " without scheme parsing because image retrieval using scheme parsing was unsuccessful: %v", 221 scheme, 222 in.UserInput, 223 err, 224 ) 225 226 // We need to determine the image source again, such that this determination 227 // doesn't take scheme parsing into account. 228 in.ImageSource = image.DetermineDefaultImagePullSource(in.UserInput) 229 img, userInputErr := stereoscope.GetImageFromSource(ctx, in.UserInput, in.ImageSource, opts...) 230 cleanup = func() { 231 if err := img.Cleanup(); err != nil { 232 log.Warnf("unable to cleanup image=%q: %w", in.UserInput, err) 233 } 234 } 235 if userInputErr != nil { 236 // Image retrieval failed on both tries, we will want to return both errors. 237 return nil, nil, fmt.Errorf( 238 "scheme %q specified; "+ 239 "image retrieval using scheme parsing (%s) was unsuccessful: %v; "+ 240 "image retrieval without scheme parsing (%s) was unsuccessful: %v", 241 scheme, 242 in.Location, 243 err, 244 in.UserInput, 245 userInputErr, 246 ) 247 } 248 249 return img, cleanup, nil 250 } 251 252 func generateDirectorySource(fs afero.Fs, in Input) (*Source, func(), error) { 253 fileMeta, err := fs.Stat(in.Location) 254 if err != nil { 255 return nil, func() {}, fmt.Errorf("unable to stat dir=%q: %w", in.Location, err) 256 } 257 258 if !fileMeta.IsDir() { 259 return nil, func() {}, fmt.Errorf("given path is not a directory (path=%q): %w", in.Location, err) 260 } 261 262 s, err := NewFromDirectoryWithNameVersion(in.Location, in.Name, in.Version) 263 if err != nil { 264 return nil, func() {}, fmt.Errorf("could not populate source from path=%q: %w", in.Location, err) 265 } 266 267 return &s, func() {}, nil 268 } 269 270 func generateFileSource(fs afero.Fs, in Input) (*Source, func(), error) { 271 fileMeta, err := fs.Stat(in.Location) 272 if err != nil { 273 return nil, func() {}, fmt.Errorf("unable to stat dir=%q: %w", in.Location, err) 274 } 275 276 if fileMeta.IsDir() { 277 return nil, func() {}, fmt.Errorf("given path is not a directory (path=%q): %w", in.Location, err) 278 } 279 280 s, cleanupFn := NewFromFileWithNameVersion(in.Location, in.Name, in.Version) 281 282 return &s, cleanupFn, nil 283 } 284 285 // NewFromDirectory creates a new source object tailored to catalog a given filesystem directory recursively. 286 func NewFromDirectory(path string) (Source, error) { 287 return NewFromDirectoryWithName(path, "") 288 } 289 290 // NewFromDirectoryWithName creates a new source object tailored to catalog a given filesystem directory recursively, with an explicitly provided name. 291 func NewFromDirectoryWithName(path string, name string) (Source, error) { 292 return NewFromDirectoryWithNameVersion(path, name, "") 293 } 294 295 // NewFromDirectoryWithNameVersion creates a new source object tailored to catalog a given filesystem directory recursively, with an explicitly provided name. 296 func NewFromDirectoryWithNameVersion(path string, name string, version string) (Source, error) { 297 s := Source{ 298 mutex: &sync.Mutex{}, 299 Metadata: Metadata{ 300 Name: name, 301 Version: version, 302 Scheme: DirectoryScheme, 303 Path: path, 304 }, 305 path: path, 306 } 307 s.SetID() 308 return s, nil 309 } 310 311 // NewFromDirectoryRoot creates a new source object tailored to catalog a given filesystem directory recursively. 312 func NewFromDirectoryRoot(path string) (Source, error) { 313 return NewFromDirectoryRootWithName(path, "") 314 } 315 316 // NewFromDirectoryRootWithName creates a new source object tailored to catalog a given filesystem directory recursively, with an explicitly provided name. 317 func NewFromDirectoryRootWithName(path string, name string) (Source, error) { 318 return NewFromDirectoryRootWithNameVersion(path, name, "") 319 } 320 321 // NewFromDirectoryRootWithNameVersion creates a new source object tailored to catalog a given filesystem directory recursively, with an explicitly provided name. 322 func NewFromDirectoryRootWithNameVersion(path string, name string, version string) (Source, error) { 323 s := Source{ 324 mutex: &sync.Mutex{}, 325 Metadata: Metadata{ 326 Name: name, 327 Version: version, 328 Scheme: DirectoryScheme, 329 Path: path, 330 Base: path, 331 }, 332 path: path, 333 base: path, 334 } 335 s.SetID() 336 return s, nil 337 } 338 339 // NewFromFile creates a new source object tailored to catalog a file. 340 func NewFromFile(path string) (Source, func()) { 341 return NewFromFileWithName(path, "") 342 } 343 344 // NewFromFileWithName creates a new source object tailored to catalog a file, with an explicitly provided name. 345 func NewFromFileWithName(path string, name string) (Source, func()) { 346 return NewFromFileWithNameVersion(path, name, "") 347 } 348 349 // NewFromFileWithNameVersion creates a new source object tailored to catalog a file, with an explicitly provided name and version. 350 func NewFromFileWithNameVersion(path string, name string, version string) (Source, func()) { 351 analysisPath, cleanupFn := fileAnalysisPath(path) 352 353 s := Source{ 354 mutex: &sync.Mutex{}, 355 Metadata: Metadata{ 356 Name: name, 357 Version: version, 358 Scheme: FileScheme, 359 Path: path, 360 }, 361 path: analysisPath, 362 } 363 364 s.SetID() 365 return s, cleanupFn 366 } 367 368 // fileAnalysisPath returns the path given, or in the case the path is an archive, the location where the archive 369 // contents have been made available. A cleanup function is provided for any temp files created (if any). 370 func fileAnalysisPath(path string) (string, func()) { 371 var analysisPath = path 372 var cleanupFn = func() {} 373 374 // if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and 375 // use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is 376 // unarchived. 377 envelopedUnarchiver, err := archiver.ByExtension(path) 378 if unarchiver, ok := envelopedUnarchiver.(archiver.Unarchiver); err == nil && ok { 379 if tar, ok := unarchiver.(*archiver.Tar); ok { 380 // when tar files are extracted, if there are multiple entries at the same 381 // location, the last entry wins 382 // NOTE: this currently does not display any messages if an overwrite happens 383 tar.OverwriteExisting = true 384 } 385 unarchivedPath, tmpCleanup, err := unarchiveToTmp(path, unarchiver) 386 if err != nil { 387 log.Warnf("file could not be unarchived: %+v", err) 388 } else { 389 log.Debugf("source path is an archive") 390 analysisPath = unarchivedPath 391 } 392 if tmpCleanup != nil { 393 cleanupFn = tmpCleanup 394 } 395 } 396 397 return analysisPath, cleanupFn 398 } 399 400 // NewFromImage creates a new source object tailored to catalog a given container image, relative to the 401 // option given (e.g. all-layers, squashed, etc) 402 func NewFromImage(img *image.Image, userImageStr string) (Source, error) { 403 return NewFromImageWithName(img, userImageStr, "") 404 } 405 406 // NewFromImageWithName creates a new source object tailored to catalog a given container image, relative to the 407 // option given (e.g. all-layers, squashed, etc), with an explicit name. 408 func NewFromImageWithName(img *image.Image, userImageStr string, name string) (Source, error) { 409 return NewFromImageWithNameVersion(img, userImageStr, name, "") 410 } 411 412 // NewFromImageWithNameVersion creates a new source object tailored to catalog a given container image, relative to the 413 // option given (e.g. all-layers, squashed, etc), with an explicit name and version. 414 func NewFromImageWithNameVersion(img *image.Image, userImageStr string, name string, version string) (Source, error) { 415 if img == nil { 416 return Source{}, fmt.Errorf("no image given") 417 } 418 419 s := Source{ 420 Image: img, 421 Metadata: Metadata{ 422 Name: name, 423 Version: version, 424 Scheme: ImageScheme, 425 ImageMetadata: NewImageMetadata(img, userImageStr), 426 }, 427 } 428 s.SetID() 429 return s, nil 430 } 431 432 func (s *Source) ID() artifact.ID { 433 if s.id == "" { 434 s.SetID() 435 } 436 return s.id 437 } 438 439 func (s *Source) SetID() { 440 var d string 441 switch s.Metadata.Scheme { 442 case DirectoryScheme: 443 d = digest.FromString(s.Metadata.Path).String() 444 case FileScheme: 445 // attempt to use the digest of the contents of the file as the ID 446 file, err := os.Open(s.Metadata.Path) 447 if err != nil { 448 d = digest.FromString(s.Metadata.Path).String() 449 break 450 } 451 defer file.Close() 452 di, err := digest.FromReader(file) 453 if err != nil { 454 d = digest.FromString(s.Metadata.Path).String() 455 break 456 } 457 d = di.String() 458 case ImageScheme: 459 manifestDigest := digest.FromBytes(s.Metadata.ImageMetadata.RawManifest).String() 460 if manifestDigest != "" { 461 d = manifestDigest 462 break 463 } 464 465 // calcuate chain ID for image sources where manifestDigest is not available 466 // https://github.com/opencontainers/image-spec/blob/main/config.md#layer-chainid 467 d = calculateChainID(s.Metadata.ImageMetadata.Layers) 468 if d == "" { 469 // TODO what happens here if image has no layers? 470 // Is this case possible 471 d = digest.FromString(s.Metadata.ImageMetadata.UserInput).String() 472 } 473 default: // for UnknownScheme we hash the struct 474 id, _ := artifact.IDByHash(s) 475 d = string(id) 476 } 477 478 s.id = artifact.ID(strings.TrimPrefix(d, "sha256:")) 479 s.Metadata.ID = strings.TrimPrefix(d, "sha256:") 480 } 481 482 func calculateChainID(lm []LayerMetadata) string { 483 if len(lm) < 1 { 484 return "" 485 } 486 487 // DiffID(L0) = digest of layer 0 488 // https://github.com/anchore/stereoscope/blob/1b1b744a919964f38d14e1416fb3f25221b761ce/pkg/image/layer_metadata.go#L19-L32 489 chainID := lm[0].Digest 490 id := chain(chainID, lm[1:]) 491 492 return id 493 } 494 495 func chain(chainID string, layers []LayerMetadata) string { 496 if len(layers) < 1 { 497 return chainID 498 } 499 500 chainID = digest.FromString(layers[0].Digest + " " + chainID).String() 501 return chain(chainID, layers[1:]) 502 } 503 504 func (s *Source) FileResolver(scope Scope) (file.Resolver, error) { 505 switch s.Metadata.Scheme { 506 case DirectoryScheme, FileScheme: 507 s.mutex.Lock() 508 defer s.mutex.Unlock() 509 if s.directoryResolver == nil { 510 exclusionFunctions, err := getDirectoryExclusionFunctions(s.path, s.Exclusions) 511 if err != nil { 512 return nil, err 513 } 514 res, err := fileresolver.NewFromDirectory(s.path, s.base, exclusionFunctions...) 515 if err != nil { 516 return nil, fmt.Errorf("unable to create directory resolver: %w", err) 517 } 518 s.directoryResolver = res 519 } 520 return s.directoryResolver, nil 521 case ImageScheme: 522 var res file.Resolver 523 var err error 524 switch scope { 525 case SquashedScope: 526 res, err = fileresolver.NewFromContainerImageSquash(s.Image) 527 case AllLayersScope: 528 res, err = fileresolver.NewFromContainerImageAllLayers(s.Image) 529 default: 530 return nil, fmt.Errorf("bad image scope provided: %+v", scope) 531 } 532 if err != nil { 533 return nil, err 534 } 535 // image tree contains all paths, so we filter out the excluded entries afterwards 536 if len(s.Exclusions) > 0 { 537 res = fileresolver.NewExcluding(res, getImageExclusionFunction(s.Exclusions)) 538 } 539 return res, nil 540 } 541 return nil, fmt.Errorf("unable to determine FilePathResolver with current scheme=%q", s.Metadata.Scheme) 542 } 543 544 func unarchiveToTmp(path string, unarchiver archiver.Unarchiver) (string, func(), error) { 545 tempDir, err := os.MkdirTemp("", "gosbom-archive-contents-") 546 if err != nil { 547 return "", func() {}, fmt.Errorf("unable to create tempdir for archive processing: %w", err) 548 } 549 550 cleanupFn := func() { 551 if err := os.RemoveAll(tempDir); err != nil { 552 log.Warnf("unable to cleanup archive tempdir: %+v", err) 553 } 554 } 555 556 return tempDir, cleanupFn, unarchiver.Unarchive(path, tempDir) 557 } 558 559 func getImageExclusionFunction(exclusions []string) func(string) bool { 560 if len(exclusions) == 0 { 561 return nil 562 } 563 // add subpath exclusions 564 for _, exclusion := range exclusions { 565 exclusions = append(exclusions, exclusion+"/**") 566 } 567 return func(path string) bool { 568 for _, exclusion := range exclusions { 569 matches, err := doublestar.Match(exclusion, path) 570 if err != nil { 571 return false 572 } 573 if matches { 574 return true 575 } 576 } 577 return false 578 } 579 } 580 581 func getDirectoryExclusionFunctions(root string, exclusions []string) ([]fileresolver.PathIndexVisitor, error) { 582 if len(exclusions) == 0 { 583 return nil, nil 584 } 585 586 // this is what Directory.indexTree is doing to get the absolute path: 587 root, err := filepath.Abs(root) 588 if err != nil { 589 return nil, err 590 } 591 592 // this handles Windows file paths by converting them to C:/something/else format 593 root = filepath.ToSlash(root) 594 595 if !strings.HasSuffix(root, "/") { 596 root += "/" 597 } 598 599 var errors []string 600 for idx, exclusion := range exclusions { 601 // check exclusions for supported paths, these are all relative to the "scan root" 602 if strings.HasPrefix(exclusion, "./") || strings.HasPrefix(exclusion, "*/") || strings.HasPrefix(exclusion, "**/") { 603 exclusion = strings.TrimPrefix(exclusion, "./") 604 exclusions[idx] = root + exclusion 605 } else { 606 errors = append(errors, exclusion) 607 } 608 } 609 610 if errors != nil { 611 return nil, fmt.Errorf("invalid exclusion pattern(s): '%s' (must start with one of: './', '*/', or '**/')", strings.Join(errors, "', '")) 612 } 613 614 return []fileresolver.PathIndexVisitor{ 615 func(path string, info os.FileInfo, _ error) error { 616 for _, exclusion := range exclusions { 617 // this is required to handle Windows filepaths 618 path = filepath.ToSlash(path) 619 matches, err := doublestar.Match(exclusion, path) 620 if err != nil { 621 return nil 622 } 623 if matches { 624 if info != nil && info.IsDir() { 625 return filepath.SkipDir 626 } 627 return fileresolver.ErrSkipPath 628 } 629 } 630 return nil 631 }, 632 }, nil 633 }