github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/golang/parse_go_mod.go (about) 1 package golang 2 3 import ( 4 "bufio" 5 "context" 6 "fmt" 7 "go/build" 8 "io" 9 "path/filepath" 10 "slices" 11 "sort" 12 "strings" 13 14 "github.com/spf13/afero" 15 "golang.org/x/mod/modfile" 16 "golang.org/x/tools/go/packages" 17 18 "github.com/anchore/syft/internal" 19 "github.com/anchore/syft/internal/log" 20 "github.com/anchore/syft/internal/unknown" 21 "github.com/anchore/syft/syft/artifact" 22 "github.com/anchore/syft/syft/file" 23 "github.com/anchore/syft/syft/internal/fileresolver" 24 "github.com/anchore/syft/syft/pkg" 25 "github.com/anchore/syft/syft/pkg/cataloger/generic" 26 ) 27 28 type goModCataloger struct { 29 licenseResolver goLicenseResolver 30 } 31 32 func newGoModCataloger(opts CatalogerConfig) *goModCataloger { 33 return &goModCataloger{ 34 licenseResolver: newGoLicenseResolver(modFileCatalogerName, opts), 35 } 36 } 37 38 // parseGoModFile takes a go.mod and tries to resolve and lists all packages discovered. 39 func (c *goModCataloger) parseGoModFile(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 40 modDir := filepath.Dir(string(reader.Location.Reference().RealPath)) 41 digests, err := parseGoSumFile(resolver, reader) 42 if err != nil { 43 log.Debugf("unable to get go.sum: %v", err) 44 } 45 46 scanRoot := "" 47 if dir, ok := resolver.(*fileresolver.Directory); ok && dir != nil { 48 scanRoot = dir.Chroot.Base() 49 } 50 51 // source analysis using go toolchain if available 52 syftSourcePackages, sourceModules, sourceDependencies, unknownErr := c.loadPackages(modDir, reader.Location) 53 catalogedModules, sourceModuleToPkg := c.catalogModules(ctx, scanRoot, syftSourcePackages, sourceModules, reader, digests) 54 relationships := buildModuleRelationships(catalogedModules, sourceDependencies, sourceModuleToPkg) 55 56 // base case go.mod file parsing 57 modFile, err := c.parseModFileContents(reader) 58 if err != nil { 59 return nil, nil, err 60 } 61 62 // only use mod packages NOT found in source analysis 63 goModPackages := c.createGoModPackages(ctx, resolver, modFile, sourceModules, reader, digests) 64 c.applyReplaceDirectives(ctx, resolver, modFile, goModPackages, reader, digests) 65 c.applyExcludeDirectives(modFile, goModPackages) 66 67 finalPkgs := c.assembleResults(catalogedModules, goModPackages) 68 return finalPkgs, relationships, unknownErr 69 } 70 71 // loadPackages uses golang.org/x/tools/go/packages to get dependency information. 72 func (c *goModCataloger) loadPackages(modDir string, loc file.Location) (pkgs map[string][]pkgInfo, modules map[string]*packages.Module, dependencies map[string][]string, unknownErr error) { 73 cfg := &packages.Config{ 74 // Mode flags control what information is loaded for each package. 75 // Performance impact increases significantly with each additional flag: 76 // 77 // packages.NeedModule - Required for module metadata (path, version, replace directives). 78 // Essential for SBOM generation. Minimal performance impact. 79 // 80 // packages.NeedName - Required for package names & package Path. Minimal performance impact. 81 // Needed to identify packages and filter out standard library packages. 82 // 83 // packages.NeedFiles - Loads source file paths for each package. 84 // Moderate performance impact as it requires filesystem traversal. 85 // Required for license discovery. 86 // 87 // packages.NeedDeps - Loads the dependency graph between packages. 88 // High performance impact as it builds the complete import graph. 89 // Critical for generating accurate dependency relationships in SBOM. 90 // 91 // packages.NeedImports - Loads import information for each package. 92 // High performance impact, especially with large codebases. 93 // Required for building module-to-module dependency mappings. 94 // 95 // Adding flags like NeedTypes, NeedSyntax, or NeedTypesInfo would dramatically 96 // increase memory usage and processing time (10x+ slower) but are not needed 97 // for SBOM generation as we only require dependency and module metadata. 98 Mode: packages.NeedModule | packages.NeedName | packages.NeedFiles | packages.NeedDeps | packages.NeedImports, 99 Dir: modDir, 100 Tests: true, 101 } 102 103 // From Go documentation: "all" expands to all packages in the main module 104 // and their dependencies, including dependencies needed by tests. 105 // 106 // The special pattern "all" specifies all the active modules, 107 // first the main module and then dependencies sorted by module path. 108 // A pattern containing "..." specifies the active modules whose module paths match the pattern. 109 // On implementation we could not find a test case that differentiated between all and ... 110 // There may be a case where ... is non inclusive so we default to all for the inclusive guarantee 111 rootPkgs, err := packages.Load(cfg, "all") 112 if err != nil { 113 log.Debugf("error loading packages: %v", err) 114 } 115 116 // Check for any errors in loading 117 for _, p := range rootPkgs { 118 if len(p.Errors) > 0 { 119 // Log errors but continue processing 120 for _, e := range p.Errors { 121 log.Debugf("package load error for %s: %v", p.PkgPath, e) 122 unknownErr = unknown.Append(unknownErr, loc, err) 123 } 124 } 125 } 126 127 // note: dependencies have already pruned local imports and only focuses on module => module dependencies 128 return c.visitPackages(rootPkgs, loc, unknownErr) 129 } 130 131 type pkgInfo struct { 132 // pkgPath is the import path of the package. 133 pkgPath string 134 // modulePath is the module path of the package. 135 modulePath string 136 // pkgDir is the directory containing the package's source code. 137 pkgDir string 138 // moduleDir is the directory containing the module's source code. 139 moduleDir string 140 } 141 142 // visitPackages processes Go module import graphs to get all modules 143 func (c *goModCataloger) visitPackages( 144 rootPkgs []*packages.Package, 145 loc file.Location, 146 uke error, 147 ) (pkgs map[string][]pkgInfo, modules map[string]*packages.Module, dependencies map[string][]string, unknownErr error) { 148 modules = make(map[string]*packages.Module) 149 // note: packages are specific to inside the module - they do not include transitive pkgInfo 150 // packages is used for identifying licensing documents for modules that could contain multiple licenses 151 // dependencies cover transitive module imports; see p.Imports array in packages.Visit 152 pkgs = make(map[string][]pkgInfo) 153 // dependencies are module => module dependencies 154 dependencies = make(map[string][]string) 155 // persist unknown errs from previous parts of the catalog 156 unknownErr = uke 157 // closure (p *Package) bool 158 // return bool determines whether the imports of package p are visited. 159 packages.Visit(rootPkgs, func(p *packages.Package) bool { 160 if len(p.Errors) > 0 { 161 for _, err := range p.Errors { 162 unknownErr = unknown.Append(unknownErr, loc, err) 163 } 164 return false 165 } 166 167 // skip for common causes 168 if shouldSkipVisit(p) { 169 return false 170 } 171 172 // different from above; we still might want to visit imports 173 // ignoring a package shouldn't end walking the tree 174 // since we need to get the full picture for license discovery 175 // for _, prefix := range c.config.IgnorePaths { 176 // if strings.HasPrefix(p.PkgPath, prefix) { 177 // return c.config.IncludeIgnoredDeps 178 // } 179 //} 180 pkgDir := resolvePkgDir(p) 181 if pkgDir == "" { 182 return true 183 } 184 185 module := newModule(p.Module) 186 if module.Dir == "" { 187 // We continue processing even when module.Dir is empty because we still want to: 188 // 1. Extract module dependencies from p.Imports for dependency graph construction 189 // 2. Create syft packages with available metadata (name, version, etc.) 190 // 3. Build relationships between modules even without complete filesystem info 191 // Not having the DIR here just means that we're not going to process the licenses 192 193 // Common causes for module.Dir being empty: 194 // - Vendored dependencies where Go toolchain loses some module metadata 195 // - Replace directives pointing to non-existent or inaccessible paths 196 // A known cause is that the module is vendored, so some information is lost. 197 isVendored := strings.Contains(pkgDir, "/vendor/") 198 if !isVendored { 199 log.Debugf("module %s does not have dir and it's not vendored", module.Path) 200 } 201 } 202 203 // extract module dependencies 204 for _, imp := range p.Imports { 205 if imp.Module != nil && imp.Module.Path != module.Path { 206 if dependencies[module.Path] == nil { 207 dependencies[module.Path] = []string{imp.Module.Path} 208 } else { 209 dependencies[module.Path] = append(dependencies[module.Path], imp.Module.Path) 210 } 211 } 212 } 213 214 info := pkgInfo{ 215 pkgPath: p.PkgPath, 216 modulePath: module.Path, 217 pkgDir: pkgDir, 218 moduleDir: module.Dir, 219 } 220 if !slices.Contains(pkgs[module.Path], info) { // avoid duplicates 221 pkgs[module.Path] = append(pkgs[module.Path], info) 222 } 223 modules[p.Module.Path] = module 224 225 return true 226 }, nil) 227 return pkgs, modules, dependencies, unknownErr 228 } 229 230 // create syft packages from Go modules found by the go toolchain 231 func (c *goModCataloger) catalogModules( 232 ctx context.Context, 233 scanRoot string, 234 pkgs map[string][]pkgInfo, 235 modules map[string]*packages.Module, 236 reader file.LocationReadCloser, 237 digests map[string]string, 238 ) ([]pkg.Package, map[string]artifact.Identifiable) { 239 syftPackages := make([]pkg.Package, 0) 240 moduleToPackage := make(map[string]artifact.Identifiable) 241 242 for _, m := range modules { 243 if isRelativeImportOrMain(m.Path) { 244 // relativeImport modules are already accounted for by their full module paths at other portions of syft's cataloging 245 // example: something like ../../ found as a module for go.mod b, which is sub to go.mod a is accounted for 246 // in another call to the goModCataloger when go.mod a is parsed 247 // local modules that use a "main" heuristic, no module naming (sometimes common pre go module support) 248 // are also not built as syft packages 249 continue 250 } 251 252 pkgInfos := pkgs[m.Path] 253 moduleLicenses := resolveModuleLicenses(ctx, scanRoot, pkgInfos, afero.NewOsFs()) 254 // we do out of source lookups for module parsing 255 // locations are NOT included in the SBOM because of this 256 goModulePkg := pkg.Package{ 257 Name: m.Path, 258 Version: m.Version, 259 Locations: file.NewLocationSet(reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)), 260 Licenses: moduleLicenses, 261 Language: pkg.Go, 262 Type: pkg.GoModulePkg, 263 PURL: packageURL(m.Path, m.Version), 264 Metadata: createSourceMetadata(digests[fmt.Sprintf("%s %s", m.Path, m.Version)]), 265 } 266 goModulePkg.SetID() 267 268 moduleToPackage[m.Path] = goModulePkg 269 syftPackages = append(syftPackages, goModulePkg) 270 } 271 272 return syftPackages, moduleToPackage 273 } 274 275 // buildModuleRelationships creates artifact relationships between Go modules. 276 func buildModuleRelationships( 277 syftPkgs []pkg.Package, 278 dependencies map[string][]string, 279 moduleToPkg map[string]artifact.Identifiable, 280 ) []artifact.Relationship { 281 var rels []artifact.Relationship 282 seen := make(map[string]struct{}) 283 284 for _, fromPkg := range syftPkgs { 285 for _, dep := range dependencies[fromPkg.Name] { 286 if dep == fromPkg.Name { 287 continue 288 } 289 toPkg, ok := moduleToPkg[dep] 290 if !ok { 291 continue 292 } 293 294 key := string(fromPkg.ID()) + string(toPkg.ID()) 295 if _, exists := seen[key]; exists { 296 continue 297 } 298 299 rels = append(rels, artifact.Relationship{ 300 From: toPkg, // dep 301 To: fromPkg, // parent 302 Type: artifact.DependencyOfRelationship, 303 }) 304 seen[key] = struct{}{} 305 } 306 } 307 308 return rels 309 } 310 311 func (c *goModCataloger) parseModFileContents(reader file.LocationReadCloser) (*modfile.File, error) { 312 contents, err := io.ReadAll(reader) 313 if err != nil { 314 return nil, fmt.Errorf("failed to read go module: %w", err) 315 } 316 317 f, err := modfile.Parse(reader.RealPath, contents, nil) 318 if err != nil { 319 return nil, fmt.Errorf("failed to parse go module: %w", err) 320 } 321 322 return f, nil 323 } 324 325 // note this handles the deduplication from source by checking if the mod path exists in the sourceModules map 326 func (c *goModCataloger) createGoModPackages(ctx context.Context, resolver file.Resolver, modFile *modfile.File, sourceModules map[string]*packages.Module, reader file.LocationReadCloser, digests map[string]string) map[string]pkg.Package { 327 goModPackages := make(map[string]pkg.Package) 328 329 for _, m := range modFile.Require { 330 if _, exists := sourceModules[m.Mod.Path]; !exists { 331 lics := c.licenseResolver.getLicenses(ctx, resolver, m.Mod.Path, m.Mod.Version) 332 goModPkg := pkg.Package{ 333 Name: m.Mod.Path, 334 Version: m.Mod.Version, 335 Licenses: pkg.NewLicenseSet(lics...), 336 Locations: file.NewLocationSet(reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)), 337 PURL: packageURL(m.Mod.Path, m.Mod.Version), 338 Language: pkg.Go, 339 Type: pkg.GoModulePkg, 340 Metadata: pkg.GolangModuleEntry{ 341 H1Digest: digests[fmt.Sprintf("%s %s", m.Mod.Path, m.Mod.Version)], 342 }, 343 } 344 goModPkg.SetID() 345 goModPackages[m.Mod.Path] = goModPkg 346 } 347 } 348 349 return goModPackages 350 } 351 352 // applyReplaceDirectives processes replace directives from go.mod 353 func (c *goModCataloger) applyReplaceDirectives(ctx context.Context, resolver file.Resolver, modFile *modfile.File, goModPackages map[string]pkg.Package, reader file.LocationReadCloser, digests map[string]string) { 354 for _, m := range modFile.Replace { 355 lics := c.licenseResolver.getLicenses(ctx, resolver, m.New.Path, m.New.Version) 356 var finalPath string 357 if !strings.HasPrefix(m.New.Path, ".") && !strings.HasPrefix(m.New.Path, "/") { 358 finalPath = m.New.Path 359 delete(goModPackages, m.Old.Path) 360 } else { 361 finalPath = m.Old.Path 362 } 363 goModPkg := pkg.Package{ 364 Name: finalPath, 365 Version: m.New.Version, 366 Licenses: pkg.NewLicenseSet(lics...), 367 Locations: file.NewLocationSet(reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)), 368 PURL: packageURL(finalPath, m.New.Version), 369 Language: pkg.Go, 370 Type: pkg.GoModulePkg, 371 Metadata: pkg.GolangModuleEntry{ 372 H1Digest: digests[fmt.Sprintf("%s %s", finalPath, m.New.Version)], 373 }, 374 } 375 goModPkg.SetID() 376 goModPackages[finalPath] = goModPkg 377 } 378 } 379 380 func (c *goModCataloger) applyExcludeDirectives(modFile *modfile.File, goModPackages map[string]pkg.Package) { 381 for _, m := range modFile.Exclude { 382 delete(goModPackages, m.Mod.Path) 383 } 384 } 385 386 func (c *goModCataloger) assembleResults(catalogedPkgs []pkg.Package, goModPackages map[string]pkg.Package) []pkg.Package { 387 pkgsSlice := make([]pkg.Package, 0) 388 389 pkgsSlice = append(pkgsSlice, catalogedPkgs...) 390 391 for _, p := range goModPackages { 392 pkgsSlice = append(pkgsSlice, p) 393 } 394 395 sort.SliceStable(pkgsSlice, func(i, j int) bool { 396 return pkgsSlice[i].Name < pkgsSlice[j].Name 397 }) 398 399 return pkgsSlice 400 } 401 402 func parseGoSumFile(resolver file.Resolver, reader file.LocationReadCloser) (map[string]string, error) { 403 out := map[string]string{} 404 405 if resolver == nil { 406 return out, fmt.Errorf("no resolver provided") 407 } 408 409 goSumPath := strings.TrimSuffix(reader.RealPath, ".mod") + ".sum" 410 goSumLocation := resolver.RelativeFileByPath(reader.Location, goSumPath) 411 if goSumLocation == nil { 412 return nil, fmt.Errorf("unable to resolve: %s", goSumPath) 413 } 414 contents, err := resolver.FileContentsByLocation(*goSumLocation) 415 if err != nil { 416 return nil, err 417 } 418 defer internal.CloseAndLogError(contents, goSumLocation.AccessPath) 419 420 // go.sum has the format like: 421 // github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 422 // github.com/BurntSushi/toml v0.4.1 h1:GaI7EiDXDRfa8VshkTj7Fym7ha+y8/XxIgD2okUIjLw= 423 // github.com/BurntSushi/toml v0.4.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= 424 scanner := bufio.NewScanner(contents) 425 // optionally, resize scanner's capacity for lines over 64K, see next example 426 for scanner.Scan() { 427 line := scanner.Text() 428 parts := strings.Split(line, " ") 429 if len(parts) < 3 { 430 continue 431 } 432 nameVersion := fmt.Sprintf("%s %s", parts[0], parts[1]) 433 hash := parts[2] 434 out[nameVersion] = hash 435 } 436 437 return out, nil 438 } 439 440 // createSourceMetadata creates metadata for packages found through source analysis using build.Default 441 func createSourceMetadata(h1Digest string) pkg.GolangSourceEntry { 442 return pkg.GolangSourceEntry{ 443 H1Digest: h1Digest, 444 OperatingSystem: build.Default.GOOS, 445 Architecture: build.Default.GOARCH, 446 BuildTags: strings.Join(build.Default.BuildTags, ","), 447 CgoEnabled: build.Default.CgoEnabled, 448 } 449 } 450 451 func resolvePkgDir(p *packages.Package) string { 452 switch { 453 case len(p.GoFiles) > 0: 454 return filepath.Dir(p.GoFiles[0]) 455 case len(p.CompiledGoFiles) > 0: 456 return filepath.Dir(p.CompiledGoFiles[0]) 457 case len(p.OtherFiles) > 0: 458 return filepath.Dir(p.OtherFiles[0]) 459 default: 460 return "" 461 } 462 } 463 464 func shouldSkipVisit(p *packages.Package) bool { 465 // skip packages that don't have module info 466 if p.Module == nil { 467 return true 468 } 469 470 // skip stdlib 471 if isStdLib(p) { 472 return true 473 } 474 475 return false 476 } 477 478 // isStdLib returns true if this package is part of the Go standard library. 479 func isStdLib(pkg *packages.Package) bool { 480 if pkg.Name == "unsafe" { 481 // Special case unsafe stdlib, because it does not contain go files. 482 return true 483 } 484 if len(pkg.GoFiles) == 0 { 485 return false 486 } 487 prefix := build.Default.GOROOT 488 sep := string(filepath.Separator) 489 if !strings.HasSuffix(prefix, sep) { 490 prefix += sep 491 } 492 return strings.HasPrefix(pkg.GoFiles[0], prefix) 493 } 494 495 // handle replace directives 496 func newModule(mod *packages.Module) *packages.Module { 497 // Example of a module with replace directive: k8s.io/kubernetes => k8s.io/kubernetes v1.11.1 498 // { 499 // "Path": "k8s.io/kubernetes", 500 // "Version": "v0.17.9", 501 // "Replace": { 502 // "Path": "k8s.io/kubernetes", 503 // "Version": "v1.11.1", 504 // "Time": "2018-07-17T04:20:29Z", 505 // "Dir": "/home/gongyuan_kubeflow_org/go/pkg/mod/k8s.io/kubernetes@v1.11.1", 506 // "GoMod": "/home/gongyuan_kubeflow_org/go/pkg/mod/cache/download/k8s.io/kubernetes/@v/v1.11.1.mod" 507 // }, 508 // "Dir": "/home/gongyuan_kubeflow_org/go/pkg/mod/k8s.io/kubernetes@v1.11.1", 509 // "GoMod": "/home/gongyuan_kubeflow_org/go/pkg/mod/cache/download/k8s.io/kubernetes/@v/v1.11.1.mod" 510 // } 511 // handle replace directives 512 // Note, we specifically want to replace version field. 513 // Haven't confirmed, but we may also need to override the 514 // entire struct when using replace directive with local folders. 515 tmp := *mod 516 if tmp.Replace != nil { 517 tmp = *tmp.Replace 518 } 519 520 return &tmp 521 } 522 523 func isRelativeImportOrMain(p string) bool { 524 if p == "main" { 525 return true 526 } 527 // true for ".", "..", "./...", "../..." 528 return build.IsLocalImport(p) 529 }