github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/golang/parse_go_mod.go (about)

     1  package golang
     2  
     3  import (
     4  	"bufio"
     5  	"context"
     6  	"fmt"
     7  	"go/build"
     8  	"io"
     9  	"path/filepath"
    10  	"slices"
    11  	"sort"
    12  	"strings"
    13  
    14  	"github.com/spf13/afero"
    15  	"golang.org/x/mod/modfile"
    16  	"golang.org/x/tools/go/packages"
    17  
    18  	"github.com/anchore/syft/internal"
    19  	"github.com/anchore/syft/internal/log"
    20  	"github.com/anchore/syft/internal/unknown"
    21  	"github.com/anchore/syft/syft/artifact"
    22  	"github.com/anchore/syft/syft/file"
    23  	"github.com/anchore/syft/syft/internal/fileresolver"
    24  	"github.com/anchore/syft/syft/pkg"
    25  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    26  )
    27  
// goModCataloger catalogs the Go modules described by a go.mod file. Its
// methods combine source analysis through the go toolchain with plain go.mod
// parsing.
type goModCataloger struct {
	// licenseResolver is queried (via getLicenses) for the licenses of modules
	// that are only known from go.mod require/replace directives.
	licenseResolver goLicenseResolver
}
    31  
    32  func newGoModCataloger(opts CatalogerConfig) *goModCataloger {
    33  	return &goModCataloger{
    34  		licenseResolver: newGoLicenseResolver(modFileCatalogerName, opts),
    35  	}
    36  }
    37  
    38  // parseGoModFile takes a go.mod and tries to resolve and lists all packages discovered.
    39  func (c *goModCataloger) parseGoModFile(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    40  	modDir := filepath.Dir(string(reader.Location.Reference().RealPath))
    41  	digests, err := parseGoSumFile(resolver, reader)
    42  	if err != nil {
    43  		log.Debugf("unable to get go.sum: %v", err)
    44  	}
    45  
    46  	scanRoot := ""
    47  	if dir, ok := resolver.(*fileresolver.Directory); ok && dir != nil {
    48  		scanRoot = dir.Chroot.Base()
    49  	}
    50  
    51  	// source analysis using go toolchain if available
    52  	syftSourcePackages, sourceModules, sourceDependencies, unknownErr := c.loadPackages(modDir, reader.Location)
    53  	catalogedModules, sourceModuleToPkg := c.catalogModules(ctx, scanRoot, syftSourcePackages, sourceModules, reader, digests)
    54  	relationships := buildModuleRelationships(catalogedModules, sourceDependencies, sourceModuleToPkg)
    55  
    56  	// base case go.mod file parsing
    57  	modFile, err := c.parseModFileContents(reader)
    58  	if err != nil {
    59  		return nil, nil, err
    60  	}
    61  
    62  	// only use mod packages NOT found in source analysis
    63  	goModPackages := c.createGoModPackages(ctx, resolver, modFile, sourceModules, reader, digests)
    64  	c.applyReplaceDirectives(ctx, resolver, modFile, goModPackages, reader, digests)
    65  	c.applyExcludeDirectives(modFile, goModPackages)
    66  
    67  	finalPkgs := c.assembleResults(catalogedModules, goModPackages)
    68  	return finalPkgs, relationships, unknownErr
    69  }
    70  
    71  // loadPackages uses golang.org/x/tools/go/packages to get dependency information.
    72  func (c *goModCataloger) loadPackages(modDir string, loc file.Location) (pkgs map[string][]pkgInfo, modules map[string]*packages.Module, dependencies map[string][]string, unknownErr error) {
    73  	cfg := &packages.Config{
    74  		// Mode flags control what information is loaded for each package.
    75  		// Performance impact increases significantly with each additional flag:
    76  		//
    77  		// packages.NeedModule - Required for module metadata (path, version, replace directives).
    78  		//   Essential for SBOM generation. Minimal performance impact.
    79  		//
    80  		// packages.NeedName - Required for package names & package Path. Minimal performance impact.
    81  		//   Needed to identify packages and filter out standard library packages.
    82  		//
    83  		// packages.NeedFiles - Loads source file paths for each package.
    84  		//   Moderate performance impact as it requires filesystem traversal.
    85  		//   Required for license discovery.
    86  		//
    87  		// packages.NeedDeps - Loads the dependency graph between packages.
    88  		//   High performance impact as it builds the complete import graph.
    89  		//   Critical for generating accurate dependency relationships in SBOM.
    90  		//
    91  		// packages.NeedImports - Loads import information for each package.
    92  		//   High performance impact, especially with large codebases.
    93  		//   Required for building module-to-module dependency mappings.
    94  		//
    95  		// Adding flags like NeedTypes, NeedSyntax, or NeedTypesInfo would dramatically
    96  		// increase memory usage and processing time (10x+ slower) but are not needed
    97  		// for SBOM generation as we only require dependency and module metadata.
    98  		Mode:  packages.NeedModule | packages.NeedName | packages.NeedFiles | packages.NeedDeps | packages.NeedImports,
    99  		Dir:   modDir,
   100  		Tests: true,
   101  	}
   102  
   103  	// From Go documentation: "all" expands to all packages in the main module
   104  	// and their dependencies, including dependencies needed by tests.
   105  	//
   106  	// The special pattern "all" specifies all the active modules,
   107  	// first the main module and then dependencies sorted by module path.
   108  	// A pattern containing "..." specifies the active modules whose module paths match the pattern.
   109  	// On implementation we could not find a test case that differentiated between all and ...
   110  	// There may be a case where ... is non inclusive so we default to all for the inclusive guarantee
   111  	rootPkgs, err := packages.Load(cfg, "all")
   112  	if err != nil {
   113  		log.Debugf("error loading packages: %v", err)
   114  	}
   115  
   116  	// Check for any errors in loading
   117  	for _, p := range rootPkgs {
   118  		if len(p.Errors) > 0 {
   119  			// Log errors but continue processing
   120  			for _, e := range p.Errors {
   121  				log.Debugf("package load error for %s: %v", p.PkgPath, e)
   122  				unknownErr = unknown.Append(unknownErr, loc, err)
   123  			}
   124  		}
   125  	}
   126  
   127  	// note: dependencies have already pruned local imports and only focuses on module => module dependencies
   128  	return c.visitPackages(rootPkgs, loc, unknownErr)
   129  }
   130  
// pkgInfo describes one Go package found during source analysis together with
// the module that owns it. Values are compared field-by-field to avoid
// recording the same package twice for a module.
type pkgInfo struct {
	// pkgPath is the import path of the package.
	pkgPath string
	// modulePath is the module path of the package.
	modulePath string
	// pkgDir is the directory containing the package's source code.
	pkgDir string
	// moduleDir is the directory containing the module's source code; it may be
	// empty when the module's directory is not known.
	moduleDir string
}
   141  
   142  // visitPackages processes Go module import graphs to get all modules
   143  func (c *goModCataloger) visitPackages(
   144  	rootPkgs []*packages.Package,
   145  	loc file.Location,
   146  	uke error,
   147  ) (pkgs map[string][]pkgInfo, modules map[string]*packages.Module, dependencies map[string][]string, unknownErr error) {
   148  	modules = make(map[string]*packages.Module)
   149  	// note: packages are specific to inside the module - they do not include transitive pkgInfo
   150  	// packages is used for identifying licensing documents for modules that could contain multiple licenses
   151  	// dependencies cover transitive module imports; see p.Imports array in packages.Visit
   152  	pkgs = make(map[string][]pkgInfo)
   153  	// dependencies are module => module dependencies
   154  	dependencies = make(map[string][]string)
   155  	// persist unknown errs from previous parts of the catalog
   156  	unknownErr = uke
   157  	// closure (p *Package) bool
   158  	// return bool determines whether the imports of package p are visited.
   159  	packages.Visit(rootPkgs, func(p *packages.Package) bool {
   160  		if len(p.Errors) > 0 {
   161  			for _, err := range p.Errors {
   162  				unknownErr = unknown.Append(unknownErr, loc, err)
   163  			}
   164  			return false
   165  		}
   166  
   167  		// skip for common causes
   168  		if shouldSkipVisit(p) {
   169  			return false
   170  		}
   171  
   172  		// different from above; we still might want to visit imports
   173  		// ignoring a package shouldn't end walking the tree
   174  		// since we need to get the full picture for license discovery
   175  		// for _, prefix := range c.config.IgnorePaths {
   176  		//	if strings.HasPrefix(p.PkgPath, prefix) {
   177  		//		return c.config.IncludeIgnoredDeps
   178  		//	}
   179  		//}
   180  		pkgDir := resolvePkgDir(p)
   181  		if pkgDir == "" {
   182  			return true
   183  		}
   184  
   185  		module := newModule(p.Module)
   186  		if module.Dir == "" {
   187  			// We continue processing even when module.Dir is empty because we still want to:
   188  			// 1. Extract module dependencies from p.Imports for dependency graph construction
   189  			// 2. Create syft packages with available metadata (name, version, etc.)
   190  			// 3. Build relationships between modules even without complete filesystem info
   191  			// Not having the DIR here just means that we're not going to process the licenses
   192  
   193  			// Common causes for module.Dir being empty:
   194  			// - Vendored dependencies where Go toolchain loses some module metadata
   195  			// - Replace directives pointing to non-existent or inaccessible paths
   196  			// A known cause is that the module is vendored, so some information is lost.
   197  			isVendored := strings.Contains(pkgDir, "/vendor/")
   198  			if !isVendored {
   199  				log.Debugf("module %s does not have dir and it's not vendored", module.Path)
   200  			}
   201  		}
   202  
   203  		// extract module dependencies
   204  		for _, imp := range p.Imports {
   205  			if imp.Module != nil && imp.Module.Path != module.Path {
   206  				if dependencies[module.Path] == nil {
   207  					dependencies[module.Path] = []string{imp.Module.Path}
   208  				} else {
   209  					dependencies[module.Path] = append(dependencies[module.Path], imp.Module.Path)
   210  				}
   211  			}
   212  		}
   213  
   214  		info := pkgInfo{
   215  			pkgPath:    p.PkgPath,
   216  			modulePath: module.Path,
   217  			pkgDir:     pkgDir,
   218  			moduleDir:  module.Dir,
   219  		}
   220  		if !slices.Contains(pkgs[module.Path], info) { // avoid duplicates
   221  			pkgs[module.Path] = append(pkgs[module.Path], info)
   222  		}
   223  		modules[p.Module.Path] = module
   224  
   225  		return true
   226  	}, nil)
   227  	return pkgs, modules, dependencies, unknownErr
   228  }
   229  
   230  // create syft packages from Go modules found by the go toolchain
   231  func (c *goModCataloger) catalogModules(
   232  	ctx context.Context,
   233  	scanRoot string,
   234  	pkgs map[string][]pkgInfo,
   235  	modules map[string]*packages.Module,
   236  	reader file.LocationReadCloser,
   237  	digests map[string]string,
   238  ) ([]pkg.Package, map[string]artifact.Identifiable) {
   239  	syftPackages := make([]pkg.Package, 0)
   240  	moduleToPackage := make(map[string]artifact.Identifiable)
   241  
   242  	for _, m := range modules {
   243  		if isRelativeImportOrMain(m.Path) {
   244  			// relativeImport modules are already accounted for by their full module paths at other portions of syft's cataloging
   245  			// example: something like ../../ found as a module for go.mod b, which is sub to go.mod a is accounted for
   246  			// in another call to the goModCataloger when go.mod a is parsed
   247  			// local modules that use a "main" heuristic, no module naming (sometimes common pre go module support)
   248  			// are also not built as syft packages
   249  			continue
   250  		}
   251  
   252  		pkgInfos := pkgs[m.Path]
   253  		moduleLicenses := resolveModuleLicenses(ctx, scanRoot, pkgInfos, afero.NewOsFs())
   254  		// we do out of source lookups for module parsing
   255  		// locations are NOT included in the SBOM because of this
   256  		goModulePkg := pkg.Package{
   257  			Name:      m.Path,
   258  			Version:   m.Version,
   259  			Locations: file.NewLocationSet(reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
   260  			Licenses:  moduleLicenses,
   261  			Language:  pkg.Go,
   262  			Type:      pkg.GoModulePkg,
   263  			PURL:      packageURL(m.Path, m.Version),
   264  			Metadata:  createSourceMetadata(digests[fmt.Sprintf("%s %s", m.Path, m.Version)]),
   265  		}
   266  		goModulePkg.SetID()
   267  
   268  		moduleToPackage[m.Path] = goModulePkg
   269  		syftPackages = append(syftPackages, goModulePkg)
   270  	}
   271  
   272  	return syftPackages, moduleToPackage
   273  }
   274  
   275  // buildModuleRelationships creates artifact relationships between Go modules.
   276  func buildModuleRelationships(
   277  	syftPkgs []pkg.Package,
   278  	dependencies map[string][]string,
   279  	moduleToPkg map[string]artifact.Identifiable,
   280  ) []artifact.Relationship {
   281  	var rels []artifact.Relationship
   282  	seen := make(map[string]struct{})
   283  
   284  	for _, fromPkg := range syftPkgs {
   285  		for _, dep := range dependencies[fromPkg.Name] {
   286  			if dep == fromPkg.Name {
   287  				continue
   288  			}
   289  			toPkg, ok := moduleToPkg[dep]
   290  			if !ok {
   291  				continue
   292  			}
   293  
   294  			key := string(fromPkg.ID()) + string(toPkg.ID())
   295  			if _, exists := seen[key]; exists {
   296  				continue
   297  			}
   298  
   299  			rels = append(rels, artifact.Relationship{
   300  				From: toPkg,   // dep
   301  				To:   fromPkg, // parent
   302  				Type: artifact.DependencyOfRelationship,
   303  			})
   304  			seen[key] = struct{}{}
   305  		}
   306  	}
   307  
   308  	return rels
   309  }
   310  
   311  func (c *goModCataloger) parseModFileContents(reader file.LocationReadCloser) (*modfile.File, error) {
   312  	contents, err := io.ReadAll(reader)
   313  	if err != nil {
   314  		return nil, fmt.Errorf("failed to read go module: %w", err)
   315  	}
   316  
   317  	f, err := modfile.Parse(reader.RealPath, contents, nil)
   318  	if err != nil {
   319  		return nil, fmt.Errorf("failed to parse go module: %w", err)
   320  	}
   321  
   322  	return f, nil
   323  }
   324  
   325  // note this handles the deduplication from source by checking if the mod path exists in the sourceModules map
   326  func (c *goModCataloger) createGoModPackages(ctx context.Context, resolver file.Resolver, modFile *modfile.File, sourceModules map[string]*packages.Module, reader file.LocationReadCloser, digests map[string]string) map[string]pkg.Package {
   327  	goModPackages := make(map[string]pkg.Package)
   328  
   329  	for _, m := range modFile.Require {
   330  		if _, exists := sourceModules[m.Mod.Path]; !exists {
   331  			lics := c.licenseResolver.getLicenses(ctx, resolver, m.Mod.Path, m.Mod.Version)
   332  			goModPkg := pkg.Package{
   333  				Name:      m.Mod.Path,
   334  				Version:   m.Mod.Version,
   335  				Licenses:  pkg.NewLicenseSet(lics...),
   336  				Locations: file.NewLocationSet(reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
   337  				PURL:      packageURL(m.Mod.Path, m.Mod.Version),
   338  				Language:  pkg.Go,
   339  				Type:      pkg.GoModulePkg,
   340  				Metadata: pkg.GolangModuleEntry{
   341  					H1Digest: digests[fmt.Sprintf("%s %s", m.Mod.Path, m.Mod.Version)],
   342  				},
   343  			}
   344  			goModPkg.SetID()
   345  			goModPackages[m.Mod.Path] = goModPkg
   346  		}
   347  	}
   348  
   349  	return goModPackages
   350  }
   351  
   352  // applyReplaceDirectives processes replace directives from go.mod
   353  func (c *goModCataloger) applyReplaceDirectives(ctx context.Context, resolver file.Resolver, modFile *modfile.File, goModPackages map[string]pkg.Package, reader file.LocationReadCloser, digests map[string]string) {
   354  	for _, m := range modFile.Replace {
   355  		lics := c.licenseResolver.getLicenses(ctx, resolver, m.New.Path, m.New.Version)
   356  		var finalPath string
   357  		if !strings.HasPrefix(m.New.Path, ".") && !strings.HasPrefix(m.New.Path, "/") {
   358  			finalPath = m.New.Path
   359  			delete(goModPackages, m.Old.Path)
   360  		} else {
   361  			finalPath = m.Old.Path
   362  		}
   363  		goModPkg := pkg.Package{
   364  			Name:      finalPath,
   365  			Version:   m.New.Version,
   366  			Licenses:  pkg.NewLicenseSet(lics...),
   367  			Locations: file.NewLocationSet(reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
   368  			PURL:      packageURL(finalPath, m.New.Version),
   369  			Language:  pkg.Go,
   370  			Type:      pkg.GoModulePkg,
   371  			Metadata: pkg.GolangModuleEntry{
   372  				H1Digest: digests[fmt.Sprintf("%s %s", finalPath, m.New.Version)],
   373  			},
   374  		}
   375  		goModPkg.SetID()
   376  		goModPackages[finalPath] = goModPkg
   377  	}
   378  }
   379  
   380  func (c *goModCataloger) applyExcludeDirectives(modFile *modfile.File, goModPackages map[string]pkg.Package) {
   381  	for _, m := range modFile.Exclude {
   382  		delete(goModPackages, m.Mod.Path)
   383  	}
   384  }
   385  
   386  func (c *goModCataloger) assembleResults(catalogedPkgs []pkg.Package, goModPackages map[string]pkg.Package) []pkg.Package {
   387  	pkgsSlice := make([]pkg.Package, 0)
   388  
   389  	pkgsSlice = append(pkgsSlice, catalogedPkgs...)
   390  
   391  	for _, p := range goModPackages {
   392  		pkgsSlice = append(pkgsSlice, p)
   393  	}
   394  
   395  	sort.SliceStable(pkgsSlice, func(i, j int) bool {
   396  		return pkgsSlice[i].Name < pkgsSlice[j].Name
   397  	})
   398  
   399  	return pkgsSlice
   400  }
   401  
   402  func parseGoSumFile(resolver file.Resolver, reader file.LocationReadCloser) (map[string]string, error) {
   403  	out := map[string]string{}
   404  
   405  	if resolver == nil {
   406  		return out, fmt.Errorf("no resolver provided")
   407  	}
   408  
   409  	goSumPath := strings.TrimSuffix(reader.RealPath, ".mod") + ".sum"
   410  	goSumLocation := resolver.RelativeFileByPath(reader.Location, goSumPath)
   411  	if goSumLocation == nil {
   412  		return nil, fmt.Errorf("unable to resolve: %s", goSumPath)
   413  	}
   414  	contents, err := resolver.FileContentsByLocation(*goSumLocation)
   415  	if err != nil {
   416  		return nil, err
   417  	}
   418  	defer internal.CloseAndLogError(contents, goSumLocation.AccessPath)
   419  
   420  	// go.sum has the format like:
   421  	// github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
   422  	// github.com/BurntSushi/toml v0.4.1 h1:GaI7EiDXDRfa8VshkTj7Fym7ha+y8/XxIgD2okUIjLw=
   423  	// github.com/BurntSushi/toml v0.4.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
   424  	scanner := bufio.NewScanner(contents)
   425  	// optionally, resize scanner's capacity for lines over 64K, see next example
   426  	for scanner.Scan() {
   427  		line := scanner.Text()
   428  		parts := strings.Split(line, " ")
   429  		if len(parts) < 3 {
   430  			continue
   431  		}
   432  		nameVersion := fmt.Sprintf("%s %s", parts[0], parts[1])
   433  		hash := parts[2]
   434  		out[nameVersion] = hash
   435  	}
   436  
   437  	return out, nil
   438  }
   439  
   440  // createSourceMetadata creates metadata for packages found through source analysis using build.Default
   441  func createSourceMetadata(h1Digest string) pkg.GolangSourceEntry {
   442  	return pkg.GolangSourceEntry{
   443  		H1Digest:        h1Digest,
   444  		OperatingSystem: build.Default.GOOS,
   445  		Architecture:    build.Default.GOARCH,
   446  		BuildTags:       strings.Join(build.Default.BuildTags, ","),
   447  		CgoEnabled:      build.Default.CgoEnabled,
   448  	}
   449  }
   450  
   451  func resolvePkgDir(p *packages.Package) string {
   452  	switch {
   453  	case len(p.GoFiles) > 0:
   454  		return filepath.Dir(p.GoFiles[0])
   455  	case len(p.CompiledGoFiles) > 0:
   456  		return filepath.Dir(p.CompiledGoFiles[0])
   457  	case len(p.OtherFiles) > 0:
   458  		return filepath.Dir(p.OtherFiles[0])
   459  	default:
   460  		return ""
   461  	}
   462  }
   463  
   464  func shouldSkipVisit(p *packages.Package) bool {
   465  	// skip packages that don't have module info
   466  	if p.Module == nil {
   467  		return true
   468  	}
   469  
   470  	// skip stdlib
   471  	if isStdLib(p) {
   472  		return true
   473  	}
   474  
   475  	return false
   476  }
   477  
   478  // isStdLib returns true if this package is part of the Go standard library.
   479  func isStdLib(pkg *packages.Package) bool {
   480  	if pkg.Name == "unsafe" {
   481  		// Special case unsafe stdlib, because it does not contain go files.
   482  		return true
   483  	}
   484  	if len(pkg.GoFiles) == 0 {
   485  		return false
   486  	}
   487  	prefix := build.Default.GOROOT
   488  	sep := string(filepath.Separator)
   489  	if !strings.HasSuffix(prefix, sep) {
   490  		prefix += sep
   491  	}
   492  	return strings.HasPrefix(pkg.GoFiles[0], prefix)
   493  }
   494  
   495  // handle replace directives
   496  func newModule(mod *packages.Module) *packages.Module {
   497  	// Example of a module with replace directive: 	k8s.io/kubernetes => k8s.io/kubernetes v1.11.1
   498  	// {
   499  	//         "Path": "k8s.io/kubernetes",
   500  	//         "Version": "v0.17.9",
   501  	//         "Replace": {
   502  	//                 "Path": "k8s.io/kubernetes",
   503  	//                 "Version": "v1.11.1",
   504  	//                 "Time": "2018-07-17T04:20:29Z",
   505  	//                 "Dir": "/home/gongyuan_kubeflow_org/go/pkg/mod/k8s.io/kubernetes@v1.11.1",
   506  	//                 "GoMod": "/home/gongyuan_kubeflow_org/go/pkg/mod/cache/download/k8s.io/kubernetes/@v/v1.11.1.mod"
   507  	//         },
   508  	//         "Dir": "/home/gongyuan_kubeflow_org/go/pkg/mod/k8s.io/kubernetes@v1.11.1",
   509  	//         "GoMod": "/home/gongyuan_kubeflow_org/go/pkg/mod/cache/download/k8s.io/kubernetes/@v/v1.11.1.mod"
   510  	// }
   511  	// handle replace directives
   512  	// Note, we specifically want to replace version field.
   513  	// Haven't confirmed, but we may also need to override the
   514  	// entire struct when using replace directive with local folders.
   515  	tmp := *mod
   516  	if tmp.Replace != nil {
   517  		tmp = *tmp.Replace
   518  	}
   519  
   520  	return &tmp
   521  }
   522  
   523  func isRelativeImportOrMain(p string) bool {
   524  	if p == "main" {
   525  		return true
   526  	}
   527  	// true for ".", "..", "./...", "../..."
   528  	return build.IsLocalImport(p)
   529  }