github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/golang/parse_go_binary.go (about)

     1  package golang
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"debug/elf"
     7  	"debug/macho"
     8  	"debug/pe"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"regexp"
    13  	"runtime/debug"
    14  	"slices"
    15  	"strings"
    16  	"time"
    17  
    18  	"golang.org/x/mod/module"
    19  
    20  	"github.com/anchore/syft/internal"
    21  	"github.com/anchore/syft/internal/log"
    22  	"github.com/anchore/syft/syft/artifact"
    23  	"github.com/anchore/syft/syft/file"
    24  	"github.com/anchore/syft/syft/internal/unionreader"
    25  	"github.com/anchore/syft/syft/pkg"
    26  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    27  	"github.com/anchore/syft/syft/pkg/cataloger/golang/internal/xcoff"
    28  )
    29  
// goArch is the build setting key that getGOARCH looks up to find the target architecture.
const goArch = "GOARCH"

var (
	// errUnrecognizedFormat is returned when a given executable file doesn't
	// appear to be in a known format, or it breaks the rules of that format,
	// or when there are I/O errors reading the file.
	errUnrecognizedFormat = errors.New("unrecognized file format")

	// knownBuildFlagPatterns are the variable-assignment patterns searched for in the -ldflags
	// build setting. extractVersionFromLDFlags prepends a "main" / main-module-path prefix to each
	// pattern and reads the result from the named "version" capture group. The first pattern covers
	// variables ending in "ver"/"version" (optionally preceded by release/git/build), the second
	// covers variables ending in "tag".
	knownBuildFlagPatterns = []*regexp.Regexp{
		regexp.MustCompile(`(?m)\.[a-zA-Z0-9]*([rR]elease)?([gG]it)?([bB]uild)?[vV]er(sion)?=(\S+/)*(?P<version>v?\d+.\d+.\d+[-\w]*)`),
		regexp.MustCompile(`(?m)\.[a-zA-Z0-9]*([tT]ag)=(\S+/)*(?P<version>v?\d+.\d+.\d+[-\w]*)`),
	}
)

// devel is used to recognize the current default version when a golang main distribution is built.
// See https://github.com/golang/go/issues/29228 for details on the progress of being able to
// inject the correct version into the main module of the build process.
const devel = "(devel)"
    48  
// goBinaryCataloger turns the build information embedded in a Go binary into packages.
type goBinaryCataloger struct {
	// licenseResolver is queried with a module path and version to obtain that module's licenses.
	licenseResolver   goLicenseResolver
	// mainModuleVersion selects which strategies (ldflags, binary contents, build settings)
	// findMainModuleVersion may use when the main module version is "(devel)".
	mainModuleVersion MainModuleVersionConfig
}
    53  
    54  func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger {
    55  	return &goBinaryCataloger{
    56  		licenseResolver:   newGoLicenseResolver(binaryCatalogerName, opts),
    57  		mainModuleVersion: opts.MainModuleVersion,
    58  	}
    59  }
    60  
    61  // parseGoBinary catalogs packages found in the "buildinfo" section of a binary built by the go compiler.
    62  func (c *goBinaryCataloger) parseGoBinary(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    63  	var pkgs []pkg.Package
    64  
    65  	unionReader, err := unionreader.GetUnionReader(reader.ReadCloser)
    66  	if err != nil {
    67  		return nil, nil, err
    68  	}
    69  	defer internal.CloseAndLogError(reader.ReadCloser, reader.RealPath)
    70  
    71  	mods, errs := scanFile(reader.Location, unionReader)
    72  
    73  	var rels []artifact.Relationship
    74  	for _, mod := range mods {
    75  		var depPkgs []pkg.Package
    76  		mainPkg, depPkgs := c.buildGoPkgInfo(ctx, resolver, reader.Location, mod, mod.arch, unionReader)
    77  		if mainPkg != nil {
    78  			rels = createModuleRelationships(*mainPkg, depPkgs)
    79  			pkgs = append(pkgs, *mainPkg)
    80  		}
    81  		pkgs = append(pkgs, depPkgs...)
    82  	}
    83  
    84  	return pkgs, rels, errs
    85  }
    86  
    87  func createModuleRelationships(main pkg.Package, deps []pkg.Package) []artifact.Relationship {
    88  	var relationships []artifact.Relationship
    89  
    90  	for _, dep := range deps {
    91  		relationships = append(relationships, artifact.Relationship{
    92  			From: dep,
    93  			To:   main,
    94  			Type: artifact.DependencyOfRelationship,
    95  		})
    96  	}
    97  
    98  	return relationships
    99  }
   100  
   101  // moduleEqual is used to deduplicate go modules especially the sub module may be identical to the main one
   102  func moduleEqual(lhs, rhs *debug.Module) bool {
   103  	if lhs == rhs {
   104  		return true
   105  	}
   106  	if lhs == nil || rhs == nil {
   107  		return false
   108  	}
   109  
   110  	if lhs.Path != rhs.Path ||
   111  		lhs.Version != rhs.Version ||
   112  		lhs.Sum != rhs.Sum {
   113  		return false
   114  	}
   115  
   116  	return moduleEqual(lhs.Replace, rhs.Replace)
   117  }
   118  
// emptyModule is the zero-value module, compared against to detect a build info entry that has no
// main module recorded.
var emptyModule debug.Module

// moduleFromPartialPackageBuild is the placeholder main module compared against in missingMainModule:
// "command-line-arguments" appears as the main module path when go build is invoked with a source
// file rather than a package (directory).
var moduleFromPartialPackageBuild = debug.Module{Path: "command-line-arguments"}
   121  
   122  func (c *goBinaryCataloger) buildGoPkgInfo(ctx context.Context, resolver file.Resolver, location file.Location, mod *extendedBuildInfo, arch string, reader io.ReadSeekCloser) (*pkg.Package, []pkg.Package) {
   123  	if mod == nil {
   124  		return nil, nil
   125  	}
   126  
   127  	if missingMainModule(mod) {
   128  		mod.Main = createMainModuleFromPath(mod)
   129  	}
   130  
   131  	var pkgs []pkg.Package
   132  	for _, dep := range mod.Deps {
   133  		if dep == nil {
   134  			continue
   135  		}
   136  		if moduleEqual(dep, &mod.Main) {
   137  			continue
   138  		}
   139  		lics := c.licenseResolver.getLicenses(ctx, resolver, dep.Path, dep.Version)
   140  		gover, experiments := getExperimentsFromVersion(mod.GoVersion)
   141  
   142  		m := newBinaryMetadata(
   143  			dep,
   144  			mod.Main.Path,
   145  			gover,
   146  			arch,
   147  			nil,
   148  			mod.cryptoSettings,
   149  			experiments,
   150  		)
   151  
   152  		p := c.newGoBinaryPackage(
   153  			dep,
   154  			m,
   155  			lics,
   156  			location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
   157  		)
   158  		if pkg.IsValid(&p) {
   159  			pkgs = append(pkgs, p)
   160  		}
   161  	}
   162  
   163  	if mod.Main == emptyModule {
   164  		return nil, pkgs
   165  	}
   166  
   167  	main := c.makeGoMainPackage(ctx, resolver, mod, arch, location, reader)
   168  
   169  	return &main, pkgs
   170  }
   171  
   172  func missingMainModule(mod *extendedBuildInfo) bool {
   173  	if mod.Main == emptyModule && mod.Path != "" {
   174  		return true
   175  	}
   176  	// special case: when invoking go build with a source file and not a package (directory) then you will
   177  	// see "command-line-arguments" as the main module path... even though that's not the main module. In this
   178  	// circumstance, we should treat the main module as missing and search for it within the dependencies.
   179  	return mod.Main == moduleFromPartialPackageBuild
   180  }
   181  
   182  func (c *goBinaryCataloger) makeGoMainPackage(ctx context.Context, resolver file.Resolver, mod *extendedBuildInfo, arch string, location file.Location, reader io.ReadSeekCloser) pkg.Package {
   183  	gbs := getBuildSettings(mod.Settings)
   184  	lics := c.licenseResolver.getLicenses(ctx, resolver, mod.Main.Path, mod.Main.Version)
   185  	gover, experiments := getExperimentsFromVersion(mod.GoVersion)
   186  
   187  	m := newBinaryMetadata(
   188  		&mod.Main,
   189  		mod.Main.Path,
   190  		gover,
   191  		arch,
   192  		gbs,
   193  		mod.cryptoSettings,
   194  		experiments,
   195  	)
   196  
   197  	if mod.Main.Version == devel {
   198  		version := c.findMainModuleVersion(&m, gbs, reader)
   199  
   200  		if version != "" {
   201  			// make sure version is prefixed with v as some build systems parsed
   202  			// during `findMainModuleVersion` can include incomplete semver
   203  			// vx.x.x is correct
   204  			version = ensurePrefix(version, "v")
   205  		}
   206  		mod.Main.Version = version
   207  	}
   208  
   209  	main := c.newGoBinaryPackage(
   210  		&mod.Main,
   211  		m,
   212  		lics,
   213  		location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
   214  	)
   215  
   216  	return main
   217  }
   218  
// semverPattern matches a semantic version embedded in raw binary contents and exposes it through
// the named "version" capture group. The version must be terminated by a NUL byte and preceded by
// either a NUL byte or the Unicode replacement character (U+FFFD), optionally followed by any single
// character and a literal "L".
//
// The optional (.L)? is there because at least one binary has \xA0L preceding the version string,
// and for some reason that could not be matched directly as \x00\xA0L. The only thing that seems
// to work for now is to look for version strings following both \x00 and \x00.L.
var semverPattern = regexp.MustCompile(`(\x00|\x{FFFD})(.L)?(?P<version>v?(\d+\.\d+\.\d+[-\w]*[+\w]*))\x00`)
   223  
   224  func (c *goBinaryCataloger) findMainModuleVersion(metadata *pkg.GolangBinaryBuildinfoEntry, gbs pkg.KeyValues, reader io.ReadSeekCloser) string {
   225  	vcsVersion, hasVersion := gbs.Get("vcs.revision")
   226  	timestamp, hasTimestamp := gbs.Get("vcs.time")
   227  
   228  	var ldflags, majorVersion, fullVersion string
   229  	if c.mainModuleVersion.FromLDFlags && metadata != nil {
   230  		// we've found a specific version from the ldflags! use it as the version.
   231  		// why not combine that with the pseudo version (e.g. v1.2.3-0.20210101000000-abcdef123456)?
   232  		// short answer: we're assuming that if a specific semver was provided in the ldflags that
   233  		// there is a matching vcs tag to match that could be referenced. This assumption could
   234  		// be incorrect in terms of the go.mod contents, but is not incorrect in terms of the logical
   235  		// version of the package.
   236  		ldflags, _ = metadata.BuildSettings.Get("-ldflags")
   237  
   238  		majorVersion, fullVersion = extractVersionFromLDFlags(ldflags, metadata.MainModule)
   239  		if fullVersion != "" {
   240  			return fullVersion
   241  		}
   242  	}
   243  
   244  	// guess the version from pattern matching in the binary (can result in false positives)
   245  	if c.mainModuleVersion.FromContents {
   246  		_, err := reader.Seek(0, io.SeekStart)
   247  		if err != nil {
   248  			log.WithFields("error", err).Trace("unable to seek to start of go binary reader")
   249  		} else {
   250  			if v := extractVersionFromContents(reader); v != "" {
   251  				return v
   252  			}
   253  		}
   254  	}
   255  
   256  	// fallback to using the go standard pseudo v0.0.0 version
   257  	if c.mainModuleVersion.FromBuildSettings && hasVersion && hasTimestamp {
   258  		version := vcsVersion
   259  		//NOTE: err is ignored, because if parsing fails
   260  		// we still use the empty Time{} struct to generate an empty date, like 00010101000000
   261  		// for consistency with the pseudo-version format: https://go.dev/ref/mod#pseudo-versions
   262  		ts, _ := time.Parse(time.RFC3339, timestamp)
   263  		if len(vcsVersion) >= 12 {
   264  			version = vcsVersion[:12]
   265  		}
   266  
   267  		return module.PseudoVersion(majorVersion, fullVersion, ts, version)
   268  	}
   269  
   270  	return ""
   271  }
   272  
   273  func extractVersionFromContents(reader io.Reader) string {
   274  	matchMetadata, err := internal.MatchNamedCaptureGroupsFromReader(semverPattern, reader)
   275  	if err != nil {
   276  		log.WithFields("error", err).Trace("unable to extract version from go binary reader")
   277  		return ""
   278  	}
   279  
   280  	version, ok := matchMetadata["version"]
   281  	if ok {
   282  		return version
   283  	}
   284  	return ""
   285  }
   286  
   287  func extractVersionFromLDFlags(ldflags string, maimModule string) (majorVersion string, fullVersion string) {
   288  	if ldflags == "" {
   289  		return "", ""
   290  	}
   291  
   292  	for _, pattern := range knownBuildFlagPatterns {
   293  		newPattern := regexp.MustCompile(fmt.Sprintf(`(main|%s\/[^\s]*)%s`, strings.ReplaceAll(maimModule, "/", "\\/"), pattern.String()))
   294  		groups := internal.MatchNamedCaptureGroups(newPattern, ldflags)
   295  		v, ok := groups["version"]
   296  
   297  		if !ok {
   298  			continue
   299  		}
   300  
   301  		fullVersion = v
   302  		if !strings.HasPrefix(v, "v") {
   303  			fullVersion = fmt.Sprintf("v%s", v)
   304  		}
   305  		components := strings.Split(v, ".")
   306  
   307  		if len(components) == 0 {
   308  			continue
   309  		}
   310  
   311  		majorVersion = strings.TrimPrefix(components[0], "v")
   312  		return majorVersion, fullVersion
   313  	}
   314  
   315  	return "", ""
   316  }
   317  
   318  func getGOARCH(settings []debug.BuildSetting) string {
   319  	for _, s := range settings {
   320  		if s.Key == goArch {
   321  			return s.Value
   322  		}
   323  	}
   324  
   325  	return ""
   326  }
   327  
   328  func getGOARCHFromBin(r io.ReaderAt) (string, error) {
   329  	// Read the first bytes of the file to identify the format, then delegate to
   330  	// a format-specific function to load segment and section headers.
   331  	ident := make([]byte, 16)
   332  	if n, err := r.ReadAt(ident, 0); n < len(ident) || err != nil {
   333  		return "", fmt.Errorf("unrecognized file format: %w", err)
   334  	}
   335  
   336  	var arch string
   337  	switch {
   338  	case bytes.HasPrefix(ident, []byte("\x7FELF")):
   339  		f, err := elf.NewFile(r)
   340  		if err != nil {
   341  			return "", fmt.Errorf("unrecognized file format: %w", err)
   342  		}
   343  		arch = f.Machine.String()
   344  	case bytes.HasPrefix(ident, []byte("MZ")):
   345  		f, err := pe.NewFile(r)
   346  		if err != nil {
   347  			return "", fmt.Errorf("unrecognized file format: %w", err)
   348  		}
   349  		arch = fmt.Sprintf("%d", f.Machine)
   350  	case bytes.HasPrefix(ident, []byte("\xFE\xED\xFA")) || bytes.HasPrefix(ident[1:], []byte("\xFA\xED\xFE")):
   351  		f, err := macho.NewFile(r)
   352  		if err != nil {
   353  			return "", fmt.Errorf("unrecognized file format: %w", err)
   354  		}
   355  		arch = f.Cpu.String()
   356  	case bytes.HasPrefix(ident, []byte{0x01, 0xDF}) || bytes.HasPrefix(ident, []byte{0x01, 0xF7}):
   357  		f, err := xcoff.NewFile(r)
   358  		if err != nil {
   359  			return "", fmt.Errorf("unrecognized file format: %w", err)
   360  		}
   361  		arch = fmt.Sprintf("%d", f.TargetMachine)
   362  	default:
   363  		return "", errUnrecognizedFormat
   364  	}
   365  
   366  	arch = strings.Replace(arch, "EM_", "", 1)
   367  	arch = strings.Replace(arch, "Cpu", "", 1)
   368  	arch = strings.ToLower(arch)
   369  
   370  	return arch, nil
   371  }
   372  
   373  func getBuildSettings(settings []debug.BuildSetting) pkg.KeyValues {
   374  	m := make(pkg.KeyValues, 0)
   375  	for _, s := range settings {
   376  		m = append(m, pkg.KeyValue{
   377  			Key:   s.Key,
   378  			Value: s.Value,
   379  		})
   380  	}
   381  	return m
   382  }
   383  
   384  func getExperimentsFromVersion(version string) (string, []string) {
   385  	// See: https://github.com/anchore/grype/issues/1851
   386  	var experiments []string
   387  	version, rest, ok := strings.Cut(version, " ")
   388  	if ok {
   389  		// Assume they may add more non-version chunks in the future, so only look for "X:".
   390  		for _, chunk := range strings.Split(rest, " ") {
   391  			if strings.HasPrefix(rest, "X:") {
   392  				csv := strings.TrimPrefix(chunk, "X:")
   393  				experiments = append(experiments, strings.Split(csv, ",")...)
   394  			}
   395  		}
   396  	}
   397  
   398  	return version, experiments
   399  }
   400  
   401  func createMainModuleFromPath(existing *extendedBuildInfo) debug.Module {
   402  	// search for a main module candidate within the dependencies
   403  	var mainModuleCandidates []debug.Module
   404  	var usedIndex int
   405  	for i, dep := range existing.Deps {
   406  		if dep == nil {
   407  			continue
   408  		}
   409  
   410  		if dep.Version == devel {
   411  			usedIndex = i
   412  			mainModuleCandidates = append(mainModuleCandidates, *dep)
   413  		}
   414  	}
   415  	if len(mainModuleCandidates) == 1 {
   416  		// we need to prune the dependency from module list
   417  		existing.Deps = slices.Delete(existing.Deps, usedIndex, usedIndex+1)
   418  		return mainModuleCandidates[0]
   419  	}
   420  
   421  	// otherwise craft a main module from the path (a bit of a cop out, but allows us to have a main module)
   422  	return debug.Module{
   423  		Path:    existing.Path,
   424  		Version: devel,
   425  	}
   426  }
   427  
   428  func ensurePrefix(s, prefix string) string {
   429  	if !strings.HasPrefix(s, prefix) {
   430  		return prefix + s
   431  	}
   432  	return s
   433  }