github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/golang/parse_go_binary.go (about)

     1  package golang
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"debug/elf"
     7  	"debug/macho"
     8  	"debug/pe"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"regexp"
    13  	"runtime/debug"
    14  	"strings"
    15  	"time"
    16  
    17  	"golang.org/x/mod/module"
    18  
    19  	"github.com/anchore/syft/internal"
    20  	"github.com/anchore/syft/internal/log"
    21  	"github.com/anchore/syft/syft/artifact"
    22  	"github.com/anchore/syft/syft/file"
    23  	"github.com/anchore/syft/syft/internal/unionreader"
    24  	"github.com/anchore/syft/syft/pkg"
    25  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    26  	"github.com/anchore/syft/syft/pkg/cataloger/golang/internal/xcoff"
    27  )
    28  
    29  const goArch = "GOARCH"
    30  
    31  var (
    32  	// errUnrecognizedFormat is returned when a given executable file doesn't
    33  	// appear to be in a known format, or it breaks the rules of that format,
    34  	// or when there are I/O errors reading the file.
    35  	errUnrecognizedFormat = errors.New("unrecognized file format")
    36  	// devel is used to recognize the current default version when a golang main distribution is built
    37  	// https://github.com/golang/go/issues/29228 this issue has more details on the progress of being able to
    38  	// inject the correct version into the main module of the build process
    39  
    40  	knownBuildFlagPatterns = []*regexp.Regexp{
    41  		regexp.MustCompile(`(?m)\.([gG]it)?([bB]uild)?[vV]er(sion)?=(\S+/)*(?P<version>v?\d+.\d+.\d+[-\w]*)`),
    42  		regexp.MustCompile(`(?m)\.([tT]ag)=(\S+/)*(?P<version>v?\d+.\d+.\d+[-\w]*)`),
    43  	}
    44  )
    45  
    46  const devel = "(devel)"
    47  
    48  type goBinaryCataloger struct {
    49  	licenses          goLicenses
    50  	mainModuleVersion MainModuleVersionConfig
    51  }
    52  
    53  func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger {
    54  	return &goBinaryCataloger{
    55  		licenses:          newGoLicenses(binaryCatalogerName, opts),
    56  		mainModuleVersion: opts.MainModuleVersion,
    57  	}
    58  }
    59  
    60  // parseGoBinary catalogs packages found in the "buildinfo" section of a binary built by the go compiler.
    61  func (c *goBinaryCataloger) parseGoBinary(_ context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    62  	var pkgs []pkg.Package
    63  
    64  	unionReader, err := unionreader.GetUnionReader(reader.ReadCloser)
    65  	if err != nil {
    66  		return nil, nil, err
    67  	}
    68  
    69  	mods := scanFile(unionReader, reader.RealPath)
    70  	internal.CloseAndLogError(reader.ReadCloser, reader.RealPath)
    71  
    72  	for _, mod := range mods {
    73  		pkgs = append(pkgs, c.buildGoPkgInfo(resolver, reader.Location, mod, mod.arch, unionReader)...)
    74  	}
    75  
    76  	return pkgs, nil, nil
    77  }
    78  
    79  func (c *goBinaryCataloger) buildGoPkgInfo(resolver file.Resolver, location file.Location, mod *extendedBuildInfo, arch string, reader io.ReadSeekCloser) []pkg.Package {
    80  	var pkgs []pkg.Package
    81  	if mod == nil {
    82  		return pkgs
    83  	}
    84  
    85  	var empty debug.Module
    86  	if mod.Main == empty && mod.Path != "" {
    87  		mod.Main = createMainModuleFromPath(mod.Path)
    88  	}
    89  
    90  	for _, dep := range mod.Deps {
    91  		if dep == nil {
    92  			continue
    93  		}
    94  		p := c.newGoBinaryPackage(
    95  			resolver,
    96  			dep,
    97  			mod.Main.Path,
    98  			mod.GoVersion,
    99  			arch,
   100  			nil,
   101  			mod.cryptoSettings,
   102  			location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
   103  		)
   104  		if pkg.IsValid(&p) {
   105  			pkgs = append(pkgs, p)
   106  		}
   107  	}
   108  
   109  	if mod.Main == empty {
   110  		return pkgs
   111  	}
   112  
   113  	main := c.makeGoMainPackage(resolver, mod, arch, location, reader)
   114  	pkgs = append(pkgs, main)
   115  
   116  	return pkgs
   117  }
   118  
   119  func (c *goBinaryCataloger) makeGoMainPackage(resolver file.Resolver, mod *extendedBuildInfo, arch string, location file.Location, reader io.ReadSeekCloser) pkg.Package {
   120  	gbs := getBuildSettings(mod.Settings)
   121  	main := c.newGoBinaryPackage(
   122  		resolver,
   123  		&mod.Main,
   124  		mod.Main.Path,
   125  		mod.GoVersion,
   126  		arch,
   127  		gbs,
   128  		mod.cryptoSettings,
   129  		location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
   130  	)
   131  
   132  	if main.Version != devel {
   133  		// found a full package with a non-development version... return it as is...
   134  		return main
   135  	}
   136  
   137  	// we have a package, but the version is "devel"... let's try and find a better answer
   138  	var metadata *pkg.GolangBinaryBuildinfoEntry
   139  	if v, ok := main.Metadata.(pkg.GolangBinaryBuildinfoEntry); ok {
   140  		metadata = &v
   141  	}
   142  	version := c.findMainModuleVersion(metadata, gbs, reader)
   143  
   144  	if version != "" {
   145  		main.Version = version
   146  		main.PURL = packageURL(main.Name, main.Version)
   147  
   148  		main.SetID()
   149  	}
   150  
   151  	return main
   152  }
   153  
// semverPattern finds a NUL-delimited version string (an optional "v", three dot-separated numbers, and
// optional trailing characters from [-\w] and [+\w]) in raw binary contents. The match is exposed through
// the "version" capture group.
//
// The optional (.L)? group is there because at least one binary has \xA0L immediately before the version
// string, and writing that prefix into the pattern as \x00\xA0L does not match. The only thing that has
// worked so far is to accept version strings that follow either \x00 or \x00 + any one character + L.
// NOTE(review): the likely cause is that Go's regexp reads \xA0 in a pattern as the code point U+00A0
// rather than the raw byte 0xA0 — confirm.
var semverPattern = regexp.MustCompile(`\x00(.L)?(?P<version>v?(\d+\.\d+\.\d+[-\w]*[+\w]*))\x00`)
   158  
   159  func (c *goBinaryCataloger) findMainModuleVersion(metadata *pkg.GolangBinaryBuildinfoEntry, gbs pkg.KeyValues, reader io.ReadSeekCloser) string {
   160  	vcsVersion, hasVersion := gbs.Get("vcs.revision")
   161  	timestamp, hasTimestamp := gbs.Get("vcs.time")
   162  
   163  	var ldflags, majorVersion, fullVersion string
   164  	if c.mainModuleVersion.FromLDFlags && metadata != nil {
   165  		// we've found a specific version from the ldflags! use it as the version.
   166  		// why not combine that with the pseudo version (e.g. v1.2.3-0.20210101000000-abcdef123456)?
   167  		// short answer: we're assuming that if a specific semver was provided in the ldflags that
   168  		// there is a matching vcs tag to match that could be referenced. This assumption could
   169  		// be incorrect in terms of the go.mod contents, but is not incorrect in terms of the logical
   170  		// version of the package.
   171  		ldflags, _ = metadata.BuildSettings.Get("-ldflags")
   172  
   173  		majorVersion, fullVersion = extractVersionFromLDFlags(ldflags)
   174  		if fullVersion != "" {
   175  			return fullVersion
   176  		}
   177  	}
   178  
   179  	// guess the version from pattern matching in the binary (can result in false positives)
   180  	if c.mainModuleVersion.FromContents {
   181  		_, err := reader.Seek(0, io.SeekStart)
   182  		if err != nil {
   183  			log.WithFields("error", err).Trace("unable to seek to start of go binary reader")
   184  		} else {
   185  			if v := extractVersionFromContents(reader); v != "" {
   186  				return v
   187  			}
   188  		}
   189  	}
   190  
   191  	// fallback to using the go standard pseudo v0.0.0 version
   192  	if c.mainModuleVersion.FromBuildSettings && hasVersion && hasTimestamp {
   193  		version := vcsVersion
   194  		//NOTE: err is ignored, because if parsing fails
   195  		// we still use the empty Time{} struct to generate an empty date, like 00010101000000
   196  		// for consistency with the pseudo-version format: https://go.dev/ref/mod#pseudo-versions
   197  		ts, _ := time.Parse(time.RFC3339, timestamp)
   198  		if len(vcsVersion) >= 12 {
   199  			version = vcsVersion[:12]
   200  		}
   201  
   202  		return module.PseudoVersion(majorVersion, fullVersion, ts, version)
   203  	}
   204  
   205  	return ""
   206  }
   207  
   208  func extractVersionFromContents(reader io.Reader) string {
   209  	contents, err := io.ReadAll(reader)
   210  	if err != nil {
   211  		log.WithFields("error", err).Trace("unable to read from go binary reader")
   212  		return ""
   213  	}
   214  	matchMetadata := internal.MatchNamedCaptureGroups(semverPattern, string(contents))
   215  
   216  	version, ok := matchMetadata["version"]
   217  	if ok {
   218  		return version
   219  	}
   220  	return ""
   221  }
   222  
   223  func extractVersionFromLDFlags(ldflags string) (majorVersion string, fullVersion string) {
   224  	if ldflags == "" {
   225  		return "", ""
   226  	}
   227  
   228  	for _, pattern := range knownBuildFlagPatterns {
   229  		groups := internal.MatchNamedCaptureGroups(pattern, ldflags)
   230  		v, ok := groups["version"]
   231  
   232  		if !ok {
   233  			continue
   234  		}
   235  
   236  		fullVersion = v
   237  		if !strings.HasPrefix(v, "v") {
   238  			fullVersion = fmt.Sprintf("v%s", v)
   239  		}
   240  		components := strings.Split(v, ".")
   241  
   242  		if len(components) == 0 {
   243  			continue
   244  		}
   245  
   246  		majorVersion = strings.TrimPrefix(components[0], "v")
   247  		return majorVersion, fullVersion
   248  	}
   249  
   250  	return "", ""
   251  }
   252  
   253  func getGOARCH(settings []debug.BuildSetting) string {
   254  	for _, s := range settings {
   255  		if s.Key == goArch {
   256  			return s.Value
   257  		}
   258  	}
   259  
   260  	return ""
   261  }
   262  
   263  func getGOARCHFromBin(r io.ReaderAt) (string, error) {
   264  	// Read the first bytes of the file to identify the format, then delegate to
   265  	// a format-specific function to load segment and section headers.
   266  	ident := make([]byte, 16)
   267  	if n, err := r.ReadAt(ident, 0); n < len(ident) || err != nil {
   268  		return "", fmt.Errorf("unrecognized file format: %w", err)
   269  	}
   270  
   271  	var arch string
   272  	switch {
   273  	case bytes.HasPrefix(ident, []byte("\x7FELF")):
   274  		f, err := elf.NewFile(r)
   275  		if err != nil {
   276  			return "", fmt.Errorf("unrecognized file format: %w", err)
   277  		}
   278  		arch = f.Machine.String()
   279  	case bytes.HasPrefix(ident, []byte("MZ")):
   280  		f, err := pe.NewFile(r)
   281  		if err != nil {
   282  			return "", fmt.Errorf("unrecognized file format: %w", err)
   283  		}
   284  		arch = fmt.Sprintf("%d", f.Machine)
   285  	case bytes.HasPrefix(ident, []byte("\xFE\xED\xFA")) || bytes.HasPrefix(ident[1:], []byte("\xFA\xED\xFE")):
   286  		f, err := macho.NewFile(r)
   287  		if err != nil {
   288  			return "", fmt.Errorf("unrecognized file format: %w", err)
   289  		}
   290  		arch = f.Cpu.String()
   291  	case bytes.HasPrefix(ident, []byte{0x01, 0xDF}) || bytes.HasPrefix(ident, []byte{0x01, 0xF7}):
   292  		f, err := xcoff.NewFile(r)
   293  		if err != nil {
   294  			return "", fmt.Errorf("unrecognized file format: %w", err)
   295  		}
   296  		arch = fmt.Sprintf("%d", f.FileHeader.TargetMachine)
   297  	default:
   298  		return "", errUnrecognizedFormat
   299  	}
   300  
   301  	arch = strings.Replace(arch, "EM_", "", 1)
   302  	arch = strings.Replace(arch, "Cpu", "", 1)
   303  	arch = strings.ToLower(arch)
   304  
   305  	return arch, nil
   306  }
   307  
   308  func getBuildSettings(settings []debug.BuildSetting) pkg.KeyValues {
   309  	m := make(pkg.KeyValues, 0)
   310  	for _, s := range settings {
   311  		m = append(m, pkg.KeyValue{
   312  			Key:   s.Key,
   313  			Value: s.Value,
   314  		})
   315  	}
   316  	return m
   317  }
   318  
   319  func createMainModuleFromPath(path string) (mod debug.Module) {
   320  	mod.Path = path
   321  	mod.Version = devel
   322  	return
   323  }