github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/golang/parse_go_binary.go (about)

     1  package golang
     2  
     3  import (
     4  	"bytes"
     5  	"debug/elf"
     6  	"debug/macho"
     7  	"debug/pe"
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"regexp"
    12  	"runtime/debug"
    13  	"strings"
    14  	"time"
    15  
    16  	"github.com/nextlinux/gosbom/gosbom/artifact"
    17  	"github.com/nextlinux/gosbom/gosbom/file"
    18  	"github.com/nextlinux/gosbom/gosbom/pkg"
    19  	"github.com/nextlinux/gosbom/gosbom/pkg/cataloger/generic"
    20  	"github.com/nextlinux/gosbom/gosbom/pkg/cataloger/golang/internal/xcoff"
    21  	"github.com/nextlinux/gosbom/gosbom/pkg/cataloger/internal/unionreader"
    22  	"github.com/nextlinux/gosbom/internal"
    23  	"github.com/nextlinux/gosbom/internal/log"
    24  	"golang.org/x/mod/module"
    25  )
    26  
    27  const GOARCH = "GOARCH"
    28  
    29  var (
    30  	// errUnrecognizedFormat is returned when a given executable file doesn't
    31  	// appear to be in a known format, or it breaks the rules of that format,
    32  	// or when there are I/O errors reading the file.
    33  	errUnrecognizedFormat = errors.New("unrecognized file format")
    34  	// devel is used to recognize the current default version when a golang main distribution is built
    35  	// https://github.com/golang/go/issues/29228 this issue has more details on the progress of being able to
    36  	// inject the correct version into the main module of the build process
    37  
    38  	knownBuildFlagPatterns = []*regexp.Regexp{
    39  		regexp.MustCompile(`(?m)\.([gG]it)?([bB]uild)?[vV]ersion=(\S+/)*(?P<version>v?\d+.\d+.\d+[-\w]*)`),
    40  		regexp.MustCompile(`(?m)\.([tT]ag)=(\S+/)*(?P<version>v?\d+.\d+.\d+[-\w]*)`),
    41  	}
    42  )
    43  
    44  const devel = "(devel)"
    45  
    // goBinaryCataloger is the receiver for the Go binary cataloging methods in this file
    // (parseGoBinary, buildGoPkgInfo and makeGoMainPackage).
    46  type goBinaryCataloger struct {
        	// NOTE(review): licenses is not read by any code visible in this file; presumably it is
        	// consulted when packages are constructed (newGoBinaryPackage) — confirm.
    47  	licenses goLicenses
    48  }
    49  
    50  // Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing rpm db installation.
    51  func (c *goBinaryCataloger) parseGoBinary(resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    52  	var pkgs []pkg.Package
    53  
    54  	unionReader, err := unionreader.GetUnionReader(reader.ReadCloser)
    55  	if err != nil {
    56  		return nil, nil, err
    57  	}
    58  
    59  	mods, archs := scanFile(unionReader, reader.RealPath)
    60  	internal.CloseAndLogError(reader.ReadCloser, reader.RealPath)
    61  
    62  	for i, mod := range mods {
    63  		pkgs = append(pkgs, c.buildGoPkgInfo(resolver, reader.Location, mod, archs[i])...)
    64  	}
    65  	return pkgs, nil, nil
    66  }
    67  
    68  func (c *goBinaryCataloger) makeGoMainPackage(resolver file.Resolver, mod *debug.BuildInfo, arch string, location file.Location) pkg.Package {
    69  	gbs := getBuildSettings(mod.Settings)
    70  	main := c.newGoBinaryPackage(
    71  		resolver,
    72  		&mod.Main,
    73  		mod.Main.Path,
    74  		mod.GoVersion,
    75  		arch,
    76  		gbs,
    77  		location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
    78  	)
    79  
    80  	if main.Version != devel {
    81  		return main
    82  	}
    83  
    84  	version, hasVersion := gbs["vcs.revision"]
    85  	timestamp, hasTimestamp := gbs["vcs.time"]
    86  
    87  	var ldflags string
    88  	if metadata, ok := main.Metadata.(pkg.GolangBinMetadata); ok {
    89  		// we've found a specific version from the ldflags! use it as the version.
    90  		// why not combine that with the pseudo version (e.g. v1.2.3-0.20210101000000-abcdef123456)?
    91  		// short answer: we're assuming that if a specific semver was provided in the ldflags that
    92  		// there is a matching vcs tag to match that could be referenced. This assumption could
    93  		// be incorrect in terms of the go.mod contents, but is not incorrect in terms of the logical
    94  		// version of the package.
    95  		ldflags = metadata.BuildSettings["-ldflags"]
    96  	}
    97  
    98  	majorVersion, fullVersion := extractVersionFromLDFlags(ldflags)
    99  	if fullVersion != "" {
   100  		version = fullVersion
   101  	} else if hasVersion && hasTimestamp {
   102  		//NOTE: err is ignored, because if parsing fails
   103  		// we still use the empty Time{} struct to generate an empty date, like 00010101000000
   104  		// for consistency with the pseudo-version format: https://go.dev/ref/mod#pseudo-versions
   105  		ts, _ := time.Parse(time.RFC3339, timestamp)
   106  		if len(version) >= 12 {
   107  			version = version[:12]
   108  		}
   109  
   110  		version = module.PseudoVersion(majorVersion, fullVersion, ts, version)
   111  	}
   112  	if version != "" {
   113  		main.Version = version
   114  		main.PURL = packageURL(main.Name, main.Version)
   115  
   116  		main.SetID()
   117  	}
   118  
   119  	return main
   120  }
   121  
   122  func extractVersionFromLDFlags(ldflags string) (majorVersion string, fullVersion string) {
   123  	if ldflags == "" {
   124  		return "", ""
   125  	}
   126  
   127  	for _, pattern := range knownBuildFlagPatterns {
   128  		groups := internal.MatchNamedCaptureGroups(pattern, ldflags)
   129  		v, ok := groups["version"]
   130  
   131  		if !ok {
   132  			continue
   133  		}
   134  
   135  		fullVersion = v
   136  		if !strings.HasPrefix(v, "v") {
   137  			fullVersion = fmt.Sprintf("v%s", v)
   138  		}
   139  		components := strings.Split(v, ".")
   140  
   141  		if len(components) == 0 {
   142  			continue
   143  		}
   144  
   145  		majorVersion = strings.TrimPrefix(components[0], "v")
   146  		return majorVersion, fullVersion
   147  	}
   148  
   149  	return "", ""
   150  }
   151  
   152  // getArchs finds a binary architecture by two ways:
   153  // 1) reading build info from binaries compiled by go1.18+
   154  // 2) reading file headers from binaries compiled by < go1.18
   155  func getArchs(readers []io.ReaderAt, builds []*debug.BuildInfo) []string {
   156  	if len(readers) != len(builds) {
   157  		log.Trace("golang cataloger: bin parsing: number of builds and readers doesn't match")
   158  		return nil
   159  	}
   160  
   161  	if len(readers) == 0 || len(builds) == 0 {
   162  		log.Tracef("golang cataloger: bin parsing: %d readers and %d build info items", len(readers), len(builds))
   163  		return nil
   164  	}
   165  
   166  	archs := make([]string, len(builds))
   167  	for i, build := range builds {
   168  		archs[i] = getGOARCH(build.Settings)
   169  	}
   170  
   171  	// if architecture was found via build settings return
   172  	if archs[0] != "" {
   173  		return archs
   174  	}
   175  
   176  	for i, r := range readers {
   177  		a, err := getGOARCHFromBin(r)
   178  		if err != nil {
   179  			log.Tracef("golang cataloger: bin parsing: getting arch from binary: %v", err)
   180  			continue
   181  		}
   182  
   183  		archs[i] = a
   184  	}
   185  	return archs
   186  }
   187  
   188  func getGOARCH(settings []debug.BuildSetting) string {
   189  	for _, s := range settings {
   190  		if s.Key == GOARCH {
   191  			return s.Value
   192  		}
   193  	}
   194  
   195  	return ""
   196  }
   197  
   198  func getGOARCHFromBin(r io.ReaderAt) (string, error) {
   199  	// Read the first bytes of the file to identify the format, then delegate to
   200  	// a format-specific function to load segment and section headers.
   201  	ident := make([]byte, 16)
   202  	if n, err := r.ReadAt(ident, 0); n < len(ident) || err != nil {
   203  		return "", fmt.Errorf("unrecognized file format: %w", err)
   204  	}
   205  
   206  	var arch string
   207  	switch {
   208  	case bytes.HasPrefix(ident, []byte("\x7FELF")):
   209  		f, err := elf.NewFile(r)
   210  		if err != nil {
   211  			return "", fmt.Errorf("unrecognized file format: %w", err)
   212  		}
   213  		arch = f.Machine.String()
   214  	case bytes.HasPrefix(ident, []byte("MZ")):
   215  		f, err := pe.NewFile(r)
   216  		if err != nil {
   217  			return "", fmt.Errorf("unrecognized file format: %w", err)
   218  		}
   219  		arch = fmt.Sprintf("%d", f.Machine)
   220  	case bytes.HasPrefix(ident, []byte("\xFE\xED\xFA")) || bytes.HasPrefix(ident[1:], []byte("\xFA\xED\xFE")):
   221  		f, err := macho.NewFile(r)
   222  		if err != nil {
   223  			return "", fmt.Errorf("unrecognized file format: %w", err)
   224  		}
   225  		arch = f.Cpu.String()
   226  	case bytes.HasPrefix(ident, []byte{0x01, 0xDF}) || bytes.HasPrefix(ident, []byte{0x01, 0xF7}):
   227  		f, err := xcoff.NewFile(r)
   228  		if err != nil {
   229  			return "", fmt.Errorf("unrecognized file format: %w", err)
   230  		}
   231  		arch = fmt.Sprintf("%d", f.FileHeader.TargetMachine)
   232  	default:
   233  		return "", errUnrecognizedFormat
   234  	}
   235  
   236  	arch = strings.Replace(arch, "EM_", "", 1)
   237  	arch = strings.Replace(arch, "Cpu", "", 1)
   238  	arch = strings.ToLower(arch)
   239  
   240  	return arch, nil
   241  }
   242  
   243  func getBuildSettings(settings []debug.BuildSetting) map[string]string {
   244  	m := make(map[string]string)
   245  	for _, s := range settings {
   246  		m[s.Key] = s.Value
   247  	}
   248  	return m
   249  }
   250  
   251  func createMainModuleFromPath(path string) (mod debug.Module) {
   252  	mod.Path = path
   253  	mod.Version = devel
   254  	return
   255  }
   256  
   257  func (c *goBinaryCataloger) buildGoPkgInfo(resolver file.Resolver, location file.Location, mod *debug.BuildInfo, arch string) []pkg.Package {
   258  	var pkgs []pkg.Package
   259  	if mod == nil {
   260  		return pkgs
   261  	}
   262  
   263  	var empty debug.Module
   264  	if mod.Main == empty && mod.Path != "" {
   265  		mod.Main = createMainModuleFromPath(mod.Path)
   266  	}
   267  
   268  	for _, dep := range mod.Deps {
   269  		if dep == nil {
   270  			continue
   271  		}
   272  		p := c.newGoBinaryPackage(
   273  			resolver,
   274  			dep,
   275  			mod.Main.Path,
   276  			mod.GoVersion,
   277  			arch,
   278  			nil,
   279  			location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
   280  		)
   281  		if pkg.IsValid(&p) {
   282  			pkgs = append(pkgs, p)
   283  		}
   284  	}
   285  
   286  	if mod.Main == empty {
   287  		return pkgs
   288  	}
   289  
   290  	main := c.makeGoMainPackage(resolver, mod, arch, location)
   291  	pkgs = append(pkgs, main)
   292  
   293  	return pkgs
   294  }