github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/golang/gobinary/gobinary.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
// Package gobinary extracts packages from buildinfo inside Go binary files.
    16  package gobinary
    17  
    18  import (
    19  	"bytes"
    20  	"context"
    21  	"debug/buildinfo"
    22  	"errors"
    23  	"io"
    24  	"io/fs"
    25  	"regexp"
    26  	"runtime/debug"
    27  	"strings"
    28  
    29  	"github.com/google/osv-scalibr/extractor"
    30  	"github.com/google/osv-scalibr/extractor/filesystem"
    31  	"github.com/google/osv-scalibr/inventory"
    32  	"github.com/google/osv-scalibr/log"
    33  	"github.com/google/osv-scalibr/plugin"
    34  	"github.com/google/osv-scalibr/purl"
    35  	"github.com/google/osv-scalibr/stats"
    36  
    37  	cpb "github.com/google/osv-scalibr/binary/proto/config_go_proto"
    38  )
    39  
    40  const (
    41  	// Name is the unique name of this extractor.
    42  	Name = "go/binary"
    43  	// devel is the version of the development binary.
    44  	devel = "(devel)"
    45  )
    46  
    47  var (
    48  	// reVersion is a regexp to parse the Go version from the binary content.
    49  	reVersion = regexp.MustCompile(`(\x00|\x{FFFD})(.L)?(?P<version>v?(\d+\.\d+\.\d+[-\w]*[+\w]*))\x00`)
    50  )
    51  
// Extractor extracts packages from buildinfo inside Go binary files.
type Extractor struct {

	// Stats is a stats collector for reporting metrics.
	// It may be nil, in which case no metrics are reported.
	Stats stats.Collector

	// maxFileSizeBytes is the maximum size of a file that can be extracted.
	// If this limit is greater than zero and a file is encountered that is larger
	// than this limit, the file is ignored by returning false for `FileRequired`.
	maxFileSizeBytes int64

	// versionFromContent enables extracting the module version from the binary content.
	// This operation is expensive because it uses a regexp to parse the binary content.
	versionFromContent bool
}
    67  
    68  // New returns a Go binary extractor.
    69  func New(cfg *cpb.PluginConfig) filesystem.Extractor {
    70  	e := &Extractor{maxFileSizeBytes: cfg.MaxFileSizeBytes}
    71  	specific := plugin.FindConfig(cfg, func(c *cpb.PluginSpecificConfig) *cpb.GoBinaryConfig { return c.GetGoBinary() })
    72  	e.versionFromContent = specific.GetVersionFromContent()
    73  	return e
    74  }
    75  
    76  // Name of the extractor.
    77  func (e Extractor) Name() string { return Name }
    78  
    79  // Version of the extractor.
    80  func (e Extractor) Version() int { return 0 }
    81  
    82  // Requirements of the extractor.
    83  func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} }
    84  
    85  // FileRequired returns true if the specified file is marked executable.
    86  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
    87  	if !filesystem.IsInterestingExecutable(api) {
    88  		return false
    89  	}
    90  
    91  	fileinfo, err := api.Stat()
    92  	if err != nil {
    93  		return false
    94  	}
    95  
    96  	if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes {
    97  		e.reportFileRequired(api.Path(), fileinfo.Size(), stats.FileRequiredResultSizeLimitExceeded)
    98  		return false
    99  	}
   100  
   101  	e.reportFileRequired(api.Path(), fileinfo.Size(), stats.FileRequiredResultOK)
   102  	return true
   103  }
   104  
   105  func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) {
   106  	if e.Stats == nil {
   107  		return
   108  	}
   109  	e.Stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{
   110  		Path:          path,
   111  		Result:        result,
   112  		FileSizeBytes: fileSizeBytes,
   113  	})
   114  }
   115  
   116  // Extract returns a list of installed third party dependencies in a Go binary.
   117  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   118  	var readerAt io.ReaderAt
   119  	if fileWithReaderAt, ok := input.Reader.(io.ReaderAt); ok {
   120  		readerAt = fileWithReaderAt
   121  	} else {
   122  		buf := bytes.NewBuffer([]byte{})
   123  		_, err := io.Copy(buf, input.Reader)
   124  		if err != nil {
   125  			return inventory.Inventory{}, err
   126  		}
   127  		readerAt = bytes.NewReader(buf.Bytes())
   128  	}
   129  
   130  	binfo, err := buildinfo.Read(readerAt)
   131  	if err != nil {
   132  		log.Debugf("error parsing the contents of Go binary (%s) for extraction: %v", input.Path, err)
   133  		e.reportFileExtracted(input.Path, input.Info, err)
   134  		return inventory.Inventory{}, nil
   135  	}
   136  
   137  	pkg := e.extractPackagesFromBuildInfo(binfo, input.Path)
   138  	mainPkg := mainModule(binfo, input.Path)
   139  	if mainPkg != nil {
   140  		if mainPkg.Version == devel && e.versionFromContent {
   141  			if version := extractVersionFromConent(input.Reader); version != "" {
   142  				mainPkg.Version = version
   143  			}
   144  		}
   145  		pkg = append(pkg, mainPkg)
   146  	}
   147  	e.reportFileExtracted(input.Path, input.Info, nil)
   148  	return inventory.Inventory{Packages: pkg}, nil
   149  }
   150  
   151  func (e Extractor) reportFileExtracted(path string, fileinfo fs.FileInfo, err error) {
   152  	if e.Stats == nil {
   153  		return
   154  	}
   155  	var fileSizeBytes int64
   156  	if fileinfo != nil {
   157  		fileSizeBytes = fileinfo.Size()
   158  	}
   159  	e.Stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{
   160  		Path:          path,
   161  		Result:        filesystem.ExtractorErrorToFileExtractedResult(err),
   162  		FileSizeBytes: fileSizeBytes,
   163  	})
   164  }
   165  
   166  // MaxFileSizeBytes returns the maximum size of a file that can be extracted.
   167  func (e *Extractor) MaxFileSizeBytes() int64 {
   168  	return e.maxFileSizeBytes
   169  }
   170  
   171  // VersionFromContent returns whether module version extraction
   172  // from binary content is enabled.
   173  func (e *Extractor) VersionFromContent() bool {
   174  	return e.versionFromContent
   175  }
   176  
   177  func (e *Extractor) extractPackagesFromBuildInfo(binfo *buildinfo.BuildInfo, filename string) []*extractor.Package {
   178  	res := []*extractor.Package{}
   179  
   180  	validatedGoVers, err := validateGoVersion(binfo.GoVersion)
   181  	if err != nil {
   182  		log.Warnf("failed to validate the Go version from buildinfo (%v): %v", binfo, err)
   183  	}
   184  	if validatedGoVers != "" {
   185  		res = append(res, &extractor.Package{
   186  			Name:      "go",
   187  			Version:   validatedGoVers,
   188  			PURLType:  purl.TypeGolang,
   189  			Locations: []string{filename},
   190  		})
   191  	}
   192  
   193  	for _, dep := range binfo.Deps {
   194  		pkgName, pkgVers := parseDependency(dep)
   195  		if pkgName == "" {
   196  			continue
   197  		}
   198  
   199  		pkgVers = strings.TrimPrefix(pkgVers, "v")
   200  
   201  		pkg := &extractor.Package{
   202  			Name:      pkgName,
   203  			Version:   pkgVers,
   204  			PURLType:  purl.TypeGolang,
   205  			Locations: []string{filename},
   206  		}
   207  		res = append(res, pkg)
   208  	}
   209  
   210  	return res
   211  }
   212  
   213  func validateGoVersion(vers string) (string, error) {
   214  	if vers == "" {
   215  		return "", errors.New("can't validate empty Go version")
   216  	}
   217  
   218  	// The Go version can have multiple parts, in particular for development
   219  	// versions of Go. The actual Go version should be the first part (e.g.
   220  	// 'go1.20-pre3 +a813be86df' -> 'go1.20-pre3')
   221  	goVersion := strings.Split(vers, " ")[0]
   222  
   223  	// Strip the "go" prefix from the Go version. (e.g. go1.16.3 => 1.16.3)
   224  	res := strings.TrimPrefix(goVersion, "go")
   225  	return res, nil
   226  }
   227  
   228  func parseDependency(d *debug.Module) (string, string) {
   229  	dep := d
   230  	// Handle module replacement, but don't replace module if the replacement
   231  	// doesn't have a package name.
   232  	if dep.Replace != nil && dep.Replace.Path != "" {
   233  		dep = dep.Replace
   234  	}
   235  
   236  	return dep.Path, dep.Version
   237  }
   238  
   239  func mainModule(binfo *buildinfo.BuildInfo, filename string) *extractor.Package {
   240  	if binfo.Main.Path == "" {
   241  		return nil
   242  	}
   243  	version := strings.TrimPrefix(binfo.Main.Version, "v")
   244  	return &extractor.Package{
   245  		Name:      binfo.Main.Path,
   246  		Version:   version,
   247  		PURLType:  purl.TypeGolang,
   248  		Locations: []string{filename},
   249  	}
   250  }
   251  
   252  func extractVersionFromConent(reader io.Reader) string {
   253  	buf := bytes.NewBuffer([]byte{})
   254  	if _, err := io.Copy(buf, reader); err != nil {
   255  		return ""
   256  	}
   257  	matches := reVersion.FindSubmatch(buf.Bytes())
   258  	if len(matches) == 0 {
   259  		return ""
   260  	}
   261  
   262  	var version string
   263  	for i, name := range reVersion.SubexpNames() {
   264  		if name == "version" {
   265  			version = string(matches[i])
   266  			log.Infof("name: %q, matches[i]: %q", name, matches[i])
   267  		}
   268  	}
   269  
   270  	version = strings.TrimPrefix(version, "v")
   271  	return version
   272  }