github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/golang/gobinary/gobinary.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package gobinary extracts packages from buildinfo inside go binaries files. 16 package gobinary 17 18 import ( 19 "bytes" 20 "context" 21 "debug/buildinfo" 22 "errors" 23 "io" 24 "io/fs" 25 "regexp" 26 "runtime/debug" 27 "strings" 28 29 "github.com/google/osv-scalibr/extractor" 30 "github.com/google/osv-scalibr/extractor/filesystem" 31 "github.com/google/osv-scalibr/inventory" 32 "github.com/google/osv-scalibr/log" 33 "github.com/google/osv-scalibr/plugin" 34 "github.com/google/osv-scalibr/purl" 35 "github.com/google/osv-scalibr/stats" 36 37 cpb "github.com/google/osv-scalibr/binary/proto/config_go_proto" 38 ) 39 40 const ( 41 // Name is the unique name of this extractor. 42 Name = "go/binary" 43 // devel is the version of the development binary. 44 devel = "(devel)" 45 ) 46 47 var ( 48 // reVersion is a regexp to parse the Go version from the binary content. 49 reVersion = regexp.MustCompile(`(\x00|\x{FFFD})(.L)?(?P<version>v?(\d+\.\d+\.\d+[-\w]*[+\w]*))\x00`) 50 ) 51 52 // Extractor extracts packages from buildinfo inside go binaries files. 53 type Extractor struct { 54 55 // Stats is a stats collector for reporting metrics. 56 Stats stats.Collector 57 58 // maxFileSizeBytes is the maximum size of a file that can be extracted. 59 // If this limit is greater than zero and a file is encountered that is larger 60 // than this limit, the file is ignored by returning false for `FileRequired`. 61 maxFileSizeBytes int64 62 63 // versionFromBinary enables extracting the module version from the binary content. 64 // This operation is expensive because it uses a regexp to parse the binary content. 65 versionFromContent bool 66 } 67 68 // New returns a Go binary extractor. 69 func New(cfg *cpb.PluginConfig) filesystem.Extractor { 70 e := &Extractor{maxFileSizeBytes: cfg.MaxFileSizeBytes} 71 specific := plugin.FindConfig(cfg, func(c *cpb.PluginSpecificConfig) *cpb.GoBinaryConfig { return c.GetGoBinary() }) 72 e.versionFromContent = specific.GetVersionFromContent() 73 return e 74 } 75 76 // Name of the extractor. 77 func (e Extractor) Name() string { return Name } 78 79 // Version of the extractor. 80 func (e Extractor) Version() int { return 0 } 81 82 // Requirements of the extractor. 83 func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} } 84 85 // FileRequired returns true if the specified file is marked executable. 86 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 87 if !filesystem.IsInterestingExecutable(api) { 88 return false 89 } 90 91 fileinfo, err := api.Stat() 92 if err != nil { 93 return false 94 } 95 96 if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes { 97 e.reportFileRequired(api.Path(), fileinfo.Size(), stats.FileRequiredResultSizeLimitExceeded) 98 return false 99 } 100 101 e.reportFileRequired(api.Path(), fileinfo.Size(), stats.FileRequiredResultOK) 102 return true 103 } 104 105 func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) { 106 if e.Stats == nil { 107 return 108 } 109 e.Stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{ 110 Path: path, 111 Result: result, 112 FileSizeBytes: fileSizeBytes, 113 }) 114 } 115 116 // Extract returns a list of installed third party dependencies in a Go binary. 117 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 118 var readerAt io.ReaderAt 119 if fileWithReaderAt, ok := input.Reader.(io.ReaderAt); ok { 120 readerAt = fileWithReaderAt 121 } else { 122 buf := bytes.NewBuffer([]byte{}) 123 _, err := io.Copy(buf, input.Reader) 124 if err != nil { 125 return inventory.Inventory{}, err 126 } 127 readerAt = bytes.NewReader(buf.Bytes()) 128 } 129 130 binfo, err := buildinfo.Read(readerAt) 131 if err != nil { 132 log.Debugf("error parsing the contents of Go binary (%s) for extraction: %v", input.Path, err) 133 e.reportFileExtracted(input.Path, input.Info, err) 134 return inventory.Inventory{}, nil 135 } 136 137 pkg := e.extractPackagesFromBuildInfo(binfo, input.Path) 138 mainPkg := mainModule(binfo, input.Path) 139 if mainPkg != nil { 140 if mainPkg.Version == devel && e.versionFromContent { 141 if version := extractVersionFromConent(input.Reader); version != "" { 142 mainPkg.Version = version 143 } 144 } 145 pkg = append(pkg, mainPkg) 146 } 147 e.reportFileExtracted(input.Path, input.Info, nil) 148 return inventory.Inventory{Packages: pkg}, nil 149 } 150 151 func (e Extractor) reportFileExtracted(path string, fileinfo fs.FileInfo, err error) { 152 if e.Stats == nil { 153 return 154 } 155 var fileSizeBytes int64 156 if fileinfo != nil { 157 fileSizeBytes = fileinfo.Size() 158 } 159 e.Stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{ 160 Path: path, 161 Result: filesystem.ExtractorErrorToFileExtractedResult(err), 162 FileSizeBytes: fileSizeBytes, 163 }) 164 } 165 166 // MaxFileSizeBytes returns the maximum size of a file that can be extracted. 167 func (e *Extractor) MaxFileSizeBytes() int64 { 168 return e.maxFileSizeBytes 169 } 170 171 // VersionFromContent returns whether module version extraction 172 // from binary content is enabled. 173 func (e *Extractor) VersionFromContent() bool { 174 return e.versionFromContent 175 } 176 177 func (e *Extractor) extractPackagesFromBuildInfo(binfo *buildinfo.BuildInfo, filename string) []*extractor.Package { 178 res := []*extractor.Package{} 179 180 validatedGoVers, err := validateGoVersion(binfo.GoVersion) 181 if err != nil { 182 log.Warnf("failed to validate the Go version from buildinfo (%v): %v", binfo, err) 183 } 184 if validatedGoVers != "" { 185 res = append(res, &extractor.Package{ 186 Name: "go", 187 Version: validatedGoVers, 188 PURLType: purl.TypeGolang, 189 Locations: []string{filename}, 190 }) 191 } 192 193 for _, dep := range binfo.Deps { 194 pkgName, pkgVers := parseDependency(dep) 195 if pkgName == "" { 196 continue 197 } 198 199 pkgVers = strings.TrimPrefix(pkgVers, "v") 200 201 pkg := &extractor.Package{ 202 Name: pkgName, 203 Version: pkgVers, 204 PURLType: purl.TypeGolang, 205 Locations: []string{filename}, 206 } 207 res = append(res, pkg) 208 } 209 210 return res 211 } 212 213 func validateGoVersion(vers string) (string, error) { 214 if vers == "" { 215 return "", errors.New("can't validate empty Go version") 216 } 217 218 // The Go version can have multiple parts, in particular for development 219 // versions of Go. The actual Go version should be the first part (e.g. 220 // 'go1.20-pre3 +a813be86df' -> 'go1.20-pre3') 221 goVersion := strings.Split(vers, " ")[0] 222 223 // Strip the "go" prefix from the Go version. (e.g. go1.16.3 => 1.16.3) 224 res := strings.TrimPrefix(goVersion, "go") 225 return res, nil 226 } 227 228 func parseDependency(d *debug.Module) (string, string) { 229 dep := d 230 // Handle module replacement, but don't replace module if the replacement 231 // doesn't have a package name. 232 if dep.Replace != nil && dep.Replace.Path != "" { 233 dep = dep.Replace 234 } 235 236 return dep.Path, dep.Version 237 } 238 239 func mainModule(binfo *buildinfo.BuildInfo, filename string) *extractor.Package { 240 if binfo.Main.Path == "" { 241 return nil 242 } 243 version := strings.TrimPrefix(binfo.Main.Version, "v") 244 return &extractor.Package{ 245 Name: binfo.Main.Path, 246 Version: version, 247 PURLType: purl.TypeGolang, 248 Locations: []string{filename}, 249 } 250 } 251 252 func extractVersionFromConent(reader io.Reader) string { 253 buf := bytes.NewBuffer([]byte{}) 254 if _, err := io.Copy(buf, reader); err != nil { 255 return "" 256 } 257 matches := reVersion.FindSubmatch(buf.Bytes()) 258 if len(matches) == 0 { 259 return "" 260 } 261 262 var version string 263 for i, name := range reVersion.SubexpNames() { 264 if name == "version" { 265 version = string(matches[i]) 266 log.Infof("name: %q, matches[i]: %q", name, matches[i]) 267 } 268 } 269 270 version = strings.TrimPrefix(version, "v") 271 return version 272 }