github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/golang/parse_go_binary.go (about) 1 package golang 2 3 import ( 4 "bytes" 5 "context" 6 "debug/elf" 7 "debug/macho" 8 "debug/pe" 9 "errors" 10 "fmt" 11 "io" 12 "regexp" 13 "runtime/debug" 14 "slices" 15 "strings" 16 "time" 17 18 "golang.org/x/mod/module" 19 20 "github.com/anchore/syft/internal" 21 "github.com/anchore/syft/internal/log" 22 "github.com/anchore/syft/syft/artifact" 23 "github.com/anchore/syft/syft/file" 24 "github.com/anchore/syft/syft/internal/unionreader" 25 "github.com/anchore/syft/syft/pkg" 26 "github.com/anchore/syft/syft/pkg/cataloger/generic" 27 "github.com/anchore/syft/syft/pkg/cataloger/golang/internal/xcoff" 28 ) 29 30 const goArch = "GOARCH" 31 32 var ( 33 // errUnrecognizedFormat is returned when a given executable file doesn't 34 // appear to be in a known format, or it breaks the rules of that format, 35 // or when there are I/O errors reading the file. 36 errUnrecognizedFormat = errors.New("unrecognized file format") 37 // devel is used to recognize the current default version when a golang main distribution is built 38 // https://github.com/golang/go/issues/29228 this issue has more details on the progress of being able to 39 // inject the correct version into the main module of the build process 40 41 knownBuildFlagPatterns = []*regexp.Regexp{ 42 regexp.MustCompile(`(?m)\.[a-zA-Z0-9]*([rR]elease)?([gG]it)?([bB]uild)?[vV]er(sion)?=(\S+/)*(?P<version>v?\d+.\d+.\d+[-\w]*)`), 43 regexp.MustCompile(`(?m)\.[a-zA-Z0-9]*([tT]ag)=(\S+/)*(?P<version>v?\d+.\d+.\d+[-\w]*)`), 44 } 45 ) 46 47 const devel = "(devel)" 48 49 type goBinaryCataloger struct { 50 licenseResolver goLicenseResolver 51 mainModuleVersion MainModuleVersionConfig 52 } 53 54 func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger { 55 return &goBinaryCataloger{ 56 licenseResolver: newGoLicenseResolver(binaryCatalogerName, opts), 57 mainModuleVersion: opts.MainModuleVersion, 58 } 59 } 60 61 // parseGoBinary catalogs packages found in the "buildinfo" section of a binary built by the go compiler. 62 func (c *goBinaryCataloger) parseGoBinary(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 63 var pkgs []pkg.Package 64 65 unionReader, err := unionreader.GetUnionReader(reader.ReadCloser) 66 if err != nil { 67 return nil, nil, err 68 } 69 defer internal.CloseAndLogError(reader.ReadCloser, reader.RealPath) 70 71 mods, errs := scanFile(reader.Location, unionReader) 72 73 var rels []artifact.Relationship 74 for _, mod := range mods { 75 var depPkgs []pkg.Package 76 mainPkg, depPkgs := c.buildGoPkgInfo(ctx, resolver, reader.Location, mod, mod.arch, unionReader) 77 if mainPkg != nil { 78 rels = createModuleRelationships(*mainPkg, depPkgs) 79 pkgs = append(pkgs, *mainPkg) 80 } 81 pkgs = append(pkgs, depPkgs...) 82 } 83 84 return pkgs, rels, errs 85 } 86 87 func createModuleRelationships(main pkg.Package, deps []pkg.Package) []artifact.Relationship { 88 var relationships []artifact.Relationship 89 90 for _, dep := range deps { 91 relationships = append(relationships, artifact.Relationship{ 92 From: dep, 93 To: main, 94 Type: artifact.DependencyOfRelationship, 95 }) 96 } 97 98 return relationships 99 } 100 101 // moduleEqual is used to deduplicate go modules especially the sub module may be identical to the main one 102 func moduleEqual(lhs, rhs *debug.Module) bool { 103 if lhs == rhs { 104 return true 105 } 106 if lhs == nil || rhs == nil { 107 return false 108 } 109 110 if lhs.Path != rhs.Path || 111 lhs.Version != rhs.Version || 112 lhs.Sum != rhs.Sum { 113 return false 114 } 115 116 return moduleEqual(lhs.Replace, rhs.Replace) 117 } 118 119 var emptyModule debug.Module 120 var moduleFromPartialPackageBuild = debug.Module{Path: "command-line-arguments"} 121 122 func (c *goBinaryCataloger) buildGoPkgInfo(ctx context.Context, resolver file.Resolver, location file.Location, mod *extendedBuildInfo, arch string, reader io.ReadSeekCloser) (*pkg.Package, []pkg.Package) { 123 if mod == nil { 124 return nil, nil 125 } 126 127 if missingMainModule(mod) { 128 mod.Main = createMainModuleFromPath(mod) 129 } 130 131 var pkgs []pkg.Package 132 for _, dep := range mod.Deps { 133 if dep == nil { 134 continue 135 } 136 if moduleEqual(dep, &mod.Main) { 137 continue 138 } 139 lics := c.licenseResolver.getLicenses(ctx, resolver, dep.Path, dep.Version) 140 gover, experiments := getExperimentsFromVersion(mod.GoVersion) 141 142 m := newBinaryMetadata( 143 dep, 144 mod.Main.Path, 145 gover, 146 arch, 147 nil, 148 mod.cryptoSettings, 149 experiments, 150 ) 151 152 p := c.newGoBinaryPackage( 153 dep, 154 m, 155 lics, 156 location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 157 ) 158 if pkg.IsValid(&p) { 159 pkgs = append(pkgs, p) 160 } 161 } 162 163 if mod.Main == emptyModule { 164 return nil, pkgs 165 } 166 167 main := c.makeGoMainPackage(ctx, resolver, mod, arch, location, reader) 168 169 return &main, pkgs 170 } 171 172 func missingMainModule(mod *extendedBuildInfo) bool { 173 if mod.Main == emptyModule && mod.Path != "" { 174 return true 175 } 176 // special case: when invoking go build with a source file and not a package (directory) then you will 177 // see "command-line-arguments" as the main module path... even though that's not the main module. In this 178 // circumstance, we should treat the main module as missing and search for it within the dependencies. 179 return mod.Main == moduleFromPartialPackageBuild 180 } 181 182 func (c *goBinaryCataloger) makeGoMainPackage(ctx context.Context, resolver file.Resolver, mod *extendedBuildInfo, arch string, location file.Location, reader io.ReadSeekCloser) pkg.Package { 183 gbs := getBuildSettings(mod.Settings) 184 lics := c.licenseResolver.getLicenses(ctx, resolver, mod.Main.Path, mod.Main.Version) 185 gover, experiments := getExperimentsFromVersion(mod.GoVersion) 186 187 m := newBinaryMetadata( 188 &mod.Main, 189 mod.Main.Path, 190 gover, 191 arch, 192 gbs, 193 mod.cryptoSettings, 194 experiments, 195 ) 196 197 if mod.Main.Version == devel { 198 version := c.findMainModuleVersion(&m, gbs, reader) 199 200 if version != "" { 201 // make sure version is prefixed with v as some build systems parsed 202 // during `findMainModuleVersion` can include incomplete semver 203 // vx.x.x is correct 204 version = ensurePrefix(version, "v") 205 } 206 mod.Main.Version = version 207 } 208 209 main := c.newGoBinaryPackage( 210 &mod.Main, 211 m, 212 lics, 213 location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 214 ) 215 216 return main 217 } 218 219 // this is checking for (.L)? because at least one binary seems to have \xA0L preceding the version string, but for some reason 220 // this is unable to be matched by the regex here as \x00\xA0L; 221 // the only thing that seems to work is to just look for version strings following both \x00 and \x00.L for now 222 var semverPattern = regexp.MustCompile(`(\x00|\x{FFFD})(.L)?(?P<version>v?(\d+\.\d+\.\d+[-\w]*[+\w]*))\x00`) 223 224 func (c *goBinaryCataloger) findMainModuleVersion(metadata *pkg.GolangBinaryBuildinfoEntry, gbs pkg.KeyValues, reader io.ReadSeekCloser) string { 225 vcsVersion, hasVersion := gbs.Get("vcs.revision") 226 timestamp, hasTimestamp := gbs.Get("vcs.time") 227 228 var ldflags, majorVersion, fullVersion string 229 if c.mainModuleVersion.FromLDFlags && metadata != nil { 230 // we've found a specific version from the ldflags! use it as the version. 231 // why not combine that with the pseudo version (e.g. v1.2.3-0.20210101000000-abcdef123456)? 232 // short answer: we're assuming that if a specific semver was provided in the ldflags that 233 // there is a matching vcs tag to match that could be referenced. This assumption could 234 // be incorrect in terms of the go.mod contents, but is not incorrect in terms of the logical 235 // version of the package. 236 ldflags, _ = metadata.BuildSettings.Get("-ldflags") 237 238 majorVersion, fullVersion = extractVersionFromLDFlags(ldflags, metadata.MainModule) 239 if fullVersion != "" { 240 return fullVersion 241 } 242 } 243 244 // guess the version from pattern matching in the binary (can result in false positives) 245 if c.mainModuleVersion.FromContents { 246 _, err := reader.Seek(0, io.SeekStart) 247 if err != nil { 248 log.WithFields("error", err).Trace("unable to seek to start of go binary reader") 249 } else { 250 if v := extractVersionFromContents(reader); v != "" { 251 return v 252 } 253 } 254 } 255 256 // fallback to using the go standard pseudo v0.0.0 version 257 if c.mainModuleVersion.FromBuildSettings && hasVersion && hasTimestamp { 258 version := vcsVersion 259 //NOTE: err is ignored, because if parsing fails 260 // we still use the empty Time{} struct to generate an empty date, like 00010101000000 261 // for consistency with the pseudo-version format: https://go.dev/ref/mod#pseudo-versions 262 ts, _ := time.Parse(time.RFC3339, timestamp) 263 if len(vcsVersion) >= 12 { 264 version = vcsVersion[:12] 265 } 266 267 return module.PseudoVersion(majorVersion, fullVersion, ts, version) 268 } 269 270 return "" 271 } 272 273 func extractVersionFromContents(reader io.Reader) string { 274 matchMetadata, err := internal.MatchNamedCaptureGroupsFromReader(semverPattern, reader) 275 if err != nil { 276 log.WithFields("error", err).Trace("unable to extract version from go binary reader") 277 return "" 278 } 279 280 version, ok := matchMetadata["version"] 281 if ok { 282 return version 283 } 284 return "" 285 } 286 287 func extractVersionFromLDFlags(ldflags string, maimModule string) (majorVersion string, fullVersion string) { 288 if ldflags == "" { 289 return "", "" 290 } 291 292 for _, pattern := range knownBuildFlagPatterns { 293 newPattern := regexp.MustCompile(fmt.Sprintf(`(main|%s\/[^\s]*)%s`, strings.ReplaceAll(maimModule, "/", "\\/"), pattern.String())) 294 groups := internal.MatchNamedCaptureGroups(newPattern, ldflags) 295 v, ok := groups["version"] 296 297 if !ok { 298 continue 299 } 300 301 fullVersion = v 302 if !strings.HasPrefix(v, "v") { 303 fullVersion = fmt.Sprintf("v%s", v) 304 } 305 components := strings.Split(v, ".") 306 307 if len(components) == 0 { 308 continue 309 } 310 311 majorVersion = strings.TrimPrefix(components[0], "v") 312 return majorVersion, fullVersion 313 } 314 315 return "", "" 316 } 317 318 func getGOARCH(settings []debug.BuildSetting) string { 319 for _, s := range settings { 320 if s.Key == goArch { 321 return s.Value 322 } 323 } 324 325 return "" 326 } 327 328 func getGOARCHFromBin(r io.ReaderAt) (string, error) { 329 // Read the first bytes of the file to identify the format, then delegate to 330 // a format-specific function to load segment and section headers. 331 ident := make([]byte, 16) 332 if n, err := r.ReadAt(ident, 0); n < len(ident) || err != nil { 333 return "", fmt.Errorf("unrecognized file format: %w", err) 334 } 335 336 var arch string 337 switch { 338 case bytes.HasPrefix(ident, []byte("\x7FELF")): 339 f, err := elf.NewFile(r) 340 if err != nil { 341 return "", fmt.Errorf("unrecognized file format: %w", err) 342 } 343 arch = f.Machine.String() 344 case bytes.HasPrefix(ident, []byte("MZ")): 345 f, err := pe.NewFile(r) 346 if err != nil { 347 return "", fmt.Errorf("unrecognized file format: %w", err) 348 } 349 arch = fmt.Sprintf("%d", f.Machine) 350 case bytes.HasPrefix(ident, []byte("\xFE\xED\xFA")) || bytes.HasPrefix(ident[1:], []byte("\xFA\xED\xFE")): 351 f, err := macho.NewFile(r) 352 if err != nil { 353 return "", fmt.Errorf("unrecognized file format: %w", err) 354 } 355 arch = f.Cpu.String() 356 case bytes.HasPrefix(ident, []byte{0x01, 0xDF}) || bytes.HasPrefix(ident, []byte{0x01, 0xF7}): 357 f, err := xcoff.NewFile(r) 358 if err != nil { 359 return "", fmt.Errorf("unrecognized file format: %w", err) 360 } 361 arch = fmt.Sprintf("%d", f.TargetMachine) 362 default: 363 return "", errUnrecognizedFormat 364 } 365 366 arch = strings.Replace(arch, "EM_", "", 1) 367 arch = strings.Replace(arch, "Cpu", "", 1) 368 arch = strings.ToLower(arch) 369 370 return arch, nil 371 } 372 373 func getBuildSettings(settings []debug.BuildSetting) pkg.KeyValues { 374 m := make(pkg.KeyValues, 0) 375 for _, s := range settings { 376 m = append(m, pkg.KeyValue{ 377 Key: s.Key, 378 Value: s.Value, 379 }) 380 } 381 return m 382 } 383 384 func getExperimentsFromVersion(version string) (string, []string) { 385 // See: https://github.com/anchore/grype/issues/1851 386 var experiments []string 387 version, rest, ok := strings.Cut(version, " ") 388 if ok { 389 // Assume they may add more non-version chunks in the future, so only look for "X:". 390 for _, chunk := range strings.Split(rest, " ") { 391 if strings.HasPrefix(rest, "X:") { 392 csv := strings.TrimPrefix(chunk, "X:") 393 experiments = append(experiments, strings.Split(csv, ",")...) 394 } 395 } 396 } 397 398 return version, experiments 399 } 400 401 func createMainModuleFromPath(existing *extendedBuildInfo) debug.Module { 402 // search for a main module candidate within the dependencies 403 var mainModuleCandidates []debug.Module 404 var usedIndex int 405 for i, dep := range existing.Deps { 406 if dep == nil { 407 continue 408 } 409 410 if dep.Version == devel { 411 usedIndex = i 412 mainModuleCandidates = append(mainModuleCandidates, *dep) 413 } 414 } 415 if len(mainModuleCandidates) == 1 { 416 // we need to prune the dependency from module list 417 existing.Deps = slices.Delete(existing.Deps, usedIndex, usedIndex+1) 418 return mainModuleCandidates[0] 419 } 420 421 // otherwise craft a main module from the path (a bit of a cop out, but allows us to have a main module) 422 return debug.Module{ 423 Path: existing.Path, 424 Version: devel, 425 } 426 } 427 428 func ensurePrefix(s, prefix string) string { 429 if !strings.HasPrefix(s, prefix) { 430 return prefix + s 431 } 432 return s 433 }