github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/java/graalvm_native_image_cataloger.go (about) 1 package java 2 3 import ( 4 "bytes" 5 "compress/gzip" 6 "debug/elf" 7 "debug/macho" 8 "debug/pe" 9 "encoding/binary" 10 "encoding/json" 11 "errors" 12 "fmt" 13 "io" 14 "unsafe" 15 16 "github.com/anchore/syft/internal" 17 "github.com/anchore/syft/internal/log" 18 "github.com/anchore/syft/syft/artifact" 19 "github.com/anchore/syft/syft/cpe" 20 "github.com/anchore/syft/syft/file" 21 "github.com/anchore/syft/syft/pkg" 22 "github.com/anchore/syft/syft/pkg/cataloger/internal/unionreader" 23 ) 24 25 type nativeImageCycloneDX struct { 26 BomFormat string `json:"bomFormat"` 27 SpecVersion string `json:"specVersion"` 28 Version int `json:"version"` 29 Components []nativeImageComponent `json:"components"` 30 } 31 32 type nativeImageComponent struct { 33 Type string `json:"type"` 34 Group string `json:"group"` 35 Name string `json:"name"` 36 Version string `json:"version"` 37 Properties []nativeImageCPE `json:"properties"` 38 } 39 40 type nativeImageCPE struct { 41 Name string `json:"name"` 42 Value string `json:"value"` 43 } 44 45 type nativeImage interface { 46 fetchPkgs() ([]pkg.Package, error) 47 } 48 49 type nativeImageElf struct { 50 file *elf.File 51 } 52 53 type nativeImageMachO struct { 54 file *macho.File 55 } 56 57 type exportTypesPE struct { 58 functionPointer uint32 59 namePointer uint32 60 headerAttribute uint32 61 } 62 63 type exportPrefixPE struct { 64 characteristics uint32 65 timeDateStamp uint32 66 majorVersion uint16 67 minorVersion uint16 68 name uint32 69 base uint32 70 } 71 72 type exportContentPE struct { 73 // Directory Entry Contents for finding SBOM symbols 74 numberOfFunctions uint32 75 numberOfNames uint32 76 addressOfFunctions uint32 77 addressOfNames uint32 78 // Locations of SBOM symbols in the .data section 79 addressOfSbom uint32 80 addressOfSbomLength uint32 81 addressOfSvmVersion uint32 82 } 83 84 // A nativeImagePE must maintain the underlying reader to fetch information unavailable in the Golang API. 85 type nativeImagePE struct { 86 file *pe.File 87 reader io.ReaderAt 88 exportSymbols pe.DataDirectory 89 exports []byte 90 t exportTypesPE 91 header exportPrefixPE 92 } 93 94 type NativeImageCataloger struct{} 95 96 const nativeImageCatalogerName = "graalvm-native-image-cataloger" 97 const nativeImageSbomSymbol = "sbom" 98 const nativeImageSbomLengthSymbol = "sbom_length" 99 const nativeImageSbomVersionSymbol = "__svm_version_info" 100 const nativeImageMissingSymbolsError = "one or more symbols are missing from the native image executable" 101 const nativeImageInvalidIndexError = "parsing the executable file generated an invalid index" 102 const nativeImageMissingExportedDataDirectoryError = "exported data directory is missing" 103 104 // newNativeImageCataloger returns a new Native Image cataloger object. 105 func NewNativeImageCataloger() *NativeImageCataloger { 106 return &NativeImageCataloger{} 107 } 108 109 // Name returns a string that uniquely describes a native image cataloger 110 func (c *NativeImageCataloger) Name() string { 111 return nativeImageCatalogerName 112 } 113 114 // getPackage returns the package given within a NativeImageComponent. 115 func getPackage(component nativeImageComponent) pkg.Package { 116 var cpes []cpe.CPE 117 for _, property := range component.Properties { 118 c, err := cpe.New(property.Value) 119 if err != nil { 120 log.Debugf("unable to parse CPE: %v", err) 121 continue 122 } 123 cpes = append(cpes, c) 124 } 125 return pkg.Package{ 126 Name: component.Name, 127 Version: component.Version, 128 Language: pkg.Java, 129 Type: pkg.GraalVMNativeImagePkg, 130 MetadataType: pkg.JavaMetadataType, 131 FoundBy: nativeImageCatalogerName, 132 Metadata: pkg.JavaMetadata{ 133 PomProperties: &pkg.PomProperties{ 134 GroupID: component.Group, 135 }, 136 }, 137 CPEs: cpes, 138 } 139 } 140 141 // decompressSbom returns the packages given within a native image executable's SBOM. 142 func decompressSbom(dataBuf []byte, sbomStart uint64, lengthStart uint64) ([]pkg.Package, error) { 143 var pkgs []pkg.Package 144 145 lengthEnd := lengthStart + 8 146 bufLen := len(dataBuf) 147 if lengthEnd > uint64(bufLen) { 148 return nil, errors.New("the 'sbom_length' symbol overflows the binary") 149 } 150 151 length := dataBuf[lengthStart:lengthEnd] 152 p := bytes.NewBuffer(length) 153 var storedLength uint64 154 err := binary.Read(p, binary.LittleEndian, &storedLength) 155 if err != nil { 156 return nil, fmt.Errorf("could not read from binary file: %w", err) 157 } 158 159 log.WithFields("len", storedLength).Trace("found java native-image SBOM") 160 sbomEnd := sbomStart + storedLength 161 if sbomEnd > uint64(bufLen) { 162 return nil, errors.New("the sbom symbol overflows the binary") 163 } 164 165 sbomCompressed := dataBuf[sbomStart:sbomEnd] 166 p = bytes.NewBuffer(sbomCompressed) 167 gzreader, err := gzip.NewReader(p) 168 if err != nil { 169 return nil, fmt.Errorf("could not decompress the java native-image SBOM: %w", err) 170 } 171 172 output, err := io.ReadAll(gzreader) 173 if err != nil { 174 return nil, fmt.Errorf("could not read the java native-image SBOM: %w", err) 175 } 176 177 var sbomContent nativeImageCycloneDX 178 err = json.Unmarshal(output, &sbomContent) 179 if err != nil { 180 return nil, fmt.Errorf("could not unmarshal the java native-image SBOM: %w", err) 181 } 182 183 for _, component := range sbomContent.Components { 184 p := getPackage(component) 185 pkgs = append(pkgs, p) 186 } 187 188 return pkgs, nil 189 } 190 191 // fileError logs an error message when an executable cannot be read. 192 func fileError(filename string, err error) (nativeImage, error) { 193 // We could not read the file as a binary for the desired platform, but it may still be a native-image executable. 194 return nil, fmt.Errorf("unable to read executable (file=%q): %w", filename, err) 195 } 196 197 // newElf reads a Native Image from an ELF executable. 198 func newElf(filename string, r io.ReaderAt) (nativeImage, error) { 199 // First attempt to read an ELF file. 200 bi, err := elf.NewFile(r) 201 202 if err != nil { 203 var fmtErr *elf.FormatError 204 if errors.As(err, &fmtErr) { 205 // this is not an elf file 206 log.WithFields("filename", filename, "error", err).Trace("not an ELF binary") 207 return nil, nil 208 } 209 return fileError(filename, err) 210 } 211 if bi == nil { 212 return nil, nil 213 } 214 return nativeImageElf{ 215 file: bi, 216 }, nil 217 } 218 219 // newMachO reads a Native Image from a Mach O executable. 220 func newMachO(filename string, r io.ReaderAt) (nativeImage, error) { 221 // First attempt to read an ELF file. 222 bi, err := macho.NewFile(r) 223 224 if err != nil { 225 var fmtErr *macho.FormatError 226 if errors.As(err, &fmtErr) { 227 // this is not a MachO file 228 log.WithFields("filename", filename, "error", err).Trace("not a MachO binary") 229 return nil, nil 230 } 231 } 232 if bi == nil { 233 return nil, nil 234 } 235 return nativeImageMachO{ 236 file: bi, 237 }, nil 238 } 239 240 // newPE reads a Native Image from a Portable Executable file. 241 func newPE(filename string, r io.ReaderAt) (nativeImage, error) { 242 // First attempt to read an PE file. 243 bi, err := pe.NewFile(r) 244 245 // The reader does not refer to a PE file. 246 if err != nil { 247 // note: there isn't a good way to distinguish between a format error and other kinds of errors 248 log.WithFields("filename", filename, "error", err).Trace("not a PE binary") 249 return nil, nil 250 } 251 if bi == nil { 252 return nil, nil 253 } 254 255 var exportSymbolsDataDirectory pe.DataDirectory 256 switch h := bi.OptionalHeader.(type) { 257 case *pe.OptionalHeader32: 258 exportSymbolsDataDirectory = h.DataDirectory[0] 259 case *pe.OptionalHeader64: 260 exportSymbolsDataDirectory = h.DataDirectory[0] 261 default: 262 return nil, fmt.Errorf("unable to get 'exportSymbolsDataDirectory' from binary: %s", filename) 263 } 264 // If we have no exported symbols it is not a Native Image 265 if exportSymbolsDataDirectory.Size == 0 { 266 return fileError(filename, errors.New(nativeImageMissingExportedDataDirectoryError)) 267 } 268 exportSymbolsOffset := uint64(exportSymbolsDataDirectory.VirtualAddress) 269 exports := make([]byte, exportSymbolsDataDirectory.Size) 270 _, err = r.ReadAt(exports, int64(exportSymbolsOffset)) 271 if err != nil { 272 return fileError(filename, fmt.Errorf("could not read the exported symbols data directory: %w", err)) 273 } 274 return nativeImagePE{ 275 file: bi, 276 reader: r, 277 exportSymbols: exportSymbolsDataDirectory, 278 exports: exports, 279 t: exportTypesPE{ 280 functionPointer: 0, 281 namePointer: 0, 282 headerAttribute: 0, 283 }, 284 header: exportPrefixPE{ 285 characteristics: 0, 286 timeDateStamp: 0, 287 majorVersion: 0, 288 minorVersion: 0, 289 name: 0, 290 base: 0, 291 }, 292 }, nil 293 } 294 295 // fetchPkgs obtains the packages given in the binary. 296 func (ni nativeImageElf) fetchPkgs() (pkgs []pkg.Package, retErr error) { 297 defer func() { 298 if r := recover(); r != nil { 299 // this can happen in cases where a malformed binary is passed in can be initially parsed, but not 300 // used without error later down the line. 301 retErr = fmt.Errorf("recovered from panic: %v", r) 302 } 303 }() 304 305 bi := ni.file 306 var sbom elf.Symbol 307 var sbomLength elf.Symbol 308 var svmVersion elf.Symbol 309 310 si, err := bi.Symbols() 311 if err != nil { 312 return nil, fmt.Errorf("no symbols found in binary: %w", err) 313 } 314 if si == nil { 315 return nil, errors.New(nativeImageMissingSymbolsError) 316 } 317 for _, s := range si { 318 switch s.Name { 319 case nativeImageSbomSymbol: 320 sbom = s 321 case nativeImageSbomLengthSymbol: 322 sbomLength = s 323 case nativeImageSbomVersionSymbol: 324 svmVersion = s 325 } 326 } 327 if sbom.Value == 0 || sbomLength.Value == 0 || svmVersion.Value == 0 { 328 return nil, errors.New(nativeImageMissingSymbolsError) 329 } 330 dataSection := bi.Section(".data") 331 if dataSection == nil { 332 return nil, fmt.Errorf("no .data section found in binary: %w", err) 333 } 334 dataSectionBase := dataSection.SectionHeader.Addr 335 data, err := dataSection.Data() 336 if err != nil { 337 return nil, fmt.Errorf("cannot read the .data section: %w", err) 338 } 339 sbomLocation := sbom.Value - dataSectionBase 340 lengthLocation := sbomLength.Value - dataSectionBase 341 342 return decompressSbom(data, sbomLocation, lengthLocation) 343 } 344 345 // fetchPkgs obtains the packages from a Native Image given as a Mach O file. 346 func (ni nativeImageMachO) fetchPkgs() (pkgs []pkg.Package, retErr error) { 347 defer func() { 348 if r := recover(); r != nil { 349 // this can happen in cases where a malformed binary is passed in can be initially parsed, but not 350 // used without error later down the line. 351 retErr = fmt.Errorf("recovered from panic: %v", r) 352 } 353 }() 354 355 var sbom macho.Symbol 356 var sbomLength macho.Symbol 357 var svmVersion macho.Symbol 358 359 bi := ni.file 360 if bi.Symtab == nil { 361 return nil, errors.New(nativeImageMissingSymbolsError) 362 } 363 for _, s := range bi.Symtab.Syms { 364 switch s.Name { 365 case "_" + nativeImageSbomSymbol: 366 sbom = s 367 case "_" + nativeImageSbomLengthSymbol: 368 sbomLength = s 369 case "_" + nativeImageSbomVersionSymbol: 370 svmVersion = s 371 } 372 } 373 if sbom.Value == 0 || sbomLength.Value == 0 || svmVersion.Value == 0 { 374 return nil, errors.New(nativeImageMissingSymbolsError) 375 } 376 377 dataSegment := bi.Segment("__DATA") 378 if dataSegment == nil { 379 return nil, nil 380 } 381 dataBuf, err := dataSegment.Data() 382 if err != nil { 383 log.Tracef("cannot obtain buffer from data segment") 384 return nil, nil 385 } 386 sbomLocation := sbom.Value - dataSegment.Addr 387 lengthLocation := sbomLength.Value - dataSegment.Addr 388 389 return decompressSbom(dataBuf, sbomLocation, lengthLocation) 390 } 391 392 // fetchExportAttribute obtains an attribute from the exported symbols directory entry. 393 func (ni nativeImagePE) fetchExportAttribute(i int) (uint32, error) { 394 var attribute uint32 395 n := len(ni.exports) 396 j := int(unsafe.Sizeof(ni.header)) + i*int(unsafe.Sizeof(ni.t.headerAttribute)) 397 if j+4 >= n { 398 log.Tracef("invalid index to export directory entry attribute: %v", j) 399 return uint32(0), errors.New(nativeImageInvalidIndexError) 400 } 401 p := bytes.NewBuffer(ni.exports[j : j+4]) 402 err := binary.Read(p, binary.LittleEndian, &attribute) 403 if err != nil { 404 log.Tracef("error fetching export directory entry attribute: %v", err) 405 return uint32(0), err 406 } 407 return attribute, nil 408 } 409 410 // fetchExportFunctionPointer obtains a function pointer from the exported symbols directory entry. 411 func (ni nativeImagePE) fetchExportFunctionPointer(functionsBase uint32, i uint32) (uint32, error) { 412 var pointer uint32 413 414 n := uint32(len(ni.exports)) 415 sz := uint32(unsafe.Sizeof(ni.t.functionPointer)) 416 j := functionsBase + i*sz 417 if j+sz >= n { 418 log.Tracef("invalid index to exported function: %v", j) 419 return uint32(0), errors.New(nativeImageInvalidIndexError) 420 } 421 p := bytes.NewBuffer(ni.exports[j : j+sz]) 422 err := binary.Read(p, binary.LittleEndian, &pointer) 423 if err != nil { 424 log.Tracef("error fetching exported function: %v", err) 425 return uint32(0), err 426 } 427 return pointer, nil 428 } 429 430 // fetchExportContent obtains the content of the export directory entry relevant to the SBOM. 431 func (ni nativeImagePE) fetchExportContent() (*exportContentPE, error) { 432 content := new(exportContentPE) 433 var err error 434 content.numberOfFunctions, err = ni.fetchExportAttribute(0) 435 if err != nil { 436 return nil, fmt.Errorf("could not find the number of exported 'number of functions' attribute: %w", err) 437 } 438 content.numberOfNames, err = ni.fetchExportAttribute(1) 439 if err != nil { 440 return nil, fmt.Errorf("could not find the number of exported 'number of names' attribute: %w", err) 441 } 442 content.addressOfFunctions, err = ni.fetchExportAttribute(2) 443 if err != nil { 444 return nil, fmt.Errorf("could not find the exported 'address of functions' attribute: %w", err) 445 } 446 content.addressOfNames, err = ni.fetchExportAttribute(3) 447 if err != nil { 448 return nil, fmt.Errorf("could not find the exported 'address of names' attribute: %w", err) 449 } 450 return content, nil 451 } 452 453 // fetchSbomSymbols enumerates the symbols exported by a binary to detect Native Image's SBOM symbols. 454 func (ni nativeImagePE) fetchSbomSymbols(content *exportContentPE) { 455 // Appending NULL bytes to symbol names simplifies finding them in the export data directory 456 sbomBytes := []byte(nativeImageSbomSymbol + "\x00") 457 sbomLengthBytes := []byte(nativeImageSbomLengthSymbol + "\x00") 458 svmVersionInfoBytes := []byte(nativeImageSbomVersionSymbol + "\x00") 459 n := uint32(len(ni.exports)) 460 461 // Find SBOM, SBOM Length, and SVM Version Symbol 462 for i := uint32(0); i < content.numberOfNames; i++ { 463 j := i * uint32(unsafe.Sizeof(ni.t.namePointer)) 464 addressBase := content.addressOfNames - ni.exportSymbols.VirtualAddress 465 k := addressBase + j 466 sz := uint32(unsafe.Sizeof(ni.t.namePointer)) 467 if k+sz >= n { 468 log.Tracef("invalid index to exported function: %v", k) 469 // If we are at the end of exports, stop looking 470 return 471 } 472 var symbolAddress uint32 473 p := bytes.NewBuffer(ni.exports[k : k+sz]) 474 err := binary.Read(p, binary.LittleEndian, &symbolAddress) 475 if err != nil { 476 log.Tracef("error fetching address of symbol %v", err) 477 return 478 } 479 symbolBase := symbolAddress - ni.exportSymbols.VirtualAddress 480 if symbolBase >= n { 481 log.Tracef("invalid index to exported symbol: %v", symbolBase) 482 return 483 } 484 switch { 485 case bytes.HasPrefix(ni.exports[symbolBase:], sbomBytes): 486 content.addressOfSbom = i 487 case bytes.HasPrefix(ni.exports[symbolBase:], sbomLengthBytes): 488 content.addressOfSbomLength = i 489 case bytes.HasPrefix(ni.exports[symbolBase:], svmVersionInfoBytes): 490 content.addressOfSvmVersion = i 491 } 492 } 493 } 494 495 // fetchPkgs obtains the packages from a Native Image given as a PE file. 496 func (ni nativeImagePE) fetchPkgs() (pkgs []pkg.Package, retErr error) { 497 defer func() { 498 if r := recover(); r != nil { 499 // this can happen in cases where a malformed binary is passed in can be initially parsed, but not 500 // used without error later down the line. 501 retErr = fmt.Errorf("recovered from panic: %v", r) 502 } 503 }() 504 505 content, err := ni.fetchExportContent() 506 if err != nil { 507 log.Debugf("could not fetch the content of the export directory entry: %v", err) 508 return nil, err 509 } 510 ni.fetchSbomSymbols(content) 511 if content.addressOfSbom == uint32(0) || content.addressOfSbomLength == uint32(0) || content.addressOfSvmVersion == uint32(0) { 512 return nil, errors.New(nativeImageMissingSymbolsError) 513 } 514 functionsBase := content.addressOfFunctions - ni.exportSymbols.VirtualAddress 515 sbomOffset := content.addressOfSbom 516 sbomAddress, err := ni.fetchExportFunctionPointer(functionsBase, sbomOffset) 517 if err != nil { 518 return nil, fmt.Errorf("could not fetch SBOM pointer from exported functions: %w", err) 519 } 520 sbomLengthOffset := content.addressOfSbomLength 521 sbomLengthAddress, err := ni.fetchExportFunctionPointer(functionsBase, sbomLengthOffset) 522 if err != nil { 523 return nil, fmt.Errorf("could not fetch SBOM length pointer from exported functions: %w", err) 524 } 525 bi := ni.file 526 dataSection := bi.Section(".data") 527 if dataSection == nil { 528 return nil, nil 529 } 530 dataBuf, err := dataSection.Data() 531 if err != nil { 532 log.Tracef("cannot obtain buffer from the java native-image .data section") 533 return nil, nil 534 } 535 sbomLocation := sbomAddress - dataSection.VirtualAddress 536 lengthLocation := sbomLengthAddress - dataSection.VirtualAddress 537 538 return decompressSbom(dataBuf, uint64(sbomLocation), uint64(lengthLocation)) 539 } 540 541 // fetchPkgs provides the packages available in a UnionReader. 542 func fetchPkgs(reader unionreader.UnionReader, filename string) []pkg.Package { 543 var pkgs []pkg.Package 544 imageFormats := []func(string, io.ReaderAt) (nativeImage, error){newElf, newMachO, newPE} 545 546 // NOTE: multiple readers are returned to cover universal binaries, which are files 547 // with more than one binary 548 readers, err := unionreader.GetReaders(reader) 549 if err != nil { 550 log.Debugf("failed to open the java native-image binary: %v", err) 551 return nil 552 } 553 for _, r := range readers { 554 for _, makeNativeImage := range imageFormats { 555 ni, err := makeNativeImage(filename, r) 556 if err != nil { 557 continue 558 } 559 if ni == nil { 560 continue 561 } 562 newPkgs, err := ni.fetchPkgs() 563 if err != nil { 564 log.Tracef("unable to extract SBOM from possible java native-image %s: %v", filename, err) 565 continue 566 } 567 pkgs = append(pkgs, newPkgs...) 568 } 569 } 570 return pkgs 571 } 572 573 // Catalog attempts to find any native image executables reachable from a resolver. 574 func (c *NativeImageCataloger) Catalog(resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { 575 var pkgs []pkg.Package 576 fileMatches, err := resolver.FilesByMIMEType(internal.ExecutableMIMETypeSet.List()...) 577 if err != nil { 578 return pkgs, nil, fmt.Errorf("failed to find binaries by mime types: %w", err) 579 } 580 581 for _, location := range fileMatches { 582 readerCloser, err := resolver.FileContentsByLocation(location) 583 if err != nil { 584 log.Debugf("error opening file: %v", err) 585 continue 586 } 587 588 reader, err := unionreader.GetUnionReader(readerCloser) 589 if err != nil { 590 return nil, nil, err 591 } 592 newPkgs := fetchPkgs(reader, location.RealPath) 593 pkgs = append(pkgs, newPkgs...) 594 internal.CloseAndLogError(readerCloser, location.RealPath) 595 } 596 597 return pkgs, nil, nil 598 }