github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/java/graalvm_native_image_cataloger.go (about) 1 package java 2 3 import ( 4 "bytes" 5 "compress/gzip" 6 "context" 7 "debug/elf" 8 "debug/macho" 9 "debug/pe" 10 "encoding/binary" 11 "encoding/json" 12 "errors" 13 "fmt" 14 "io" 15 "unsafe" 16 17 "github.com/anchore/syft/internal" 18 "github.com/anchore/syft/internal/log" 19 "github.com/anchore/syft/internal/mimetype" 20 "github.com/anchore/syft/syft/artifact" 21 "github.com/anchore/syft/syft/cpe" 22 "github.com/anchore/syft/syft/file" 23 "github.com/anchore/syft/syft/internal/unionreader" 24 "github.com/anchore/syft/syft/pkg" 25 ) 26 27 type nativeImageCycloneDX struct { 28 BomFormat string `json:"bomFormat"` 29 SpecVersion string `json:"specVersion"` 30 Version int `json:"version"` 31 Components []nativeImageComponent `json:"components"` 32 } 33 34 type nativeImageComponent struct { 35 Type string `json:"type"` 36 Group string `json:"group"` 37 Name string `json:"name"` 38 Version string `json:"version"` 39 Properties []nativeImageCPE `json:"properties"` 40 } 41 42 type nativeImageCPE struct { 43 Name string `json:"name"` 44 Value string `json:"value"` 45 } 46 47 type nativeImage interface { 48 fetchPkgs() ([]pkg.Package, error) 49 } 50 51 type nativeImageElf struct { 52 file *elf.File 53 } 54 55 type nativeImageMachO struct { 56 file *macho.File 57 } 58 59 type exportTypesPE struct { 60 functionPointer uint32 61 namePointer uint32 62 headerAttribute uint32 63 } 64 65 type exportPrefixPE struct { 66 characteristics uint32 67 timeDateStamp uint32 68 majorVersion uint16 69 minorVersion uint16 70 name uint32 71 base uint32 72 } 73 74 type exportContentPE struct { 75 // Directory Entry Contents for finding SBOM symbols 76 numberOfFunctions uint32 77 numberOfNames uint32 78 addressOfFunctions uint32 79 addressOfNames uint32 80 // Locations of SBOM symbols in the .data section 81 addressOfSbom uint32 82 addressOfSbomLength uint32 83 addressOfSvmVersion uint32 84 } 85 86 // A nativeImagePE must maintain the underlying reader to fetch information unavailable in the Golang API. 87 type nativeImagePE struct { 88 file *pe.File 89 reader io.ReaderAt 90 exportSymbols pe.DataDirectory 91 exports []byte 92 t exportTypesPE 93 header exportPrefixPE 94 } 95 96 type nativeImageCataloger struct{} 97 98 const nativeImageCatalogerName = "graalvm-native-image-cataloger" 99 const nativeImageSbomSymbol = "sbom" 100 const nativeImageSbomLengthSymbol = "sbom_length" 101 const nativeImageSbomVersionSymbol = "__svm_version_info" 102 const nativeImageMissingSymbolsError = "one or more symbols are missing from the native image executable" 103 const nativeImageInvalidIndexError = "parsing the executable file generated an invalid index" 104 const nativeImageMissingExportedDataDirectoryError = "exported data directory is missing" 105 106 // NewNativeImageCataloger returns a new Native Image cataloger object. 107 func NewNativeImageCataloger() pkg.Cataloger { 108 return &nativeImageCataloger{} 109 } 110 111 // Name returns a string that uniquely describes a native image cataloger 112 func (c *nativeImageCataloger) Name() string { 113 return nativeImageCatalogerName 114 } 115 116 // getPackage returns the package given within a NativeImageComponent. 117 func getPackage(component nativeImageComponent) pkg.Package { 118 var cpes []cpe.CPE 119 for _, property := range component.Properties { 120 c, err := cpe.New(property.Value, cpe.DeclaredSource) 121 if err != nil { 122 log.Debugf("unable to parse Attributes: %v", err) 123 continue 124 } 125 cpes = append(cpes, c) 126 } 127 return pkg.Package{ 128 Name: component.Name, 129 Version: component.Version, 130 Language: pkg.Java, 131 Type: pkg.GraalVMNativeImagePkg, 132 FoundBy: nativeImageCatalogerName, 133 Metadata: pkg.JavaArchive{ 134 PomProperties: &pkg.JavaPomProperties{ 135 GroupID: component.Group, 136 }, 137 }, 138 CPEs: cpes, 139 } 140 } 141 142 // decompressSbom returns the packages given within a native image executable's SBOM. 143 func decompressSbom(dataBuf []byte, sbomStart uint64, lengthStart uint64) ([]pkg.Package, error) { 144 var pkgs []pkg.Package 145 146 lengthEnd := lengthStart + 8 147 bufLen := len(dataBuf) 148 if lengthEnd > uint64(bufLen) { 149 return nil, errors.New("the 'sbom_length' symbol overflows the binary") 150 } 151 152 length := dataBuf[lengthStart:lengthEnd] 153 p := bytes.NewBuffer(length) 154 var storedLength uint64 155 err := binary.Read(p, binary.LittleEndian, &storedLength) 156 if err != nil { 157 return nil, fmt.Errorf("could not read from binary file: %w", err) 158 } 159 160 log.WithFields("len", storedLength).Trace("found java native-image SBOM") 161 sbomEnd := sbomStart + storedLength 162 if sbomEnd > uint64(bufLen) { 163 return nil, errors.New("the sbom symbol overflows the binary") 164 } 165 166 sbomCompressed := dataBuf[sbomStart:sbomEnd] 167 p = bytes.NewBuffer(sbomCompressed) 168 gzreader, err := gzip.NewReader(p) 169 if err != nil { 170 return nil, fmt.Errorf("could not decompress the java native-image SBOM: %w", err) 171 } 172 173 output, err := io.ReadAll(gzreader) 174 if err != nil { 175 return nil, fmt.Errorf("could not read the java native-image SBOM: %w", err) 176 } 177 178 var sbomContent nativeImageCycloneDX 179 err = json.Unmarshal(output, &sbomContent) 180 if err != nil { 181 return nil, fmt.Errorf("could not unmarshal the java native-image SBOM: %w", err) 182 } 183 184 for _, component := range sbomContent.Components { 185 p := getPackage(component) 186 pkgs = append(pkgs, p) 187 } 188 189 return pkgs, nil 190 } 191 192 // fileError logs an error message when an executable cannot be read. 193 func fileError(filename string, err error) (nativeImage, error) { 194 // We could not read the file as a binary for the desired platform, but it may still be a native-image executable. 195 return nil, fmt.Errorf("unable to read executable (file=%q): %w", filename, err) 196 } 197 198 // newElf reads a Native Image from an ELF executable. 199 func newElf(filename string, r io.ReaderAt) (nativeImage, error) { 200 // First attempt to read an ELF file. 201 bi, err := elf.NewFile(r) 202 203 if err != nil { 204 var fmtErr *elf.FormatError 205 if errors.As(err, &fmtErr) { 206 // this is not an elf file 207 log.WithFields("filename", filename, "error", err).Trace("not an ELF binary") 208 return nil, nil 209 } 210 return fileError(filename, err) 211 } 212 if bi == nil { 213 return nil, nil 214 } 215 return nativeImageElf{ 216 file: bi, 217 }, nil 218 } 219 220 // newMachO reads a Native Image from a Mach O executable. 221 func newMachO(filename string, r io.ReaderAt) (nativeImage, error) { 222 // First attempt to read an ELF file. 223 bi, err := macho.NewFile(r) 224 225 if err != nil { 226 var fmtErr *macho.FormatError 227 if errors.As(err, &fmtErr) { 228 // this is not a MachO file 229 log.WithFields("filename", filename, "error", err).Trace("not a MachO binary") 230 return nil, nil 231 } 232 } 233 if bi == nil { 234 return nil, nil 235 } 236 return nativeImageMachO{ 237 file: bi, 238 }, nil 239 } 240 241 // newPE reads a Native Image from a Portable Executable file. 242 func newPE(filename string, r io.ReaderAt) (nativeImage, error) { 243 // First attempt to read an PE file. 244 bi, err := pe.NewFile(r) 245 246 // The reader does not refer to a PE file. 247 if err != nil { 248 // note: there isn't a good way to distinguish between a format error and other kinds of errors 249 log.WithFields("filename", filename, "error", err).Trace("not a PE binary") 250 return nil, nil 251 } 252 if bi == nil { 253 return nil, nil 254 } 255 256 var exportSymbolsDataDirectory pe.DataDirectory 257 switch h := bi.OptionalHeader.(type) { 258 case *pe.OptionalHeader32: 259 exportSymbolsDataDirectory = h.DataDirectory[0] 260 case *pe.OptionalHeader64: 261 exportSymbolsDataDirectory = h.DataDirectory[0] 262 default: 263 return nil, fmt.Errorf("unable to get 'exportSymbolsDataDirectory' from binary: %s", filename) 264 } 265 // If we have no exported symbols it is not a Native Image 266 if exportSymbolsDataDirectory.Size == 0 { 267 return fileError(filename, errors.New(nativeImageMissingExportedDataDirectoryError)) 268 } 269 exportSymbolsOffset := uint64(exportSymbolsDataDirectory.VirtualAddress) 270 exports := make([]byte, exportSymbolsDataDirectory.Size) 271 _, err = r.ReadAt(exports, int64(exportSymbolsOffset)) 272 if err != nil { 273 return fileError(filename, fmt.Errorf("could not read the exported symbols data directory: %w", err)) 274 } 275 return nativeImagePE{ 276 file: bi, 277 reader: r, 278 exportSymbols: exportSymbolsDataDirectory, 279 exports: exports, 280 t: exportTypesPE{ 281 functionPointer: 0, 282 namePointer: 0, 283 headerAttribute: 0, 284 }, 285 header: exportPrefixPE{ 286 characteristics: 0, 287 timeDateStamp: 0, 288 majorVersion: 0, 289 minorVersion: 0, 290 name: 0, 291 base: 0, 292 }, 293 }, nil 294 } 295 296 // fetchPkgs obtains the packages given in the binary. 297 func (ni nativeImageElf) fetchPkgs() (pkgs []pkg.Package, retErr error) { 298 defer func() { 299 if r := recover(); r != nil { 300 // this can happen in cases where a malformed binary is passed in can be initially parsed, but not 301 // used without error later down the line. 302 retErr = fmt.Errorf("recovered from panic: %v", r) 303 } 304 }() 305 306 bi := ni.file 307 var sbom elf.Symbol 308 var sbomLength elf.Symbol 309 var svmVersion elf.Symbol 310 311 si, err := bi.Symbols() 312 if err != nil { 313 return nil, fmt.Errorf("no symbols found in binary: %w", err) 314 } 315 if si == nil { 316 return nil, errors.New(nativeImageMissingSymbolsError) 317 } 318 for _, s := range si { 319 switch s.Name { 320 case nativeImageSbomSymbol: 321 sbom = s 322 case nativeImageSbomLengthSymbol: 323 sbomLength = s 324 case nativeImageSbomVersionSymbol: 325 svmVersion = s 326 } 327 } 328 if sbom.Value == 0 || sbomLength.Value == 0 || svmVersion.Value == 0 { 329 return nil, errors.New(nativeImageMissingSymbolsError) 330 } 331 dataSection := bi.Section(".data") 332 if dataSection == nil { 333 return nil, fmt.Errorf("no .data section found in binary: %w", err) 334 } 335 dataSectionBase := dataSection.SectionHeader.Addr 336 data, err := dataSection.Data() 337 if err != nil { 338 return nil, fmt.Errorf("cannot read the .data section: %w", err) 339 } 340 sbomLocation := sbom.Value - dataSectionBase 341 lengthLocation := sbomLength.Value - dataSectionBase 342 343 return decompressSbom(data, sbomLocation, lengthLocation) 344 } 345 346 // fetchPkgs obtains the packages from a Native Image given as a Mach O file. 347 func (ni nativeImageMachO) fetchPkgs() (pkgs []pkg.Package, retErr error) { 348 defer func() { 349 if r := recover(); r != nil { 350 // this can happen in cases where a malformed binary is passed in can be initially parsed, but not 351 // used without error later down the line. 352 retErr = fmt.Errorf("recovered from panic: %v", r) 353 } 354 }() 355 356 var sbom macho.Symbol 357 var sbomLength macho.Symbol 358 var svmVersion macho.Symbol 359 360 bi := ni.file 361 if bi.Symtab == nil { 362 return nil, errors.New(nativeImageMissingSymbolsError) 363 } 364 for _, s := range bi.Symtab.Syms { 365 switch s.Name { 366 case "_" + nativeImageSbomSymbol: 367 sbom = s 368 case "_" + nativeImageSbomLengthSymbol: 369 sbomLength = s 370 case "_" + nativeImageSbomVersionSymbol: 371 svmVersion = s 372 } 373 } 374 if sbom.Value == 0 || sbomLength.Value == 0 || svmVersion.Value == 0 { 375 return nil, errors.New(nativeImageMissingSymbolsError) 376 } 377 378 dataSegment := bi.Segment("__DATA") 379 if dataSegment == nil { 380 return nil, nil 381 } 382 dataBuf, err := dataSegment.Data() 383 if err != nil { 384 log.Tracef("cannot obtain buffer from data segment") 385 return nil, nil 386 } 387 sbomLocation := sbom.Value - dataSegment.Addr 388 lengthLocation := sbomLength.Value - dataSegment.Addr 389 390 return decompressSbom(dataBuf, sbomLocation, lengthLocation) 391 } 392 393 // fetchExportAttribute obtains an attribute from the exported symbols directory entry. 394 func (ni nativeImagePE) fetchExportAttribute(i int) (uint32, error) { 395 var attribute uint32 396 n := len(ni.exports) 397 j := int(unsafe.Sizeof(ni.header)) + i*int(unsafe.Sizeof(ni.t.headerAttribute)) 398 if j+4 >= n { 399 log.Tracef("invalid index to export directory entry attribute: %v", j) 400 return uint32(0), errors.New(nativeImageInvalidIndexError) 401 } 402 p := bytes.NewBuffer(ni.exports[j : j+4]) 403 err := binary.Read(p, binary.LittleEndian, &attribute) 404 if err != nil { 405 log.Tracef("error fetching export directory entry attribute: %v", err) 406 return uint32(0), err 407 } 408 return attribute, nil 409 } 410 411 // fetchExportFunctionPointer obtains a function pointer from the exported symbols directory entry. 412 func (ni nativeImagePE) fetchExportFunctionPointer(functionsBase uint32, i uint32) (uint32, error) { 413 var pointer uint32 414 415 n := uint32(len(ni.exports)) 416 sz := uint32(unsafe.Sizeof(ni.t.functionPointer)) 417 j := functionsBase + i*sz 418 if j+sz >= n { 419 log.Tracef("invalid index to exported function: %v", j) 420 return uint32(0), errors.New(nativeImageInvalidIndexError) 421 } 422 p := bytes.NewBuffer(ni.exports[j : j+sz]) 423 err := binary.Read(p, binary.LittleEndian, &pointer) 424 if err != nil { 425 log.Tracef("error fetching exported function: %v", err) 426 return uint32(0), err 427 } 428 return pointer, nil 429 } 430 431 // fetchExportContent obtains the content of the export directory entry relevant to the SBOM. 432 func (ni nativeImagePE) fetchExportContent() (*exportContentPE, error) { 433 content := new(exportContentPE) 434 var err error 435 content.numberOfFunctions, err = ni.fetchExportAttribute(0) 436 if err != nil { 437 return nil, fmt.Errorf("could not find the number of exported 'number of functions' attribute: %w", err) 438 } 439 content.numberOfNames, err = ni.fetchExportAttribute(1) 440 if err != nil { 441 return nil, fmt.Errorf("could not find the number of exported 'number of names' attribute: %w", err) 442 } 443 content.addressOfFunctions, err = ni.fetchExportAttribute(2) 444 if err != nil { 445 return nil, fmt.Errorf("could not find the exported 'address of functions' attribute: %w", err) 446 } 447 content.addressOfNames, err = ni.fetchExportAttribute(3) 448 if err != nil { 449 return nil, fmt.Errorf("could not find the exported 'address of names' attribute: %w", err) 450 } 451 return content, nil 452 } 453 454 // fetchSbomSymbols enumerates the symbols exported by a binary to detect Native Image's SBOM symbols. 455 func (ni nativeImagePE) fetchSbomSymbols(content *exportContentPE) { 456 // Appending NULL bytes to symbol names simplifies finding them in the export data directory 457 sbomBytes := []byte(nativeImageSbomSymbol + "\x00") 458 sbomLengthBytes := []byte(nativeImageSbomLengthSymbol + "\x00") 459 svmVersionInfoBytes := []byte(nativeImageSbomVersionSymbol + "\x00") 460 n := uint32(len(ni.exports)) 461 462 // Find SBOM, SBOM Length, and SVM Version Symbol 463 for i := uint32(0); i < content.numberOfNames; i++ { 464 j := i * uint32(unsafe.Sizeof(ni.t.namePointer)) 465 addressBase := content.addressOfNames - ni.exportSymbols.VirtualAddress 466 k := addressBase + j 467 sz := uint32(unsafe.Sizeof(ni.t.namePointer)) 468 if k+sz >= n { 469 log.Tracef("invalid index to exported function: %v", k) 470 // If we are at the end of exports, stop looking 471 return 472 } 473 var symbolAddress uint32 474 p := bytes.NewBuffer(ni.exports[k : k+sz]) 475 err := binary.Read(p, binary.LittleEndian, &symbolAddress) 476 if err != nil { 477 log.Tracef("error fetching address of symbol %v", err) 478 return 479 } 480 symbolBase := symbolAddress - ni.exportSymbols.VirtualAddress 481 if symbolBase >= n { 482 log.Tracef("invalid index to exported symbol: %v", symbolBase) 483 return 484 } 485 switch { 486 case bytes.HasPrefix(ni.exports[symbolBase:], sbomBytes): 487 content.addressOfSbom = i 488 case bytes.HasPrefix(ni.exports[symbolBase:], sbomLengthBytes): 489 content.addressOfSbomLength = i 490 case bytes.HasPrefix(ni.exports[symbolBase:], svmVersionInfoBytes): 491 content.addressOfSvmVersion = i 492 } 493 } 494 } 495 496 // fetchPkgs obtains the packages from a Native Image given as a PE file. 497 func (ni nativeImagePE) fetchPkgs() (pkgs []pkg.Package, retErr error) { 498 defer func() { 499 if r := recover(); r != nil { 500 // this can happen in cases where a malformed binary is passed in can be initially parsed, but not 501 // used without error later down the line. 502 retErr = fmt.Errorf("recovered from panic: %v", r) 503 } 504 }() 505 506 content, err := ni.fetchExportContent() 507 if err != nil { 508 log.Debugf("could not fetch the content of the export directory entry: %v", err) 509 return nil, err 510 } 511 ni.fetchSbomSymbols(content) 512 if content.addressOfSbom == uint32(0) || content.addressOfSbomLength == uint32(0) || content.addressOfSvmVersion == uint32(0) { 513 return nil, errors.New(nativeImageMissingSymbolsError) 514 } 515 functionsBase := content.addressOfFunctions - ni.exportSymbols.VirtualAddress 516 sbomOffset := content.addressOfSbom 517 sbomAddress, err := ni.fetchExportFunctionPointer(functionsBase, sbomOffset) 518 if err != nil { 519 return nil, fmt.Errorf("could not fetch SBOM pointer from exported functions: %w", err) 520 } 521 sbomLengthOffset := content.addressOfSbomLength 522 sbomLengthAddress, err := ni.fetchExportFunctionPointer(functionsBase, sbomLengthOffset) 523 if err != nil { 524 return nil, fmt.Errorf("could not fetch SBOM length pointer from exported functions: %w", err) 525 } 526 bi := ni.file 527 dataSection := bi.Section(".data") 528 if dataSection == nil { 529 return nil, nil 530 } 531 dataBuf, err := dataSection.Data() 532 if err != nil { 533 log.Tracef("cannot obtain buffer from the java native-image .data section") 534 return nil, nil 535 } 536 sbomLocation := sbomAddress - dataSection.VirtualAddress 537 lengthLocation := sbomLengthAddress - dataSection.VirtualAddress 538 539 return decompressSbom(dataBuf, uint64(sbomLocation), uint64(lengthLocation)) 540 } 541 542 // fetchPkgs provides the packages available in a UnionReader. 543 func fetchPkgs(reader unionreader.UnionReader, filename string) []pkg.Package { 544 var pkgs []pkg.Package 545 imageFormats := []func(string, io.ReaderAt) (nativeImage, error){newElf, newMachO, newPE} 546 547 // NOTE: multiple readers are returned to cover universal binaries, which are files 548 // with more than one binary 549 readers, err := unionreader.GetReaders(reader) 550 if err != nil { 551 log.Debugf("failed to open the java native-image binary: %v", err) 552 return nil 553 } 554 for _, r := range readers { 555 for _, makeNativeImage := range imageFormats { 556 ni, err := makeNativeImage(filename, r) 557 if err != nil { 558 continue 559 } 560 if ni == nil { 561 continue 562 } 563 newPkgs, err := ni.fetchPkgs() 564 if err != nil { 565 log.Tracef("unable to extract SBOM from possible java native-image %s: %v", filename, err) 566 continue 567 } 568 pkgs = append(pkgs, newPkgs...) 569 } 570 } 571 return pkgs 572 } 573 574 // Catalog attempts to find any native image executables reachable from a resolver. 575 func (c *nativeImageCataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { 576 var pkgs []pkg.Package 577 fileMatches, err := resolver.FilesByMIMEType(mimetype.ExecutableMIMETypeSet.List()...) 578 if err != nil { 579 return pkgs, nil, fmt.Errorf("failed to find binaries by mime types: %w", err) 580 } 581 582 for _, location := range fileMatches { 583 newPkgs, err := processLocation(location, resolver) 584 if err != nil { 585 return nil, nil, err 586 } 587 pkgs = append(pkgs, newPkgs...) 588 } 589 590 return pkgs, nil, nil 591 } 592 593 func processLocation(location file.Location, resolver file.Resolver) ([]pkg.Package, error) { 594 readerCloser, err := resolver.FileContentsByLocation(location) 595 if err != nil { 596 log.Debugf("error opening file: %v", err) 597 return nil, nil 598 } 599 defer internal.CloseAndLogError(readerCloser, location.RealPath) 600 601 reader, err := unionreader.GetUnionReader(readerCloser) 602 if err != nil { 603 return nil, err 604 } 605 return fetchPkgs(reader, location.RealPath), nil 606 }