github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/java/graalvm_native_image_cataloger.go (about) 1 package java 2 3 import ( 4 "bytes" 5 "compress/gzip" 6 "context" 7 "debug/elf" 8 "debug/macho" 9 "debug/pe" 10 "encoding/binary" 11 "errors" 12 "fmt" 13 "io" 14 "unsafe" 15 16 "github.com/anchore/syft/internal" 17 "github.com/anchore/syft/internal/log" 18 "github.com/anchore/syft/internal/mimetype" 19 "github.com/anchore/syft/syft/artifact" 20 "github.com/anchore/syft/syft/file" 21 "github.com/anchore/syft/syft/format/cyclonedxjson" 22 "github.com/anchore/syft/syft/internal/unionreader" 23 "github.com/anchore/syft/syft/pkg" 24 ) 25 26 type nativeImage interface { 27 fetchPkgs() ([]pkg.Package, []artifact.Relationship, error) 28 } 29 30 type nativeImageElf struct { 31 file *elf.File 32 } 33 34 type nativeImageMachO struct { 35 file *macho.File 36 } 37 38 type exportTypesPE struct { 39 functionPointer uint32 40 namePointer uint32 41 headerAttribute uint32 42 } 43 44 type exportPrefixPE struct { 45 characteristics uint32 46 timeDateStamp uint32 47 majorVersion uint16 48 minorVersion uint16 49 name uint32 50 base uint32 51 } 52 53 type exportContentPE struct { 54 // Directory Entry Contents for finding SBOM symbols 55 numberOfFunctions uint32 56 numberOfNames uint32 57 addressOfFunctions uint32 58 addressOfNames uint32 59 // Locations of SBOM symbols in the .data section 60 addressOfSbom uint32 61 addressOfSbomLength uint32 62 addressOfSvmVersion uint32 63 } 64 65 // A nativeImagePE must maintain the underlying reader to fetch information unavailable in the Golang API. 66 type nativeImagePE struct { 67 file *pe.File 68 reader io.ReaderAt 69 exportSymbols pe.DataDirectory 70 exports []byte 71 t exportTypesPE 72 header exportPrefixPE 73 } 74 75 type nativeImageCataloger struct{} 76 77 const nativeImageCatalogerName = "graalvm-native-image-cataloger" 78 const nativeImageSbomSymbol = "sbom" 79 const nativeImageSbomLengthSymbol = "sbom_length" 80 const nativeImageSbomVersionSymbol = "__svm_version_info" 81 const nativeImageMissingSymbolsError = "one or more symbols are missing from the native image executable" 82 const nativeImageInvalidIndexError = "parsing the executable file generated an invalid index" 83 const nativeImageMissingExportedDataDirectoryError = "exported data directory is missing" 84 85 // NewNativeImageCataloger returns a new Native Image cataloger object. 86 func NewNativeImageCataloger() pkg.Cataloger { 87 return &nativeImageCataloger{} 88 } 89 90 // Name returns a string that uniquely describes a native image cataloger 91 func (c *nativeImageCataloger) Name() string { 92 return nativeImageCatalogerName 93 } 94 95 // decompressSbom returns the packages given within a native image executable's SBOM. 96 func decompressSbom(dataBuf []byte, sbomStart uint64, lengthStart uint64) ([]pkg.Package, []artifact.Relationship, error) { 97 lengthEnd := lengthStart + 8 98 bufLen := len(dataBuf) 99 if lengthEnd > uint64(bufLen) { 100 return nil, nil, errors.New("the 'sbom_length' symbol overflows the binary") 101 } 102 103 length := dataBuf[lengthStart:lengthEnd] 104 p := bytes.NewBuffer(length) 105 var storedLength uint64 106 err := binary.Read(p, binary.LittleEndian, &storedLength) 107 if err != nil { 108 return nil, nil, fmt.Errorf("could not read from binary file: %w", err) 109 } 110 111 log.WithFields("len", storedLength).Trace("found java native-image SBOM") 112 sbomEnd := sbomStart + storedLength 113 if sbomEnd > uint64(bufLen) { 114 return nil, nil, errors.New("the sbom symbol overflows the binary") 115 } 116 117 sbomCompressed := dataBuf[sbomStart:sbomEnd] 118 p = bytes.NewBuffer(sbomCompressed) 119 gzreader, err := gzip.NewReader(p) 120 if err != nil { 121 return nil, nil, fmt.Errorf("could not decompress the java native-image SBOM: %w", err) 122 } 123 124 sbom, _, _, err := cyclonedxjson.NewFormatDecoder().Decode(gzreader) 125 if err != nil { 126 return nil, nil, fmt.Errorf("could not unmarshal the java native-image SBOM: %w", err) 127 } 128 var pkgs []pkg.Package 129 for p := range sbom.Artifacts.Packages.Enumerate() { 130 pkgs = append(pkgs, p) 131 } 132 return pkgs, sbom.Relationships, nil 133 } 134 135 // fileError logs an error message when an executable cannot be read. 136 func fileError(filename string, err error) (nativeImage, error) { 137 // We could not read the file as a binary for the desired platform, but it may still be a native-image executable. 138 return nil, fmt.Errorf("unable to read executable (file=%q): %w", filename, err) 139 } 140 141 // newElf reads a Native Image from an ELF executable. 142 func newElf(filename string, r io.ReaderAt) (nativeImage, error) { 143 // First attempt to read an ELF file. 144 bi, err := elf.NewFile(r) 145 146 if err != nil { 147 var fmtErr *elf.FormatError 148 if errors.As(err, &fmtErr) { 149 // this is not an elf file 150 log.WithFields("filename", filename, "error", err).Trace("not an ELF binary") 151 return nil, nil 152 } 153 return fileError(filename, err) 154 } 155 if bi == nil { 156 return nil, nil 157 } 158 return nativeImageElf{ 159 file: bi, 160 }, nil 161 } 162 163 // newMachO reads a Native Image from a Mach O executable. 164 func newMachO(filename string, r io.ReaderAt) (nativeImage, error) { 165 // First attempt to read an ELF file. 166 bi, err := macho.NewFile(r) 167 168 if err != nil { 169 var fmtErr *macho.FormatError 170 if errors.As(err, &fmtErr) { 171 // this is not a MachO file 172 log.WithFields("filename", filename, "error", err).Trace("not a MachO binary") 173 return nil, nil 174 } 175 } 176 if bi == nil { 177 return nil, nil 178 } 179 return nativeImageMachO{ 180 file: bi, 181 }, nil 182 } 183 184 // newPE reads a Native Image from a Portable Executable file. 185 func newPE(filename string, r io.ReaderAt) (nativeImage, error) { 186 // First attempt to read an PE file. 187 bi, err := pe.NewFile(r) 188 189 // The reader does not refer to a PE file. 190 if err != nil { 191 // note: there isn't a good way to distinguish between a format error and other kinds of errors 192 log.WithFields("filename", filename, "error", err).Trace("not a PE binary") 193 return nil, nil 194 } 195 if bi == nil { 196 return nil, nil 197 } 198 199 var exportSymbolsDataDirectory pe.DataDirectory 200 switch h := bi.OptionalHeader.(type) { 201 case *pe.OptionalHeader32: 202 exportSymbolsDataDirectory = h.DataDirectory[0] 203 case *pe.OptionalHeader64: 204 exportSymbolsDataDirectory = h.DataDirectory[0] 205 default: 206 return nil, fmt.Errorf("unable to get 'exportSymbolsDataDirectory' from binary: %s", filename) 207 } 208 // If we have no exported symbols it is not a Native Image 209 if exportSymbolsDataDirectory.Size == 0 { 210 return fileError(filename, errors.New(nativeImageMissingExportedDataDirectoryError)) 211 } 212 exportSymbolsOffset := uint64(exportSymbolsDataDirectory.VirtualAddress) 213 exports := make([]byte, exportSymbolsDataDirectory.Size) 214 _, err = r.ReadAt(exports, int64(exportSymbolsOffset)) 215 if err != nil { 216 return fileError(filename, fmt.Errorf("could not read the exported symbols data directory: %w", err)) 217 } 218 return nativeImagePE{ 219 file: bi, 220 reader: r, 221 exportSymbols: exportSymbolsDataDirectory, 222 exports: exports, 223 t: exportTypesPE{ 224 functionPointer: 0, 225 namePointer: 0, 226 headerAttribute: 0, 227 }, 228 header: exportPrefixPE{ 229 characteristics: 0, 230 timeDateStamp: 0, 231 majorVersion: 0, 232 minorVersion: 0, 233 name: 0, 234 base: 0, 235 }, 236 }, nil 237 } 238 239 // fetchPkgs obtains the packages given in the binary. 240 func (ni nativeImageElf) fetchPkgs() (pkgs []pkg.Package, relationships []artifact.Relationship, retErr error) { 241 defer func() { 242 if r := recover(); r != nil { 243 // this can happen in cases where a malformed binary is passed in can be initially parsed, but not 244 // used without error later down the line. 245 retErr = fmt.Errorf("recovered from panic: %v", r) 246 } 247 }() 248 249 bi := ni.file 250 var sbom elf.Symbol 251 var sbomLength elf.Symbol 252 var svmVersion elf.Symbol 253 254 si, err := ni.getSymbols() 255 if err != nil { 256 return nil, nil, err 257 } 258 if len(si) == 0 { 259 return nil, nil, errors.New(nativeImageMissingSymbolsError) 260 } 261 for _, s := range si { 262 switch s.Name { 263 case nativeImageSbomSymbol: 264 sbom = s 265 case nativeImageSbomLengthSymbol: 266 sbomLength = s 267 case nativeImageSbomVersionSymbol: 268 svmVersion = s 269 } 270 } 271 if sbom.Value == 0 || sbomLength.Value == 0 || svmVersion.Value == 0 { 272 return nil, nil, errors.New(nativeImageMissingSymbolsError) 273 } 274 dataSection := bi.Section(".data") 275 if dataSection == nil { 276 return nil, nil, fmt.Errorf("no .data section found in binary: %w", err) 277 } 278 dataSectionBase := dataSection.Addr 279 data, err := dataSection.Data() 280 if err != nil { 281 return nil, nil, fmt.Errorf("cannot read the .data section: %w", err) 282 } 283 sbomLocation := sbom.Value - dataSectionBase 284 lengthLocation := sbomLength.Value - dataSectionBase 285 286 return decompressSbom(data, sbomLocation, lengthLocation) 287 } 288 289 // getSymbols obtains the union of the symbols in the .symtab and .dynsym sections of the ELF file 290 func (ni nativeImageElf) getSymbols() ([]elf.Symbol, error) { 291 var symbols []elf.Symbol 292 symsErr := error(nil) 293 dynErr := error(nil) 294 295 if syms, err := ni.file.Symbols(); err == nil { 296 symbols = append(symbols, syms...) 297 } else { 298 symsErr = err 299 } 300 301 if dynSyms, err := ni.file.DynamicSymbols(); err == nil { 302 symbols = append(symbols, dynSyms...) 303 } else { 304 dynErr = err 305 } 306 307 if symsErr != nil && dynErr != nil { 308 return nil, fmt.Errorf("could not retrieve symbols from binary: SHT_SYMTAB error: %v, SHT_DYNSYM error: %v", symsErr, dynErr) 309 } 310 311 return symbols, nil 312 } 313 314 // fetchPkgs obtains the packages from a Native Image given as a Mach O file. 315 func (ni nativeImageMachO) fetchPkgs() (pkgs []pkg.Package, relationships []artifact.Relationship, retErr error) { 316 defer func() { 317 if r := recover(); r != nil { 318 // this can happen in cases where a malformed binary is passed in can be initially parsed, but not 319 // used without error later down the line. 320 retErr = fmt.Errorf("recovered from panic: %v", r) 321 } 322 }() 323 324 var sbom macho.Symbol 325 var sbomLength macho.Symbol 326 var svmVersion macho.Symbol 327 328 bi := ni.file 329 if bi.Symtab == nil { 330 return nil, nil, errors.New(nativeImageMissingSymbolsError) 331 } 332 for _, s := range bi.Symtab.Syms { 333 switch s.Name { 334 case "_" + nativeImageSbomSymbol: 335 sbom = s 336 case "_" + nativeImageSbomLengthSymbol: 337 sbomLength = s 338 case "_" + nativeImageSbomVersionSymbol: 339 svmVersion = s 340 } 341 } 342 if sbom.Value == 0 || sbomLength.Value == 0 || svmVersion.Value == 0 { 343 return nil, nil, errors.New(nativeImageMissingSymbolsError) 344 } 345 346 dataSegment := bi.Segment("__DATA") 347 if dataSegment == nil { 348 return nil, nil, nil 349 } 350 dataBuf, err := dataSegment.Data() 351 if err != nil { 352 log.Tracef("cannot obtain buffer from data segment") 353 return nil, nil, nil 354 } 355 sbomLocation := sbom.Value - dataSegment.Addr 356 lengthLocation := sbomLength.Value - dataSegment.Addr 357 358 return decompressSbom(dataBuf, sbomLocation, lengthLocation) 359 } 360 361 // fetchExportAttribute obtains an attribute from the exported symbols directory entry. 362 func (ni nativeImagePE) fetchExportAttribute(i int) (uint32, error) { 363 var attribute uint32 364 n := len(ni.exports) 365 j := int(unsafe.Sizeof(ni.header)) + i*int(unsafe.Sizeof(ni.t.headerAttribute)) 366 if j+4 >= n { 367 log.Tracef("invalid index to export directory entry attribute: %v", j) 368 return uint32(0), errors.New(nativeImageInvalidIndexError) 369 } 370 p := bytes.NewBuffer(ni.exports[j : j+4]) 371 err := binary.Read(p, binary.LittleEndian, &attribute) 372 if err != nil { 373 log.Tracef("error fetching export directory entry attribute: %v", err) 374 return uint32(0), err 375 } 376 return attribute, nil 377 } 378 379 // fetchExportFunctionPointer obtains a function pointer from the exported symbols directory entry. 380 func (ni nativeImagePE) fetchExportFunctionPointer(functionsBase uint32, i uint32) (uint32, error) { 381 var pointer uint32 382 383 n := uint32(len(ni.exports)) 384 sz := uint32(unsafe.Sizeof(ni.t.functionPointer)) 385 j := functionsBase + i*sz 386 if j+sz >= n { 387 log.Tracef("invalid index to exported function: %v", j) 388 return uint32(0), errors.New(nativeImageInvalidIndexError) 389 } 390 p := bytes.NewBuffer(ni.exports[j : j+sz]) 391 err := binary.Read(p, binary.LittleEndian, &pointer) 392 if err != nil { 393 log.Tracef("error fetching exported function: %v", err) 394 return uint32(0), err 395 } 396 return pointer, nil 397 } 398 399 // fetchExportContent obtains the content of the export directory entry relevant to the SBOM. 400 func (ni nativeImagePE) fetchExportContent() (*exportContentPE, error) { 401 content := new(exportContentPE) 402 var err error 403 content.numberOfFunctions, err = ni.fetchExportAttribute(0) 404 if err != nil { 405 return nil, fmt.Errorf("could not find the number of exported 'number of functions' attribute: %w", err) 406 } 407 content.numberOfNames, err = ni.fetchExportAttribute(1) 408 if err != nil { 409 return nil, fmt.Errorf("could not find the number of exported 'number of names' attribute: %w", err) 410 } 411 content.addressOfFunctions, err = ni.fetchExportAttribute(2) 412 if err != nil { 413 return nil, fmt.Errorf("could not find the exported 'address of functions' attribute: %w", err) 414 } 415 content.addressOfNames, err = ni.fetchExportAttribute(3) 416 if err != nil { 417 return nil, fmt.Errorf("could not find the exported 'address of names' attribute: %w", err) 418 } 419 return content, nil 420 } 421 422 // fetchSbomSymbols enumerates the symbols exported by a binary to detect Native Image's SBOM symbols. 423 func (ni nativeImagePE) fetchSbomSymbols(content *exportContentPE) { 424 // Appending NULL bytes to symbol names simplifies finding them in the export data directory 425 sbomBytes := []byte(nativeImageSbomSymbol + "\x00") 426 sbomLengthBytes := []byte(nativeImageSbomLengthSymbol + "\x00") 427 svmVersionInfoBytes := []byte(nativeImageSbomVersionSymbol + "\x00") 428 n := uint32(len(ni.exports)) 429 430 // Find SBOM, SBOM Length, and SVM Version Symbol 431 for i := uint32(0); i < content.numberOfNames; i++ { 432 j := i * uint32(unsafe.Sizeof(ni.t.namePointer)) 433 addressBase := content.addressOfNames - ni.exportSymbols.VirtualAddress 434 k := addressBase + j 435 sz := uint32(unsafe.Sizeof(ni.t.namePointer)) 436 if k+sz >= n { 437 log.Tracef("invalid index to exported function: %v", k) 438 // If we are at the end of exports, stop looking 439 return 440 } 441 var symbolAddress uint32 442 p := bytes.NewBuffer(ni.exports[k : k+sz]) 443 err := binary.Read(p, binary.LittleEndian, &symbolAddress) 444 if err != nil { 445 log.Tracef("error fetching address of symbol %v", err) 446 return 447 } 448 symbolBase := symbolAddress - ni.exportSymbols.VirtualAddress 449 if symbolBase >= n { 450 log.Tracef("invalid index to exported symbol: %v", symbolBase) 451 return 452 } 453 switch { 454 case bytes.HasPrefix(ni.exports[symbolBase:], sbomBytes): 455 content.addressOfSbom = i 456 case bytes.HasPrefix(ni.exports[symbolBase:], sbomLengthBytes): 457 content.addressOfSbomLength = i 458 case bytes.HasPrefix(ni.exports[symbolBase:], svmVersionInfoBytes): 459 content.addressOfSvmVersion = i 460 } 461 } 462 } 463 464 // fetchPkgs obtains the packages from a Native Image given as a PE file. 465 func (ni nativeImagePE) fetchPkgs() (pkgs []pkg.Package, relationships []artifact.Relationship, retErr error) { 466 defer func() { 467 if r := recover(); r != nil { 468 // this can happen in cases where a malformed binary is passed in can be initially parsed, but not 469 // used without error later down the line. 470 retErr = fmt.Errorf("recovered from panic: %v", r) 471 } 472 }() 473 474 content, err := ni.fetchExportContent() 475 if err != nil { 476 log.Debugf("could not fetch the content of the export directory entry: %v", err) 477 return nil, nil, err 478 } 479 ni.fetchSbomSymbols(content) 480 if content.addressOfSbom == uint32(0) || content.addressOfSbomLength == uint32(0) || content.addressOfSvmVersion == uint32(0) { 481 return nil, nil, errors.New(nativeImageMissingSymbolsError) 482 } 483 functionsBase := content.addressOfFunctions - ni.exportSymbols.VirtualAddress 484 sbomOffset := content.addressOfSbom 485 sbomAddress, err := ni.fetchExportFunctionPointer(functionsBase, sbomOffset) 486 if err != nil { 487 return nil, nil, fmt.Errorf("could not fetch SBOM pointer from exported functions: %w", err) 488 } 489 sbomLengthOffset := content.addressOfSbomLength 490 sbomLengthAddress, err := ni.fetchExportFunctionPointer(functionsBase, sbomLengthOffset) 491 if err != nil { 492 return nil, nil, fmt.Errorf("could not fetch SBOM length pointer from exported functions: %w", err) 493 } 494 bi := ni.file 495 dataSection := bi.Section(".data") 496 if dataSection == nil { 497 return nil, nil, nil 498 } 499 dataBuf, err := dataSection.Data() 500 if err != nil { 501 log.Tracef("cannot obtain buffer from the java native-image .data section") 502 return nil, nil, nil 503 } 504 sbomLocation := sbomAddress - dataSection.VirtualAddress 505 lengthLocation := sbomLengthAddress - dataSection.VirtualAddress 506 507 return decompressSbom(dataBuf, uint64(sbomLocation), uint64(lengthLocation)) 508 } 509 510 // fetchPkgs provides the packages available in a UnionReader. 511 func fetchPkgs(reader unionreader.UnionReader, location file.Location) ([]pkg.Package, []artifact.Relationship) { 512 var pkgs []pkg.Package 513 var relationships []artifact.Relationship 514 imageFormats := []func(string, io.ReaderAt) (nativeImage, error){newElf, newMachO, newPE} 515 516 // NOTE: multiple readers are returned to cover universal binaries, which are files 517 // with more than one binary 518 readers, err := unionreader.GetReaders(reader) 519 if err != nil { 520 log.Debugf("failed to open the java native-image binary: %v", err) 521 return nil, nil 522 } 523 filename := location.RealPath 524 for _, r := range readers { 525 for _, makeNativeImage := range imageFormats { 526 ni, err := makeNativeImage(filename, r) 527 if err != nil { 528 continue 529 } 530 if ni == nil { 531 continue 532 } 533 newPkgs, newRelationships, err := ni.fetchPkgs() 534 if err != nil { 535 log.Tracef("unable to extract SBOM from possible java native-image %s: %v", filename, err) 536 continue 537 } 538 // Associate extracted packages with the native image location 539 for i := range newPkgs { 540 newPkgs[i].Locations.Add(location) 541 } 542 pkgs = append(pkgs, newPkgs...) 543 relationships = append(relationships, newRelationships...) 544 } 545 } 546 return pkgs, relationships 547 } 548 549 // Catalog attempts to find any native image executables reachable from a resolver. 550 func (c *nativeImageCataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { 551 var pkgs []pkg.Package 552 var relationships []artifact.Relationship 553 fileMatches, err := resolver.FilesByMIMEType(mimetype.ExecutableMIMETypeSet.List()...) 554 if err != nil { 555 return pkgs, nil, fmt.Errorf("failed to find binaries by mime types: %w", err) 556 } 557 558 for _, location := range fileMatches { 559 newPkgs, newRelationships, err := processLocation(location, resolver) 560 if err != nil { 561 return nil, nil, err 562 } 563 pkgs = append(pkgs, newPkgs...) 564 relationships = append(relationships, newRelationships...) 565 } 566 567 return pkgs, relationships, nil 568 } 569 570 func processLocation(location file.Location, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { 571 readerCloser, err := resolver.FileContentsByLocation(location) 572 if err != nil { 573 log.Debugf("error opening file: %v", err) 574 return nil, nil, nil 575 } 576 defer internal.CloseAndLogError(readerCloser, location.RealPath) 577 578 reader, err := unionreader.GetUnionReader(readerCloser) 579 if err != nil { 580 return nil, nil, err 581 } 582 pkgs, relationships := fetchPkgs(reader, location) 583 return pkgs, relationships, nil 584 }