github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/java/graalvm_native_image_cataloger.go (about) 1 package java 2 3 import ( 4 "bytes" 5 "compress/gzip" 6 "debug/elf" 7 "debug/macho" 8 "debug/pe" 9 "encoding/binary" 10 "encoding/json" 11 "errors" 12 "fmt" 13 "io" 14 "unsafe" 15 16 "github.com/anchore/syft/syft/artifact" 17 "github.com/anchore/syft/syft/cpe" 18 "github.com/anchore/syft/syft/file" 19 "github.com/anchore/syft/syft/pkg" 20 "github.com/lineaje-labs/syft/internal" 21 "github.com/lineaje-labs/syft/internal/log" 22 "github.com/lineaje-labs/syft/syft/pkg/cataloger/internal/unionreader" 23 ) 24 25 type nativeImageCycloneDX struct { 26 BomFormat string `json:"bomFormat"` 27 SpecVersion string `json:"specVersion"` 28 Version int `json:"version"` 29 Components []nativeImageComponent `json:"components"` 30 } 31 32 type nativeImageComponent struct { 33 Type string `json:"type"` 34 Group string `json:"group"` 35 Name string `json:"name"` 36 Version string `json:"version"` 37 Properties []nativeImageCPE `json:"properties"` 38 } 39 40 type nativeImageCPE struct { 41 Name string `json:"name"` 42 Value string `json:"value"` 43 } 44 45 type nativeImage interface { 46 fetchPkgs() ([]pkg.Package, error) 47 } 48 49 type nativeImageElf struct { 50 file *elf.File 51 } 52 53 type nativeImageMachO struct { 54 file *macho.File 55 } 56 57 type exportTypesPE struct { 58 functionPointer uint32 59 namePointer uint32 60 headerAttribute uint32 61 } 62 63 type exportPrefixPE struct { 64 characteristics uint32 65 timeDateStamp uint32 66 majorVersion uint16 67 minorVersion uint16 68 name uint32 69 base uint32 70 } 71 72 type exportContentPE struct { 73 // Directory Entry Contents for finding SBOM symbols 74 numberOfFunctions uint32 75 numberOfNames uint32 76 addressOfFunctions uint32 77 addressOfNames uint32 78 // Locations of SBOM symbols in the .data section 79 addressOfSbom uint32 80 addressOfSbomLength uint32 81 addressOfSvmVersion uint32 82 } 83 84 // A nativeImagePE must maintain the underlying reader to fetch information unavailable in the Golang API. 85 type nativeImagePE struct { 86 file *pe.File 87 reader io.ReaderAt 88 exportSymbols pe.DataDirectory 89 exports []byte 90 t exportTypesPE 91 header exportPrefixPE 92 } 93 94 type NativeImageCataloger struct{} 95 96 const nativeImageCatalogerName = "graalvm-native-image-cataloger" 97 const nativeImageSbomSymbol = "sbom" 98 const nativeImageSbomLengthSymbol = "sbom_length" 99 const nativeImageSbomVersionSymbol = "__svm_version_info" 100 const nativeImageMissingSymbolsError = "one or more symbols are missing from the native image executable" 101 const nativeImageInvalidIndexError = "parsing the executable file generated an invalid index" 102 const nativeImageMissingExportedDataDirectoryError = "exported data directory is missing" 103 104 // newNativeImageCataloger returns a new Native Image cataloger object. 105 func NewNativeImageCataloger() pkg.Cataloger { 106 return &NativeImageCataloger{} 107 } 108 109 // Name returns a string that uniquely describes a native image cataloger 110 func (c *NativeImageCataloger) Name() string { 111 return nativeImageCatalogerName 112 } 113 114 // getPackage returns the package given within a NativeImageComponent. 115 func getPackage(component nativeImageComponent) pkg.Package { 116 var cpes []cpe.CPE 117 for _, property := range component.Properties { 118 c, err := cpe.New(property.Value) 119 if err != nil { 120 log.Debugf("unable to parse CPE: %v", err) 121 continue 122 } 123 cpes = append(cpes, c) 124 } 125 return pkg.Package{ 126 Name: component.Name, 127 Version: component.Version, 128 Language: pkg.Java, 129 Type: pkg.GraalVMNativeImagePkg, 130 FoundBy: nativeImageCatalogerName, 131 Metadata: pkg.JavaArchive{ 132 PomProperties: &pkg.JavaPomProperties{ 133 GroupID: component.Group, 134 }, 135 }, 136 CPEs: cpes, 137 } 138 } 139 140 // decompressSbom returns the packages given within a native image executable's SBOM. 141 func decompressSbom(dataBuf []byte, sbomStart uint64, lengthStart uint64) ([]pkg.Package, error) { 142 var pkgs []pkg.Package 143 144 lengthEnd := lengthStart + 8 145 bufLen := len(dataBuf) 146 if lengthEnd > uint64(bufLen) { 147 return nil, errors.New("the 'sbom_length' symbol overflows the binary") 148 } 149 150 length := dataBuf[lengthStart:lengthEnd] 151 p := bytes.NewBuffer(length) 152 var storedLength uint64 153 err := binary.Read(p, binary.LittleEndian, &storedLength) 154 if err != nil { 155 return nil, fmt.Errorf("could not read from binary file: %w", err) 156 } 157 158 log.WithFields("len", storedLength).Trace("found java native-image SBOM") 159 sbomEnd := sbomStart + storedLength 160 if sbomEnd > uint64(bufLen) { 161 return nil, errors.New("the sbom symbol overflows the binary") 162 } 163 164 sbomCompressed := dataBuf[sbomStart:sbomEnd] 165 p = bytes.NewBuffer(sbomCompressed) 166 gzreader, err := gzip.NewReader(p) 167 if err != nil { 168 return nil, fmt.Errorf("could not decompress the java native-image SBOM: %w", err) 169 } 170 171 output, err := io.ReadAll(gzreader) 172 if err != nil { 173 return nil, fmt.Errorf("could not read the java native-image SBOM: %w", err) 174 } 175 176 var sbomContent nativeImageCycloneDX 177 err = json.Unmarshal(output, &sbomContent) 178 if err != nil { 179 return nil, fmt.Errorf("could not unmarshal the java native-image SBOM: %w", err) 180 } 181 182 for _, component := range sbomContent.Components { 183 p := getPackage(component) 184 pkgs = append(pkgs, p) 185 } 186 187 return pkgs, nil 188 } 189 190 // fileError logs an error message when an executable cannot be read. 191 func fileError(filename string, err error) (nativeImage, error) { 192 // We could not read the file as a binary for the desired platform, but it may still be a native-image executable. 193 return nil, fmt.Errorf("unable to read executable (file=%q): %w", filename, err) 194 } 195 196 // newElf reads a Native Image from an ELF executable. 197 func newElf(filename string, r io.ReaderAt) (nativeImage, error) { 198 // First attempt to read an ELF file. 199 bi, err := elf.NewFile(r) 200 201 if err != nil { 202 var fmtErr *elf.FormatError 203 if errors.As(err, &fmtErr) { 204 // this is not an elf file 205 log.WithFields("filename", filename, "error", err).Trace("not an ELF binary") 206 return nil, nil 207 } 208 return fileError(filename, err) 209 } 210 if bi == nil { 211 return nil, nil 212 } 213 return nativeImageElf{ 214 file: bi, 215 }, nil 216 } 217 218 // newMachO reads a Native Image from a Mach O executable. 219 func newMachO(filename string, r io.ReaderAt) (nativeImage, error) { 220 // First attempt to read an ELF file. 221 bi, err := macho.NewFile(r) 222 223 if err != nil { 224 var fmtErr *macho.FormatError 225 if errors.As(err, &fmtErr) { 226 // this is not a MachO file 227 log.WithFields("filename", filename, "error", err).Trace("not a MachO binary") 228 return nil, nil 229 } 230 } 231 if bi == nil { 232 return nil, nil 233 } 234 return nativeImageMachO{ 235 file: bi, 236 }, nil 237 } 238 239 // newPE reads a Native Image from a Portable Executable file. 240 func newPE(filename string, r io.ReaderAt) (nativeImage, error) { 241 // First attempt to read an PE file. 242 bi, err := pe.NewFile(r) 243 244 // The reader does not refer to a PE file. 245 if err != nil { 246 // note: there isn't a good way to distinguish between a format error and other kinds of errors 247 log.WithFields("filename", filename, "error", err).Trace("not a PE binary") 248 return nil, nil 249 } 250 if bi == nil { 251 return nil, nil 252 } 253 254 var exportSymbolsDataDirectory pe.DataDirectory 255 switch h := bi.OptionalHeader.(type) { 256 case *pe.OptionalHeader32: 257 exportSymbolsDataDirectory = h.DataDirectory[0] 258 case *pe.OptionalHeader64: 259 exportSymbolsDataDirectory = h.DataDirectory[0] 260 default: 261 return nil, fmt.Errorf("unable to get 'exportSymbolsDataDirectory' from binary: %s", filename) 262 } 263 // If we have no exported symbols it is not a Native Image 264 if exportSymbolsDataDirectory.Size == 0 { 265 return fileError(filename, errors.New(nativeImageMissingExportedDataDirectoryError)) 266 } 267 exportSymbolsOffset := uint64(exportSymbolsDataDirectory.VirtualAddress) 268 exports := make([]byte, exportSymbolsDataDirectory.Size) 269 _, err = r.ReadAt(exports, int64(exportSymbolsOffset)) 270 if err != nil { 271 return fileError(filename, fmt.Errorf("could not read the exported symbols data directory: %w", err)) 272 } 273 return nativeImagePE{ 274 file: bi, 275 reader: r, 276 exportSymbols: exportSymbolsDataDirectory, 277 exports: exports, 278 t: exportTypesPE{ 279 functionPointer: 0, 280 namePointer: 0, 281 headerAttribute: 0, 282 }, 283 header: exportPrefixPE{ 284 characteristics: 0, 285 timeDateStamp: 0, 286 majorVersion: 0, 287 minorVersion: 0, 288 name: 0, 289 base: 0, 290 }, 291 }, nil 292 } 293 294 // fetchPkgs obtains the packages given in the binary. 295 func (ni nativeImageElf) fetchPkgs() (pkgs []pkg.Package, retErr error) { 296 defer func() { 297 if r := recover(); r != nil { 298 // this can happen in cases where a malformed binary is passed in can be initially parsed, but not 299 // used without error later down the line. 300 retErr = fmt.Errorf("recovered from panic: %v", r) 301 } 302 }() 303 304 bi := ni.file 305 var sbom elf.Symbol 306 var sbomLength elf.Symbol 307 var svmVersion elf.Symbol 308 309 si, err := bi.Symbols() 310 if err != nil { 311 return nil, fmt.Errorf("no symbols found in binary: %w", err) 312 } 313 if si == nil { 314 return nil, errors.New(nativeImageMissingSymbolsError) 315 } 316 for _, s := range si { 317 switch s.Name { 318 case nativeImageSbomSymbol: 319 sbom = s 320 case nativeImageSbomLengthSymbol: 321 sbomLength = s 322 case nativeImageSbomVersionSymbol: 323 svmVersion = s 324 } 325 } 326 if sbom.Value == 0 || sbomLength.Value == 0 || svmVersion.Value == 0 { 327 return nil, errors.New(nativeImageMissingSymbolsError) 328 } 329 dataSection := bi.Section(".data") 330 if dataSection == nil { 331 return nil, fmt.Errorf("no .data section found in binary: %w", err) 332 } 333 dataSectionBase := dataSection.SectionHeader.Addr 334 data, err := dataSection.Data() 335 if err != nil { 336 return nil, fmt.Errorf("cannot read the .data section: %w", err) 337 } 338 sbomLocation := sbom.Value - dataSectionBase 339 lengthLocation := sbomLength.Value - dataSectionBase 340 341 return decompressSbom(data, sbomLocation, lengthLocation) 342 } 343 344 // fetchPkgs obtains the packages from a Native Image given as a Mach O file. 345 func (ni nativeImageMachO) fetchPkgs() (pkgs []pkg.Package, retErr error) { 346 defer func() { 347 if r := recover(); r != nil { 348 // this can happen in cases where a malformed binary is passed in can be initially parsed, but not 349 // used without error later down the line. 350 retErr = fmt.Errorf("recovered from panic: %v", r) 351 } 352 }() 353 354 var sbom macho.Symbol 355 var sbomLength macho.Symbol 356 var svmVersion macho.Symbol 357 358 bi := ni.file 359 if bi.Symtab == nil { 360 return nil, errors.New(nativeImageMissingSymbolsError) 361 } 362 for _, s := range bi.Symtab.Syms { 363 switch s.Name { 364 case "_" + nativeImageSbomSymbol: 365 sbom = s 366 case "_" + nativeImageSbomLengthSymbol: 367 sbomLength = s 368 case "_" + nativeImageSbomVersionSymbol: 369 svmVersion = s 370 } 371 } 372 if sbom.Value == 0 || sbomLength.Value == 0 || svmVersion.Value == 0 { 373 return nil, errors.New(nativeImageMissingSymbolsError) 374 } 375 376 dataSegment := bi.Segment("__DATA") 377 if dataSegment == nil { 378 return nil, nil 379 } 380 dataBuf, err := dataSegment.Data() 381 if err != nil { 382 log.Tracef("cannot obtain buffer from data segment") 383 return nil, nil 384 } 385 sbomLocation := sbom.Value - dataSegment.Addr 386 lengthLocation := sbomLength.Value - dataSegment.Addr 387 388 return decompressSbom(dataBuf, sbomLocation, lengthLocation) 389 } 390 391 // fetchExportAttribute obtains an attribute from the exported symbols directory entry. 392 func (ni nativeImagePE) fetchExportAttribute(i int) (uint32, error) { 393 var attribute uint32 394 n := len(ni.exports) 395 j := int(unsafe.Sizeof(ni.header)) + i*int(unsafe.Sizeof(ni.t.headerAttribute)) 396 if j+4 >= n { 397 log.Tracef("invalid index to export directory entry attribute: %v", j) 398 return uint32(0), errors.New(nativeImageInvalidIndexError) 399 } 400 p := bytes.NewBuffer(ni.exports[j : j+4]) 401 err := binary.Read(p, binary.LittleEndian, &attribute) 402 if err != nil { 403 log.Tracef("error fetching export directory entry attribute: %v", err) 404 return uint32(0), err 405 } 406 return attribute, nil 407 } 408 409 // fetchExportFunctionPointer obtains a function pointer from the exported symbols directory entry. 410 func (ni nativeImagePE) fetchExportFunctionPointer(functionsBase uint32, i uint32) (uint32, error) { 411 var pointer uint32 412 413 n := uint32(len(ni.exports)) 414 sz := uint32(unsafe.Sizeof(ni.t.functionPointer)) 415 j := functionsBase + i*sz 416 if j+sz >= n { 417 log.Tracef("invalid index to exported function: %v", j) 418 return uint32(0), errors.New(nativeImageInvalidIndexError) 419 } 420 p := bytes.NewBuffer(ni.exports[j : j+sz]) 421 err := binary.Read(p, binary.LittleEndian, &pointer) 422 if err != nil { 423 log.Tracef("error fetching exported function: %v", err) 424 return uint32(0), err 425 } 426 return pointer, nil 427 } 428 429 // fetchExportContent obtains the content of the export directory entry relevant to the SBOM. 430 func (ni nativeImagePE) fetchExportContent() (*exportContentPE, error) { 431 content := new(exportContentPE) 432 var err error 433 content.numberOfFunctions, err = ni.fetchExportAttribute(0) 434 if err != nil { 435 return nil, fmt.Errorf("could not find the number of exported 'number of functions' attribute: %w", err) 436 } 437 content.numberOfNames, err = ni.fetchExportAttribute(1) 438 if err != nil { 439 return nil, fmt.Errorf("could not find the number of exported 'number of names' attribute: %w", err) 440 } 441 content.addressOfFunctions, err = ni.fetchExportAttribute(2) 442 if err != nil { 443 return nil, fmt.Errorf("could not find the exported 'address of functions' attribute: %w", err) 444 } 445 content.addressOfNames, err = ni.fetchExportAttribute(3) 446 if err != nil { 447 return nil, fmt.Errorf("could not find the exported 'address of names' attribute: %w", err) 448 } 449 return content, nil 450 } 451 452 // fetchSbomSymbols enumerates the symbols exported by a binary to detect Native Image's SBOM symbols. 453 func (ni nativeImagePE) fetchSbomSymbols(content *exportContentPE) { 454 // Appending NULL bytes to symbol names simplifies finding them in the export data directory 455 sbomBytes := []byte(nativeImageSbomSymbol + "\x00") 456 sbomLengthBytes := []byte(nativeImageSbomLengthSymbol + "\x00") 457 svmVersionInfoBytes := []byte(nativeImageSbomVersionSymbol + "\x00") 458 n := uint32(len(ni.exports)) 459 460 // Find SBOM, SBOM Length, and SVM Version Symbol 461 for i := uint32(0); i < content.numberOfNames; i++ { 462 j := i * uint32(unsafe.Sizeof(ni.t.namePointer)) 463 addressBase := content.addressOfNames - ni.exportSymbols.VirtualAddress 464 k := addressBase + j 465 sz := uint32(unsafe.Sizeof(ni.t.namePointer)) 466 if k+sz >= n { 467 log.Tracef("invalid index to exported function: %v", k) 468 // If we are at the end of exports, stop looking 469 return 470 } 471 var symbolAddress uint32 472 p := bytes.NewBuffer(ni.exports[k : k+sz]) 473 err := binary.Read(p, binary.LittleEndian, &symbolAddress) 474 if err != nil { 475 log.Tracef("error fetching address of symbol %v", err) 476 return 477 } 478 symbolBase := symbolAddress - ni.exportSymbols.VirtualAddress 479 if symbolBase >= n { 480 log.Tracef("invalid index to exported symbol: %v", symbolBase) 481 return 482 } 483 switch { 484 case bytes.HasPrefix(ni.exports[symbolBase:], sbomBytes): 485 content.addressOfSbom = i 486 case bytes.HasPrefix(ni.exports[symbolBase:], sbomLengthBytes): 487 content.addressOfSbomLength = i 488 case bytes.HasPrefix(ni.exports[symbolBase:], svmVersionInfoBytes): 489 content.addressOfSvmVersion = i 490 } 491 } 492 } 493 494 // fetchPkgs obtains the packages from a Native Image given as a PE file. 495 func (ni nativeImagePE) fetchPkgs() (pkgs []pkg.Package, retErr error) { 496 defer func() { 497 if r := recover(); r != nil { 498 // this can happen in cases where a malformed binary is passed in can be initially parsed, but not 499 // used without error later down the line. 500 retErr = fmt.Errorf("recovered from panic: %v", r) 501 } 502 }() 503 504 content, err := ni.fetchExportContent() 505 if err != nil { 506 log.Debugf("could not fetch the content of the export directory entry: %v", err) 507 return nil, err 508 } 509 ni.fetchSbomSymbols(content) 510 if content.addressOfSbom == uint32(0) || content.addressOfSbomLength == uint32(0) || content.addressOfSvmVersion == uint32(0) { 511 return nil, errors.New(nativeImageMissingSymbolsError) 512 } 513 functionsBase := content.addressOfFunctions - ni.exportSymbols.VirtualAddress 514 sbomOffset := content.addressOfSbom 515 sbomAddress, err := ni.fetchExportFunctionPointer(functionsBase, sbomOffset) 516 if err != nil { 517 return nil, fmt.Errorf("could not fetch SBOM pointer from exported functions: %w", err) 518 } 519 sbomLengthOffset := content.addressOfSbomLength 520 sbomLengthAddress, err := ni.fetchExportFunctionPointer(functionsBase, sbomLengthOffset) 521 if err != nil { 522 return nil, fmt.Errorf("could not fetch SBOM length pointer from exported functions: %w", err) 523 } 524 bi := ni.file 525 dataSection := bi.Section(".data") 526 if dataSection == nil { 527 return nil, nil 528 } 529 dataBuf, err := dataSection.Data() 530 if err != nil { 531 log.Tracef("cannot obtain buffer from the java native-image .data section") 532 return nil, nil 533 } 534 sbomLocation := sbomAddress - dataSection.VirtualAddress 535 lengthLocation := sbomLengthAddress - dataSection.VirtualAddress 536 537 return decompressSbom(dataBuf, uint64(sbomLocation), uint64(lengthLocation)) 538 } 539 540 // fetchPkgs provides the packages available in a UnionReader. 541 func fetchPkgs(reader unionreader.UnionReader, filename string) []pkg.Package { 542 var pkgs []pkg.Package 543 imageFormats := []func(string, io.ReaderAt) (nativeImage, error){newElf, newMachO, newPE} 544 545 // NOTE: multiple readers are returned to cover universal binaries, which are files 546 // with more than one binary 547 readers, err := unionreader.GetReaders(reader) 548 if err != nil { 549 log.Debugf("failed to open the java native-image binary: %v", err) 550 return nil 551 } 552 for _, r := range readers { 553 for _, makeNativeImage := range imageFormats { 554 ni, err := makeNativeImage(filename, r) 555 if err != nil { 556 continue 557 } 558 if ni == nil { 559 continue 560 } 561 newPkgs, err := ni.fetchPkgs() 562 if err != nil { 563 log.Tracef("unable to extract SBOM from possible java native-image %s: %v", filename, err) 564 continue 565 } 566 pkgs = append(pkgs, newPkgs...) 567 } 568 } 569 return pkgs 570 } 571 572 // Catalog attempts to find any native image executables reachable from a resolver. 573 func (c *NativeImageCataloger) Catalog(resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { 574 var pkgs []pkg.Package 575 fileMatches, err := resolver.FilesByMIMEType(internal.ExecutableMIMETypeSet.List()...) 576 if err != nil { 577 return pkgs, nil, fmt.Errorf("failed to find binaries by mime types: %w", err) 578 } 579 580 for _, location := range fileMatches { 581 readerCloser, err := resolver.FileContentsByLocation(location) 582 if err != nil { 583 log.Debugf("error opening file: %v", err) 584 continue 585 } 586 587 reader, err := unionreader.GetUnionReader(readerCloser) 588 if err != nil { 589 return nil, nil, err 590 } 591 newPkgs := fetchPkgs(reader, location.RealPath) 592 pkgs = append(pkgs, newPkgs...) 593 internal.CloseAndLogError(readerCloser, location.RealPath) 594 } 595 596 return pkgs, nil, nil 597 }