github.com/bir3/gocompiler@v0.9.2202/src/debug/macho/file.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 /* 6 Package macho implements access to Mach-O object files. 7 8 # Security 9 10 This package is not designed to be hardened against adversarial inputs, and is 11 outside the scope of https://go.dev/security/policy. In particular, only basic 12 validation is done when parsing object files. As such, care should be taken when 13 parsing untrusted inputs, as parsing malformed files may consume significant 14 resources, or cause panics. 15 */ 16 package macho 17 18 // High level access to low level data structures. 19 20 import ( 21 "bytes" 22 "compress/zlib" 23 "debug/dwarf" 24 "encoding/binary" 25 "fmt" 26 "github.com/bir3/gocompiler/src/internal/saferio" 27 "io" 28 "os" 29 "strings" 30 ) 31 32 // A File represents an open Mach-O file. 33 type File struct { 34 FileHeader 35 ByteOrder binary.ByteOrder 36 Loads []Load 37 Sections []*Section 38 39 Symtab *Symtab 40 Dysymtab *Dysymtab 41 42 closer io.Closer 43 } 44 45 // A Load represents any Mach-O load command. 46 type Load interface { 47 Raw() []byte 48 } 49 50 // A LoadBytes is the uninterpreted bytes of a Mach-O load command. 51 type LoadBytes []byte 52 53 func (b LoadBytes) Raw() []byte { return b } 54 55 // A SegmentHeader is the header for a Mach-O 32-bit or 64-bit load segment command. 56 type SegmentHeader struct { 57 Cmd LoadCmd 58 Len uint32 59 Name string 60 Addr uint64 61 Memsz uint64 62 Offset uint64 63 Filesz uint64 64 Maxprot uint32 65 Prot uint32 66 Nsect uint32 67 Flag uint32 68 } 69 70 // A Segment represents a Mach-O 32-bit or 64-bit load segment command. 71 type Segment struct { 72 LoadBytes 73 SegmentHeader 74 75 // Embed ReaderAt for ReadAt method. 76 // Do not embed SectionReader directly 77 // to avoid having Read and Seek. 78 // If a client wants Read and Seek it must use 79 // Open() to avoid fighting over the seek offset 80 // with other clients. 81 io.ReaderAt 82 sr *io.SectionReader 83 } 84 85 // Data reads and returns the contents of the segment. 86 func (s *Segment) Data() ([]byte, error) { 87 return saferio.ReadDataAt(s.sr, s.Filesz, 0) 88 } 89 90 // Open returns a new ReadSeeker reading the segment. 91 func (s *Segment) Open() io.ReadSeeker { return io.NewSectionReader(s.sr, 0, 1<<63-1) } 92 93 type SectionHeader struct { 94 Name string 95 Seg string 96 Addr uint64 97 Size uint64 98 Offset uint32 99 Align uint32 100 Reloff uint32 101 Nreloc uint32 102 Flags uint32 103 } 104 105 // A Reloc represents a Mach-O relocation. 106 type Reloc struct { 107 Addr uint32 108 Value uint32 109 // when Scattered == false && Extern == true, Value is the symbol number. 110 // when Scattered == false && Extern == false, Value is the section number. 111 // when Scattered == true, Value is the value that this reloc refers to. 112 Type uint8 113 Len uint8 // 0=byte, 1=word, 2=long, 3=quad 114 Pcrel bool 115 Extern bool // valid if Scattered == false 116 Scattered bool 117 } 118 119 type Section struct { 120 SectionHeader 121 Relocs []Reloc 122 123 // Embed ReaderAt for ReadAt method. 124 // Do not embed SectionReader directly 125 // to avoid having Read and Seek. 126 // If a client wants Read and Seek it must use 127 // Open() to avoid fighting over the seek offset 128 // with other clients. 129 io.ReaderAt 130 sr *io.SectionReader 131 } 132 133 // Data reads and returns the contents of the Mach-O section. 134 func (s *Section) Data() ([]byte, error) { 135 return saferio.ReadDataAt(s.sr, s.Size, 0) 136 } 137 138 // Open returns a new ReadSeeker reading the Mach-O section. 139 func (s *Section) Open() io.ReadSeeker { return io.NewSectionReader(s.sr, 0, 1<<63-1) } 140 141 // A Dylib represents a Mach-O load dynamic library command. 142 type Dylib struct { 143 LoadBytes 144 Name string 145 Time uint32 146 CurrentVersion uint32 147 CompatVersion uint32 148 } 149 150 // A Symtab represents a Mach-O symbol table command. 151 type Symtab struct { 152 LoadBytes 153 SymtabCmd 154 Syms []Symbol 155 } 156 157 // A Dysymtab represents a Mach-O dynamic symbol table command. 158 type Dysymtab struct { 159 LoadBytes 160 DysymtabCmd 161 IndirectSyms []uint32 // indices into Symtab.Syms 162 } 163 164 // A Rpath represents a Mach-O rpath command. 165 type Rpath struct { 166 LoadBytes 167 Path string 168 } 169 170 // A Symbol is a Mach-O 32-bit or 64-bit symbol table entry. 171 type Symbol struct { 172 Name string 173 Type uint8 174 Sect uint8 175 Desc uint16 176 Value uint64 177 } 178 179 /* 180 * Mach-O reader 181 */ 182 183 // FormatError is returned by some operations if the data does 184 // not have the correct format for an object file. 185 type FormatError struct { 186 off int64 187 msg string 188 val any 189 } 190 191 func (e *FormatError) Error() string { 192 msg := e.msg 193 if e.val != nil { 194 msg += fmt.Sprintf(" '%v'", e.val) 195 } 196 msg += fmt.Sprintf(" in record at byte %#x", e.off) 197 return msg 198 } 199 200 // Open opens the named file using [os.Open] and prepares it for use as a Mach-O binary. 201 func Open(name string) (*File, error) { 202 f, err := os.Open(name) 203 if err != nil { 204 return nil, err 205 } 206 ff, err := NewFile(f) 207 if err != nil { 208 f.Close() 209 return nil, err 210 } 211 ff.closer = f 212 return ff, nil 213 } 214 215 // Close closes the [File]. 216 // If the [File] was created using [NewFile] directly instead of [Open], 217 // Close has no effect. 218 func (f *File) Close() error { 219 var err error 220 if f.closer != nil { 221 err = f.closer.Close() 222 f.closer = nil 223 } 224 return err 225 } 226 227 // NewFile creates a new [File] for accessing a Mach-O binary in an underlying reader. 228 // The Mach-O binary is expected to start at position 0 in the ReaderAt. 229 func NewFile(r io.ReaderAt) (*File, error) { 230 f := new(File) 231 sr := io.NewSectionReader(r, 0, 1<<63-1) 232 233 // Read and decode Mach magic to determine byte order, size. 234 // Magic32 and Magic64 differ only in the bottom bit. 235 var ident [4]byte 236 if _, err := r.ReadAt(ident[0:], 0); err != nil { 237 return nil, err 238 } 239 be := binary.BigEndian.Uint32(ident[0:]) 240 le := binary.LittleEndian.Uint32(ident[0:]) 241 switch Magic32 &^ 1 { 242 case be &^ 1: 243 f.ByteOrder = binary.BigEndian 244 f.Magic = be 245 case le &^ 1: 246 f.ByteOrder = binary.LittleEndian 247 f.Magic = le 248 default: 249 return nil, &FormatError{0, "invalid magic number", nil} 250 } 251 252 // Read entire file header. 253 if err := binary.Read(sr, f.ByteOrder, &f.FileHeader); err != nil { 254 return nil, err 255 } 256 257 // Then load commands. 258 offset := int64(fileHeaderSize32) 259 if f.Magic == Magic64 { 260 offset = fileHeaderSize64 261 } 262 dat, err := saferio.ReadDataAt(r, uint64(f.Cmdsz), offset) 263 if err != nil { 264 return nil, err 265 } 266 c := saferio.SliceCap[Load](uint64(f.Ncmd)) 267 if c < 0 { 268 return nil, &FormatError{offset, "too many load commands", nil} 269 } 270 f.Loads = make([]Load, 0, c) 271 bo := f.ByteOrder 272 for i := uint32(0); i < f.Ncmd; i++ { 273 // Each load command begins with uint32 command and length. 274 if len(dat) < 8 { 275 return nil, &FormatError{offset, "command block too small", nil} 276 } 277 cmd, siz := LoadCmd(bo.Uint32(dat[0:4])), bo.Uint32(dat[4:8]) 278 if siz < 8 || siz > uint32(len(dat)) { 279 return nil, &FormatError{offset, "invalid command block size", nil} 280 } 281 var cmddat []byte 282 cmddat, dat = dat[0:siz], dat[siz:] 283 offset += int64(siz) 284 var s *Segment 285 switch cmd { 286 default: 287 f.Loads = append(f.Loads, LoadBytes(cmddat)) 288 289 case LoadCmdRpath: 290 var hdr RpathCmd 291 b := bytes.NewReader(cmddat) 292 if err := binary.Read(b, bo, &hdr); err != nil { 293 return nil, err 294 } 295 l := new(Rpath) 296 if hdr.Path >= uint32(len(cmddat)) { 297 return nil, &FormatError{offset, "invalid path in rpath command", hdr.Path} 298 } 299 l.Path = cstring(cmddat[hdr.Path:]) 300 l.LoadBytes = LoadBytes(cmddat) 301 f.Loads = append(f.Loads, l) 302 303 case LoadCmdDylib: 304 var hdr DylibCmd 305 b := bytes.NewReader(cmddat) 306 if err := binary.Read(b, bo, &hdr); err != nil { 307 return nil, err 308 } 309 l := new(Dylib) 310 if hdr.Name >= uint32(len(cmddat)) { 311 return nil, &FormatError{offset, "invalid name in dynamic library command", hdr.Name} 312 } 313 l.Name = cstring(cmddat[hdr.Name:]) 314 l.Time = hdr.Time 315 l.CurrentVersion = hdr.CurrentVersion 316 l.CompatVersion = hdr.CompatVersion 317 l.LoadBytes = LoadBytes(cmddat) 318 f.Loads = append(f.Loads, l) 319 320 case LoadCmdSymtab: 321 var hdr SymtabCmd 322 b := bytes.NewReader(cmddat) 323 if err := binary.Read(b, bo, &hdr); err != nil { 324 return nil, err 325 } 326 strtab, err := saferio.ReadDataAt(r, uint64(hdr.Strsize), int64(hdr.Stroff)) 327 if err != nil { 328 return nil, err 329 } 330 var symsz int 331 if f.Magic == Magic64 { 332 symsz = 16 333 } else { 334 symsz = 12 335 } 336 symdat, err := saferio.ReadDataAt(r, uint64(hdr.Nsyms)*uint64(symsz), int64(hdr.Symoff)) 337 if err != nil { 338 return nil, err 339 } 340 st, err := f.parseSymtab(symdat, strtab, cmddat, &hdr, offset) 341 if err != nil { 342 return nil, err 343 } 344 f.Loads = append(f.Loads, st) 345 f.Symtab = st 346 347 case LoadCmdDysymtab: 348 var hdr DysymtabCmd 349 b := bytes.NewReader(cmddat) 350 if err := binary.Read(b, bo, &hdr); err != nil { 351 return nil, err 352 } 353 if f.Symtab == nil { 354 return nil, &FormatError{offset, "dynamic symbol table seen before any ordinary symbol table", nil} 355 } else if hdr.Iundefsym > uint32(len(f.Symtab.Syms)) { 356 return nil, &FormatError{offset, fmt.Sprintf( 357 "undefined symbols index in dynamic symbol table command is greater than symbol table length (%d > %d)", 358 hdr.Iundefsym, len(f.Symtab.Syms)), nil} 359 } else if hdr.Iundefsym+hdr.Nundefsym > uint32(len(f.Symtab.Syms)) { 360 return nil, &FormatError{offset, fmt.Sprintf( 361 "number of undefined symbols after index in dynamic symbol table command is greater than symbol table length (%d > %d)", 362 hdr.Iundefsym+hdr.Nundefsym, len(f.Symtab.Syms)), nil} 363 } 364 dat, err := saferio.ReadDataAt(r, uint64(hdr.Nindirectsyms)*4, int64(hdr.Indirectsymoff)) 365 if err != nil { 366 return nil, err 367 } 368 x := make([]uint32, hdr.Nindirectsyms) 369 if err := binary.Read(bytes.NewReader(dat), bo, x); err != nil { 370 return nil, err 371 } 372 st := new(Dysymtab) 373 st.LoadBytes = LoadBytes(cmddat) 374 st.DysymtabCmd = hdr 375 st.IndirectSyms = x 376 f.Loads = append(f.Loads, st) 377 f.Dysymtab = st 378 379 case LoadCmdSegment: 380 var seg32 Segment32 381 b := bytes.NewReader(cmddat) 382 if err := binary.Read(b, bo, &seg32); err != nil { 383 return nil, err 384 } 385 s = new(Segment) 386 s.LoadBytes = cmddat 387 s.Cmd = cmd 388 s.Len = siz 389 s.Name = cstring(seg32.Name[0:]) 390 s.Addr = uint64(seg32.Addr) 391 s.Memsz = uint64(seg32.Memsz) 392 s.Offset = uint64(seg32.Offset) 393 s.Filesz = uint64(seg32.Filesz) 394 s.Maxprot = seg32.Maxprot 395 s.Prot = seg32.Prot 396 s.Nsect = seg32.Nsect 397 s.Flag = seg32.Flag 398 f.Loads = append(f.Loads, s) 399 for i := 0; i < int(s.Nsect); i++ { 400 var sh32 Section32 401 if err := binary.Read(b, bo, &sh32); err != nil { 402 return nil, err 403 } 404 sh := new(Section) 405 sh.Name = cstring(sh32.Name[0:]) 406 sh.Seg = cstring(sh32.Seg[0:]) 407 sh.Addr = uint64(sh32.Addr) 408 sh.Size = uint64(sh32.Size) 409 sh.Offset = sh32.Offset 410 sh.Align = sh32.Align 411 sh.Reloff = sh32.Reloff 412 sh.Nreloc = sh32.Nreloc 413 sh.Flags = sh32.Flags 414 if err := f.pushSection(sh, r); err != nil { 415 return nil, err 416 } 417 } 418 419 case LoadCmdSegment64: 420 var seg64 Segment64 421 b := bytes.NewReader(cmddat) 422 if err := binary.Read(b, bo, &seg64); err != nil { 423 return nil, err 424 } 425 s = new(Segment) 426 s.LoadBytes = cmddat 427 s.Cmd = cmd 428 s.Len = siz 429 s.Name = cstring(seg64.Name[0:]) 430 s.Addr = seg64.Addr 431 s.Memsz = seg64.Memsz 432 s.Offset = seg64.Offset 433 s.Filesz = seg64.Filesz 434 s.Maxprot = seg64.Maxprot 435 s.Prot = seg64.Prot 436 s.Nsect = seg64.Nsect 437 s.Flag = seg64.Flag 438 f.Loads = append(f.Loads, s) 439 for i := 0; i < int(s.Nsect); i++ { 440 var sh64 Section64 441 if err := binary.Read(b, bo, &sh64); err != nil { 442 return nil, err 443 } 444 sh := new(Section) 445 sh.Name = cstring(sh64.Name[0:]) 446 sh.Seg = cstring(sh64.Seg[0:]) 447 sh.Addr = sh64.Addr 448 sh.Size = sh64.Size 449 sh.Offset = sh64.Offset 450 sh.Align = sh64.Align 451 sh.Reloff = sh64.Reloff 452 sh.Nreloc = sh64.Nreloc 453 sh.Flags = sh64.Flags 454 if err := f.pushSection(sh, r); err != nil { 455 return nil, err 456 } 457 } 458 } 459 if s != nil { 460 if int64(s.Offset) < 0 { 461 return nil, &FormatError{offset, "invalid section offset", s.Offset} 462 } 463 if int64(s.Filesz) < 0 { 464 return nil, &FormatError{offset, "invalid section file size", s.Filesz} 465 } 466 s.sr = io.NewSectionReader(r, int64(s.Offset), int64(s.Filesz)) 467 s.ReaderAt = s.sr 468 } 469 } 470 return f, nil 471 } 472 473 func (f *File) parseSymtab(symdat, strtab, cmddat []byte, hdr *SymtabCmd, offset int64) (*Symtab, error) { 474 bo := f.ByteOrder 475 c := saferio.SliceCap[Symbol](uint64(hdr.Nsyms)) 476 if c < 0 { 477 return nil, &FormatError{offset, "too many symbols", nil} 478 } 479 symtab := make([]Symbol, 0, c) 480 b := bytes.NewReader(symdat) 481 for i := 0; i < int(hdr.Nsyms); i++ { 482 var n Nlist64 483 if f.Magic == Magic64 { 484 if err := binary.Read(b, bo, &n); err != nil { 485 return nil, err 486 } 487 } else { 488 var n32 Nlist32 489 if err := binary.Read(b, bo, &n32); err != nil { 490 return nil, err 491 } 492 n.Name = n32.Name 493 n.Type = n32.Type 494 n.Sect = n32.Sect 495 n.Desc = n32.Desc 496 n.Value = uint64(n32.Value) 497 } 498 if n.Name >= uint32(len(strtab)) { 499 return nil, &FormatError{offset, "invalid name in symbol table", n.Name} 500 } 501 // We add "_" to Go symbols. Strip it here. See issue 33808. 502 name := cstring(strtab[n.Name:]) 503 if strings.Contains(name, ".") && name[0] == '_' { 504 name = name[1:] 505 } 506 symtab = append(symtab, Symbol{ 507 Name: name, 508 Type: n.Type, 509 Sect: n.Sect, 510 Desc: n.Desc, 511 Value: n.Value, 512 }) 513 } 514 st := new(Symtab) 515 st.LoadBytes = LoadBytes(cmddat) 516 st.Syms = symtab 517 return st, nil 518 } 519 520 type relocInfo struct { 521 Addr uint32 522 Symnum uint32 523 } 524 525 func (f *File) pushSection(sh *Section, r io.ReaderAt) error { 526 f.Sections = append(f.Sections, sh) 527 sh.sr = io.NewSectionReader(r, int64(sh.Offset), int64(sh.Size)) 528 sh.ReaderAt = sh.sr 529 530 if sh.Nreloc > 0 { 531 reldat, err := saferio.ReadDataAt(r, uint64(sh.Nreloc)*8, int64(sh.Reloff)) 532 if err != nil { 533 return err 534 } 535 b := bytes.NewReader(reldat) 536 537 bo := f.ByteOrder 538 539 sh.Relocs = make([]Reloc, sh.Nreloc) 540 for i := range sh.Relocs { 541 rel := &sh.Relocs[i] 542 543 var ri relocInfo 544 if err := binary.Read(b, bo, &ri); err != nil { 545 return err 546 } 547 548 if ri.Addr&(1<<31) != 0 { // scattered 549 rel.Addr = ri.Addr & (1<<24 - 1) 550 rel.Type = uint8((ri.Addr >> 24) & (1<<4 - 1)) 551 rel.Len = uint8((ri.Addr >> 28) & (1<<2 - 1)) 552 rel.Pcrel = ri.Addr&(1<<30) != 0 553 rel.Value = ri.Symnum 554 rel.Scattered = true 555 } else { 556 switch bo { 557 case binary.LittleEndian: 558 rel.Addr = ri.Addr 559 rel.Value = ri.Symnum & (1<<24 - 1) 560 rel.Pcrel = ri.Symnum&(1<<24) != 0 561 rel.Len = uint8((ri.Symnum >> 25) & (1<<2 - 1)) 562 rel.Extern = ri.Symnum&(1<<27) != 0 563 rel.Type = uint8((ri.Symnum >> 28) & (1<<4 - 1)) 564 case binary.BigEndian: 565 rel.Addr = ri.Addr 566 rel.Value = ri.Symnum >> 8 567 rel.Pcrel = ri.Symnum&(1<<7) != 0 568 rel.Len = uint8((ri.Symnum >> 5) & (1<<2 - 1)) 569 rel.Extern = ri.Symnum&(1<<4) != 0 570 rel.Type = uint8(ri.Symnum & (1<<4 - 1)) 571 default: 572 panic("unreachable") 573 } 574 } 575 } 576 } 577 578 return nil 579 } 580 581 func cstring(b []byte) string { 582 i := bytes.IndexByte(b, 0) 583 if i == -1 { 584 i = len(b) 585 } 586 return string(b[0:i]) 587 } 588 589 // Segment returns the first Segment with the given name, or nil if no such segment exists. 590 func (f *File) Segment(name string) *Segment { 591 for _, l := range f.Loads { 592 if s, ok := l.(*Segment); ok && s.Name == name { 593 return s 594 } 595 } 596 return nil 597 } 598 599 // Section returns the first section with the given name, or nil if no such 600 // section exists. 601 func (f *File) Section(name string) *Section { 602 for _, s := range f.Sections { 603 if s.Name == name { 604 return s 605 } 606 } 607 return nil 608 } 609 610 // DWARF returns the DWARF debug information for the Mach-O file. 611 func (f *File) DWARF() (*dwarf.Data, error) { 612 dwarfSuffix := func(s *Section) string { 613 switch { 614 case strings.HasPrefix(s.Name, "__debug_"): 615 return s.Name[8:] 616 case strings.HasPrefix(s.Name, "__zdebug_"): 617 return s.Name[9:] 618 default: 619 return "" 620 } 621 622 } 623 sectionData := func(s *Section) ([]byte, error) { 624 b, err := s.Data() 625 if err != nil && uint64(len(b)) < s.Size { 626 return nil, err 627 } 628 629 if len(b) >= 12 && string(b[:4]) == "ZLIB" { 630 dlen := binary.BigEndian.Uint64(b[4:12]) 631 dbuf := make([]byte, dlen) 632 r, err := zlib.NewReader(bytes.NewBuffer(b[12:])) 633 if err != nil { 634 return nil, err 635 } 636 if _, err := io.ReadFull(r, dbuf); err != nil { 637 return nil, err 638 } 639 if err := r.Close(); err != nil { 640 return nil, err 641 } 642 b = dbuf 643 } 644 return b, nil 645 } 646 647 // There are many other DWARF sections, but these 648 // are the ones the debug/dwarf package uses. 649 // Don't bother loading others. 650 var dat = map[string][]byte{"abbrev": nil, "info": nil, "str": nil, "line": nil, "ranges": nil} 651 for _, s := range f.Sections { 652 suffix := dwarfSuffix(s) 653 if suffix == "" { 654 continue 655 } 656 if _, ok := dat[suffix]; !ok { 657 continue 658 } 659 b, err := sectionData(s) 660 if err != nil { 661 return nil, err 662 } 663 dat[suffix] = b 664 } 665 666 d, err := dwarf.New(dat["abbrev"], nil, nil, dat["info"], dat["line"], nil, dat["ranges"], dat["str"]) 667 if err != nil { 668 return nil, err 669 } 670 671 // Look for DWARF4 .debug_types sections and DWARF5 sections. 672 for i, s := range f.Sections { 673 suffix := dwarfSuffix(s) 674 if suffix == "" { 675 continue 676 } 677 if _, ok := dat[suffix]; ok { 678 // Already handled. 679 continue 680 } 681 682 b, err := sectionData(s) 683 if err != nil { 684 return nil, err 685 } 686 687 if suffix == "types" { 688 err = d.AddTypes(fmt.Sprintf("types-%d", i), b) 689 } else { 690 err = d.AddSection(".debug_"+suffix, b) 691 } 692 if err != nil { 693 return nil, err 694 } 695 } 696 697 return d, nil 698 } 699 700 // ImportedSymbols returns the names of all symbols 701 // referred to by the binary f that are expected to be 702 // satisfied by other libraries at dynamic load time. 703 func (f *File) ImportedSymbols() ([]string, error) { 704 if f.Dysymtab == nil || f.Symtab == nil { 705 return nil, &FormatError{0, "missing symbol table", nil} 706 } 707 708 st := f.Symtab 709 dt := f.Dysymtab 710 var all []string 711 for _, s := range st.Syms[dt.Iundefsym : dt.Iundefsym+dt.Nundefsym] { 712 all = append(all, s.Name) 713 } 714 return all, nil 715 } 716 717 // ImportedLibraries returns the paths of all libraries 718 // referred to by the binary f that are expected to be 719 // linked with the binary at dynamic link time. 720 func (f *File) ImportedLibraries() ([]string, error) { 721 var all []string 722 for _, l := range f.Loads { 723 if lib, ok := l.(*Dylib); ok { 724 all = append(all, lib.Name) 725 } 726 } 727 return all, nil 728 }