github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/internal/goobj/read.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package goobj implements reading of Go object files and archives. 6 // 7 // TODO(rsc): Decide where this package should live. (golang.org/issue/6932) 8 // TODO(rsc): Decide the appropriate integer types for various fields. 9 package goobj 10 11 import ( 12 "bufio" 13 "bytes" 14 "github.com/gagliardetto/golang-go/cmd/internal/objabi" 15 "errors" 16 "fmt" 17 "io" 18 "os" 19 "strconv" 20 "strings" 21 ) 22 23 // A Sym is a named symbol in an object file. 24 type Sym struct { 25 SymID // symbol identifier (name and version) 26 Kind objabi.SymKind // kind of symbol 27 DupOK bool // are duplicate definitions okay? 28 Size int64 // size of corresponding data 29 Type SymID // symbol for Go type information 30 Data Data // memory image of symbol 31 Reloc []Reloc // relocations to apply to Data 32 Func *Func // additional data for functions 33 } 34 35 // A SymID - the combination of Name and Version - uniquely identifies 36 // a symbol within a package. 37 type SymID struct { 38 // Name is the name of a symbol. 39 Name string 40 41 // Version is zero for symbols with global visibility. 42 // Symbols with only file visibility (such as file-level static 43 // declarations in C) have a non-zero version distinguishing 44 // a symbol in one file from a symbol of the same name 45 // in another file 46 Version int64 47 } 48 49 func (s SymID) String() string { 50 if s.Version == 0 { 51 return s.Name 52 } 53 return fmt.Sprintf("%s<%d>", s.Name, s.Version) 54 } 55 56 // A Data is a reference to data stored in an object file. 57 // It records the offset and size of the data, so that a client can 58 // read the data only if necessary. 59 type Data struct { 60 Offset int64 61 Size int64 62 } 63 64 // A Reloc describes a relocation applied to a memory image to refer 65 // to an address within a particular symbol. 66 type Reloc struct { 67 // The bytes at [Offset, Offset+Size) within the containing Sym 68 // should be updated to refer to the address Add bytes after the start 69 // of the symbol Sym. 70 Offset int64 71 Size int64 72 Sym SymID 73 Add int64 74 75 // The Type records the form of address expected in the bytes 76 // described by the previous fields: absolute, PC-relative, and so on. 77 // TODO(rsc): The interpretation of Type is not exposed by this package. 78 Type objabi.RelocType 79 } 80 81 // A Var describes a variable in a function stack frame: a declared 82 // local variable, an input argument, or an output result. 83 type Var struct { 84 // The combination of Name, Kind, and Offset uniquely 85 // identifies a variable in a function stack frame. 86 // Using fewer of these - in particular, using only Name - does not. 87 Name string // Name of variable. 88 Kind int64 // TODO(rsc): Define meaning. 89 Offset int64 // Frame offset. TODO(rsc): Define meaning. 90 91 Type SymID // Go type for variable. 92 } 93 94 // Func contains additional per-symbol information specific to functions. 95 type Func struct { 96 Args int64 // size in bytes of argument frame: inputs and outputs 97 Frame int64 // size in bytes of local variable frame 98 Leaf bool // function omits save of link register (ARM) 99 NoSplit bool // function omits stack split prologue 100 TopFrame bool // function is the top of the call stack 101 Var []Var // detail about local variables 102 PCSP Data // PC → SP offset map 103 PCFile Data // PC → file number map (index into File) 104 PCLine Data // PC → line number map 105 PCInline Data // PC → inline tree index map 106 PCData []Data // PC → runtime support data map 107 FuncData []FuncData // non-PC-specific runtime support data 108 File []string // paths indexed by PCFile 109 InlTree []InlinedCall 110 } 111 112 // TODO: Add PCData []byte and PCDataIter (similar to liblink). 113 114 // A FuncData is a single function-specific data value. 115 type FuncData struct { 116 Sym SymID // symbol holding data 117 Offset int64 // offset into symbol for funcdata pointer 118 } 119 120 // An InlinedCall is a node in an InlTree. 121 // See cmd/internal/obj.InlTree for details. 122 type InlinedCall struct { 123 Parent int64 124 File string 125 Line int64 126 Func SymID 127 ParentPC int64 128 } 129 130 // A Package is a parsed Go object file or archive defining a Go package. 131 type Package struct { 132 ImportPath string // import path denoting this package 133 Imports []string // packages imported by this package 134 SymRefs []SymID // list of symbol names and versions referred to by this pack 135 Syms []*Sym // symbols defined by this package 136 MaxVersion int64 // maximum Version in any SymID in Syms 137 Arch string // architecture 138 Native []*NativeReader // native object data (e.g. ELF) 139 DWARFFileList []string // List of files for the DWARF .debug_lines section 140 } 141 142 type NativeReader struct { 143 Name string 144 io.ReaderAt 145 } 146 147 var ( 148 archiveHeader = []byte("!<arch>\n") 149 archiveMagic = []byte("`\n") 150 goobjHeader = []byte("go objec") // truncated to size of archiveHeader 151 152 errCorruptArchive = errors.New("corrupt archive") 153 errTruncatedArchive = errors.New("truncated archive") 154 errCorruptObject = errors.New("corrupt object file") 155 errNotObject = errors.New("unrecognized object file format") 156 ) 157 158 // An objReader is an object file reader. 159 type objReader struct { 160 p *Package 161 b *bufio.Reader 162 f *os.File 163 err error 164 offset int64 165 dataOffset int64 166 limit int64 167 tmp [256]byte 168 pkgprefix string 169 } 170 171 // init initializes r to read package p from f. 172 func (r *objReader) init(f *os.File, p *Package) { 173 r.f = f 174 r.p = p 175 r.offset, _ = f.Seek(0, io.SeekCurrent) 176 r.limit, _ = f.Seek(0, io.SeekEnd) 177 f.Seek(r.offset, io.SeekStart) 178 r.b = bufio.NewReader(f) 179 r.pkgprefix = objabi.PathToPrefix(p.ImportPath) + "." 180 } 181 182 // error records that an error occurred. 183 // It returns only the first error, so that an error 184 // caused by an earlier error does not discard information 185 // about the earlier error. 186 func (r *objReader) error(err error) error { 187 if r.err == nil { 188 if err == io.EOF { 189 err = io.ErrUnexpectedEOF 190 } 191 r.err = err 192 } 193 // panic("corrupt") // useful for debugging 194 return r.err 195 } 196 197 // peek returns the next n bytes without advancing the reader. 198 func (r *objReader) peek(n int) ([]byte, error) { 199 if r.err != nil { 200 return nil, r.err 201 } 202 if r.offset >= r.limit { 203 r.error(io.ErrUnexpectedEOF) 204 return nil, r.err 205 } 206 b, err := r.b.Peek(n) 207 if err != nil { 208 if err != bufio.ErrBufferFull { 209 r.error(err) 210 } 211 } 212 return b, err 213 } 214 215 // readByte reads and returns a byte from the input file. 216 // On I/O error or EOF, it records the error but returns byte 0. 217 // A sequence of 0 bytes will eventually terminate any 218 // parsing state in the object file. In particular, it ends the 219 // reading of a varint. 220 func (r *objReader) readByte() byte { 221 if r.err != nil { 222 return 0 223 } 224 if r.offset >= r.limit { 225 r.error(io.ErrUnexpectedEOF) 226 return 0 227 } 228 b, err := r.b.ReadByte() 229 if err != nil { 230 if err == io.EOF { 231 err = io.ErrUnexpectedEOF 232 } 233 r.error(err) 234 b = 0 235 } else { 236 r.offset++ 237 } 238 return b 239 } 240 241 // read reads exactly len(b) bytes from the input file. 242 // If an error occurs, read returns the error but also 243 // records it, so it is safe for callers to ignore the result 244 // as long as delaying the report is not a problem. 245 func (r *objReader) readFull(b []byte) error { 246 if r.err != nil { 247 return r.err 248 } 249 if r.offset+int64(len(b)) > r.limit { 250 return r.error(io.ErrUnexpectedEOF) 251 } 252 n, err := io.ReadFull(r.b, b) 253 r.offset += int64(n) 254 if err != nil { 255 return r.error(err) 256 } 257 return nil 258 } 259 260 // readInt reads a zigzag varint from the input file. 261 func (r *objReader) readInt() int64 { 262 var u uint64 263 264 for shift := uint(0); ; shift += 7 { 265 if shift >= 64 { 266 r.error(errCorruptObject) 267 return 0 268 } 269 c := r.readByte() 270 u |= uint64(c&0x7F) << shift 271 if c&0x80 == 0 { 272 break 273 } 274 } 275 276 return int64(u>>1) ^ (int64(u) << 63 >> 63) 277 } 278 279 // readString reads a length-delimited string from the input file. 280 func (r *objReader) readString() string { 281 n := r.readInt() 282 buf := make([]byte, n) 283 r.readFull(buf) 284 return string(buf) 285 } 286 287 // readSymID reads a SymID from the input file. 288 func (r *objReader) readSymID() SymID { 289 i := r.readInt() 290 return r.p.SymRefs[i] 291 } 292 293 func (r *objReader) readRef() { 294 name, abiOrStatic := r.readString(), r.readInt() 295 296 // In a symbol name in an object file, "". denotes the 297 // prefix for the package in which the object file has been found. 298 // Expand it. 299 name = strings.ReplaceAll(name, `"".`, r.pkgprefix) 300 301 // The ABI field records either the ABI or -1 for static symbols. 302 // 303 // To distinguish different static symbols with the same name, 304 // we use the symbol "version". Version 0 corresponds to 305 // global symbols, and each file has a unique version > 0 for 306 // all of its static symbols. The version is incremented on 307 // each call to parseObject. 308 // 309 // For global symbols, we currently ignore the ABI. 310 // 311 // TODO(austin): Record the ABI in SymID. Since this is a 312 // public API, we'll have to keep Version as 0 and record the 313 // ABI in a new field (which differs from how the linker does 314 // this, but that's okay). Show the ABI in things like 315 // objdump. 316 var vers int64 317 if abiOrStatic == -1 { 318 // Static symbol 319 vers = r.p.MaxVersion 320 } 321 r.p.SymRefs = append(r.p.SymRefs, SymID{name, vers}) 322 } 323 324 // readData reads a data reference from the input file. 325 func (r *objReader) readData() Data { 326 n := r.readInt() 327 d := Data{Offset: r.dataOffset, Size: n} 328 r.dataOffset += n 329 return d 330 } 331 332 // skip skips n bytes in the input. 333 func (r *objReader) skip(n int64) { 334 if n < 0 { 335 r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip")) 336 } 337 if n < int64(len(r.tmp)) { 338 // Since the data is so small, a just reading from the buffered 339 // reader is better than flushing the buffer and seeking. 340 r.readFull(r.tmp[:n]) 341 } else if n <= int64(r.b.Buffered()) { 342 // Even though the data is not small, it has already been read. 343 // Advance the buffer instead of seeking. 344 for n > int64(len(r.tmp)) { 345 r.readFull(r.tmp[:]) 346 n -= int64(len(r.tmp)) 347 } 348 r.readFull(r.tmp[:n]) 349 } else { 350 // Seek, giving up buffered data. 351 _, err := r.f.Seek(r.offset+n, io.SeekStart) 352 if err != nil { 353 r.error(err) 354 } 355 r.offset += n 356 r.b.Reset(r.f) 357 } 358 } 359 360 // Parse parses an object file or archive from f, 361 // assuming that its import path is pkgpath. 362 func Parse(f *os.File, pkgpath string) (*Package, error) { 363 if pkgpath == "" { 364 pkgpath = `""` 365 } 366 p := new(Package) 367 p.ImportPath = pkgpath 368 369 var rd objReader 370 rd.init(f, p) 371 err := rd.readFull(rd.tmp[:8]) 372 if err != nil { 373 if err == io.EOF { 374 err = io.ErrUnexpectedEOF 375 } 376 return nil, err 377 } 378 379 switch { 380 default: 381 return nil, errNotObject 382 383 case bytes.Equal(rd.tmp[:8], archiveHeader): 384 if err := rd.parseArchive(); err != nil { 385 return nil, err 386 } 387 case bytes.Equal(rd.tmp[:8], goobjHeader): 388 if err := rd.parseObject(goobjHeader); err != nil { 389 return nil, err 390 } 391 } 392 393 return p, nil 394 } 395 396 // trimSpace removes trailing spaces from b and returns the corresponding string. 397 // This effectively parses the form used in archive headers. 398 func trimSpace(b []byte) string { 399 return string(bytes.TrimRight(b, " ")) 400 } 401 402 // parseArchive parses a Unix archive of Go object files. 403 func (r *objReader) parseArchive() error { 404 for r.offset < r.limit { 405 if err := r.readFull(r.tmp[:60]); err != nil { 406 return err 407 } 408 data := r.tmp[:60] 409 410 // Each file is preceded by this text header (slice indices in first column): 411 // 0:16 name 412 // 16:28 date 413 // 28:34 uid 414 // 34:40 gid 415 // 40:48 mode 416 // 48:58 size 417 // 58:60 magic - `\n 418 // We only care about name, size, and magic. 419 // The fields are space-padded on the right. 420 // The size is in decimal. 421 // The file data - size bytes - follows the header. 422 // Headers are 2-byte aligned, so if size is odd, an extra padding 423 // byte sits between the file data and the next header. 424 // The file data that follows is padded to an even number of bytes: 425 // if size is odd, an extra padding byte is inserted betw the next header. 426 if len(data) < 60 { 427 return errTruncatedArchive 428 } 429 if !bytes.Equal(data[58:60], archiveMagic) { 430 return errCorruptArchive 431 } 432 name := trimSpace(data[0:16]) 433 size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64) 434 if err != nil { 435 return errCorruptArchive 436 } 437 data = data[60:] 438 fsize := size + size&1 439 if fsize < 0 || fsize < size { 440 return errCorruptArchive 441 } 442 switch name { 443 case "__.PKGDEF": 444 r.skip(size) 445 default: 446 oldLimit := r.limit 447 r.limit = r.offset + size 448 449 p, err := r.peek(8) 450 if err != nil { 451 return err 452 } 453 if bytes.Equal(p, goobjHeader) { 454 if err := r.parseObject(nil); err != nil { 455 return fmt.Errorf("parsing archive member %q: %v", name, err) 456 } 457 } else { 458 r.p.Native = append(r.p.Native, &NativeReader{ 459 Name: name, 460 ReaderAt: io.NewSectionReader(r.f, r.offset, size), 461 }) 462 } 463 464 r.skip(r.limit - r.offset) 465 r.limit = oldLimit 466 } 467 if size&1 != 0 { 468 r.skip(1) 469 } 470 } 471 return nil 472 } 473 474 // parseObject parses a single Go object file. 475 // The prefix is the bytes already read from the file, 476 // typically in order to detect that this is an object file. 477 // The object file consists of a textual header ending in "\n!\n" 478 // and then the part we want to parse begins. 479 // The format of that part is defined in a comment at the top 480 // of src/liblink/objfile.c. 481 func (r *objReader) parseObject(prefix []byte) error { 482 r.p.MaxVersion++ 483 h := make([]byte, 0, 256) 484 h = append(h, prefix...) 485 var c1, c2, c3 byte 486 for { 487 c1, c2, c3 = c2, c3, r.readByte() 488 h = append(h, c3) 489 // The new export format can contain 0 bytes. 490 // Don't consider them errors, only look for r.err != nil. 491 if r.err != nil { 492 return errCorruptObject 493 } 494 if c1 == '\n' && c2 == '!' && c3 == '\n' { 495 break 496 } 497 } 498 499 hs := strings.Fields(string(h)) 500 if len(hs) >= 4 { 501 r.p.Arch = hs[3] 502 } 503 // TODO: extract OS + build ID if/when we need it 504 505 p, err := r.peek(8) 506 if err != nil { 507 return err 508 } 509 if bytes.Equal(p, []byte("\x00go114LD")) { 510 r.readNew() 511 return nil 512 } 513 r.readFull(r.tmp[:8]) 514 if !bytes.Equal(r.tmp[:8], []byte("\x00go114ld")) { 515 return r.error(errCorruptObject) 516 } 517 518 b := r.readByte() 519 if b != 1 { 520 return r.error(errCorruptObject) 521 } 522 523 // Direct package dependencies. 524 for { 525 s := r.readString() 526 if s == "" { 527 break 528 } 529 r.p.Imports = append(r.p.Imports, s) 530 } 531 532 // Read filenames for dwarf info. 533 count := r.readInt() 534 for i := int64(0); i < count; i++ { 535 r.p.DWARFFileList = append(r.p.DWARFFileList, r.readString()) 536 } 537 538 r.p.SymRefs = []SymID{{"", 0}} 539 for { 540 if b := r.readByte(); b != 0xfe { 541 if b != 0xff { 542 return r.error(errCorruptObject) 543 } 544 break 545 } 546 547 r.readRef() 548 } 549 550 dataLength := r.readInt() 551 r.readInt() // n relocations - ignore 552 r.readInt() // n pcdata - ignore 553 r.readInt() // n autom - ignore 554 r.readInt() // n funcdata - ignore 555 r.readInt() // n files - ignore 556 557 r.dataOffset = r.offset 558 r.skip(dataLength) 559 560 // Symbols. 561 for { 562 if b := r.readByte(); b != 0xfe { 563 if b != 0xff { 564 return r.error(errCorruptObject) 565 } 566 break 567 } 568 569 typ := r.readByte() 570 s := &Sym{SymID: r.readSymID()} 571 r.p.Syms = append(r.p.Syms, s) 572 s.Kind = objabi.SymKind(typ) 573 flags := r.readInt() 574 s.DupOK = flags&1 != 0 575 s.Size = r.readInt() 576 s.Type = r.readSymID() 577 s.Data = r.readData() 578 s.Reloc = make([]Reloc, r.readInt()) 579 for i := range s.Reloc { 580 rel := &s.Reloc[i] 581 rel.Offset = r.readInt() 582 rel.Size = r.readInt() 583 rel.Type = objabi.RelocType(r.readInt()) 584 rel.Add = r.readInt() 585 rel.Sym = r.readSymID() 586 } 587 588 if s.Kind == objabi.STEXT { 589 f := new(Func) 590 s.Func = f 591 f.Args = r.readInt() 592 f.Frame = r.readInt() 593 flags := r.readInt() 594 f.Leaf = flags&(1<<0) != 0 595 f.TopFrame = flags&(1<<4) != 0 596 f.NoSplit = r.readInt() != 0 597 f.Var = make([]Var, r.readInt()) 598 for i := range f.Var { 599 v := &f.Var[i] 600 v.Name = r.readSymID().Name 601 v.Offset = r.readInt() 602 v.Kind = r.readInt() 603 v.Type = r.readSymID() 604 } 605 606 f.PCSP = r.readData() 607 f.PCFile = r.readData() 608 f.PCLine = r.readData() 609 f.PCInline = r.readData() 610 f.PCData = make([]Data, r.readInt()) 611 for i := range f.PCData { 612 f.PCData[i] = r.readData() 613 } 614 f.FuncData = make([]FuncData, r.readInt()) 615 for i := range f.FuncData { 616 f.FuncData[i].Sym = r.readSymID() 617 } 618 for i := range f.FuncData { 619 f.FuncData[i].Offset = r.readInt() // TODO 620 } 621 f.File = make([]string, r.readInt()) 622 for i := range f.File { 623 f.File[i] = r.readSymID().Name 624 } 625 f.InlTree = make([]InlinedCall, r.readInt()) 626 for i := range f.InlTree { 627 f.InlTree[i].Parent = r.readInt() 628 f.InlTree[i].File = r.readSymID().Name 629 f.InlTree[i].Line = r.readInt() 630 f.InlTree[i].Func = r.readSymID() 631 f.InlTree[i].ParentPC = r.readInt() 632 } 633 } 634 } 635 636 r.readFull(r.tmp[:7]) 637 if !bytes.Equal(r.tmp[:7], []byte("go114ld")) { 638 return r.error(errCorruptObject) 639 } 640 641 return nil 642 } 643 644 func (r *Reloc) String(insnOffset uint64) string { 645 delta := r.Offset - int64(insnOffset) 646 s := fmt.Sprintf("[%d:%d]%s", delta, delta+r.Size, r.Type) 647 if r.Sym.Name != "" { 648 if r.Add != 0 { 649 return fmt.Sprintf("%s:%s+%d", s, r.Sym.Name, r.Add) 650 } 651 return fmt.Sprintf("%s:%s", s, r.Sym.Name) 652 } 653 if r.Add != 0 { 654 return fmt.Sprintf("%s:%d", s, r.Add) 655 } 656 return s 657 }