github.com/rakyll/go@v0.0.0-20170216000551-64c02460d703/src/cmd/internal/goobj/read.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package goobj implements reading of Go object files and archives. 6 // 7 // TODO(rsc): Decide where this package should live. (golang.org/issue/6932) 8 // TODO(rsc): Decide the appropriate integer types for various fields. 9 // TODO(rsc): Write tests. (File format still up in the air a little.) 10 package goobj 11 12 import ( 13 "bufio" 14 "bytes" 15 "cmd/internal/obj" 16 "errors" 17 "fmt" 18 "io" 19 "strconv" 20 "strings" 21 ) 22 23 // A SymKind describes the kind of memory represented by a symbol. 24 type SymKind int 25 26 // This list is taken from include/link.h. 27 28 // Defined SymKind values. 29 // TODO(rsc): Give idiomatic Go names. 30 // TODO(rsc): Reduce the number of symbol types in the object files. 31 const ( 32 // readonly, executable 33 STEXT = SymKind(obj.STEXT) 34 SELFRXSECT = SymKind(obj.SELFRXSECT) 35 36 // readonly, non-executable 37 STYPE = SymKind(obj.STYPE) 38 SSTRING = SymKind(obj.SSTRING) 39 SGOSTRING = SymKind(obj.SGOSTRING) 40 SGOFUNC = SymKind(obj.SGOFUNC) 41 SRODATA = SymKind(obj.SRODATA) 42 SFUNCTAB = SymKind(obj.SFUNCTAB) 43 STYPELINK = SymKind(obj.STYPELINK) 44 SITABLINK = SymKind(obj.SITABLINK) 45 SSYMTAB = SymKind(obj.SSYMTAB) // TODO: move to unmapped section 46 SPCLNTAB = SymKind(obj.SPCLNTAB) 47 SELFROSECT = SymKind(obj.SELFROSECT) 48 49 // writable, non-executable 50 SMACHOPLT = SymKind(obj.SMACHOPLT) 51 SELFSECT = SymKind(obj.SELFSECT) 52 SMACHO = SymKind(obj.SMACHO) // Mach-O __nl_symbol_ptr 53 SMACHOGOT = SymKind(obj.SMACHOGOT) 54 SWINDOWS = SymKind(obj.SWINDOWS) 55 SELFGOT = SymKind(obj.SELFGOT) 56 SNOPTRDATA = SymKind(obj.SNOPTRDATA) 57 SINITARR = SymKind(obj.SINITARR) 58 SDATA = SymKind(obj.SDATA) 59 SBSS = SymKind(obj.SBSS) 60 SNOPTRBSS = 
SymKind(obj.SNOPTRBSS) 61 STLSBSS = SymKind(obj.STLSBSS) 62 63 // not mapped 64 SXREF = SymKind(obj.SXREF) 65 SMACHOSYMSTR = SymKind(obj.SMACHOSYMSTR) 66 SMACHOSYMTAB = SymKind(obj.SMACHOSYMTAB) 67 SMACHOINDIRECTPLT = SymKind(obj.SMACHOINDIRECTPLT) 68 SMACHOINDIRECTGOT = SymKind(obj.SMACHOINDIRECTGOT) 69 SFILE = SymKind(obj.SFILE) 70 SFILEPATH = SymKind(obj.SFILEPATH) 71 SCONST = SymKind(obj.SCONST) 72 SDYNIMPORT = SymKind(obj.SDYNIMPORT) 73 SHOSTOBJ = SymKind(obj.SHOSTOBJ) 74 ) 75 76 var symKindStrings = []string{ 77 SBSS: "SBSS", 78 SCONST: "SCONST", 79 SDATA: "SDATA", 80 SDYNIMPORT: "SDYNIMPORT", 81 SELFROSECT: "SELFROSECT", 82 SELFRXSECT: "SELFRXSECT", 83 SELFSECT: "SELFSECT", 84 SFILE: "SFILE", 85 SFILEPATH: "SFILEPATH", 86 SFUNCTAB: "SFUNCTAB", 87 SGOFUNC: "SGOFUNC", 88 SGOSTRING: "SGOSTRING", 89 SHOSTOBJ: "SHOSTOBJ", 90 SINITARR: "SINITARR", 91 SMACHO: "SMACHO", 92 SMACHOGOT: "SMACHOGOT", 93 SMACHOINDIRECTGOT: "SMACHOINDIRECTGOT", 94 SMACHOINDIRECTPLT: "SMACHOINDIRECTPLT", 95 SMACHOPLT: "SMACHOPLT", 96 SMACHOSYMSTR: "SMACHOSYMSTR", 97 SMACHOSYMTAB: "SMACHOSYMTAB", 98 SNOPTRBSS: "SNOPTRBSS", 99 SNOPTRDATA: "SNOPTRDATA", 100 SPCLNTAB: "SPCLNTAB", 101 SRODATA: "SRODATA", 102 SSTRING: "SSTRING", 103 SSYMTAB: "SSYMTAB", 104 STEXT: "STEXT", 105 STLSBSS: "STLSBSS", 106 STYPE: "STYPE", 107 STYPELINK: "STYPELINK", 108 SITABLINK: "SITABLINK", 109 SWINDOWS: "SWINDOWS", 110 SXREF: "SXREF", 111 } 112 113 func (k SymKind) String() string { 114 if k < 0 || int(k) >= len(symKindStrings) { 115 return fmt.Sprintf("SymKind(%d)", k) 116 } 117 return symKindStrings[k] 118 } 119 120 // A Sym is a named symbol in an object file. 121 type Sym struct { 122 SymID // symbol identifier (name and version) 123 Kind SymKind // kind of symbol 124 DupOK bool // are duplicate definitions okay? 
125 Size int // size of corresponding data 126 Type SymID // symbol for Go type information 127 Data Data // memory image of symbol 128 Reloc []Reloc // relocations to apply to Data 129 Func *Func // additional data for functions 130 } 131 132 // A SymID - the combination of Name and Version - uniquely identifies 133 // a symbol within a package. 134 type SymID struct { 135 // Name is the name of a symbol. 136 Name string 137 138 // Version is zero for symbols with global visibility. 139 // Symbols with only file visibility (such as file-level static 140 // declarations in C) have a non-zero version distinguishing 141 // a symbol in one file from a symbol of the same name 142 // in another file 143 Version int 144 } 145 146 func (s SymID) String() string { 147 if s.Version == 0 { 148 return s.Name 149 } 150 return fmt.Sprintf("%s<%d>", s.Name, s.Version) 151 } 152 153 // A Data is a reference to data stored in an object file. 154 // It records the offset and size of the data, so that a client can 155 // read the data only if necessary. 156 type Data struct { 157 Offset int64 158 Size int64 159 } 160 161 // A Reloc describes a relocation applied to a memory image to refer 162 // to an address within a particular symbol. 163 type Reloc struct { 164 // The bytes at [Offset, Offset+Size) within the containing Sym 165 // should be updated to refer to the address Add bytes after the start 166 // of the symbol Sym. 167 Offset int 168 Size int 169 Sym SymID 170 Add int 171 172 // The Type records the form of address expected in the bytes 173 // described by the previous fields: absolute, PC-relative, and so on. 174 // TODO(rsc): The interpretation of Type is not exposed by this package. 175 Type obj.RelocType 176 } 177 178 // A Var describes a variable in a function stack frame: a declared 179 // local variable, an input argument, or an output result. 
type Var struct {
	// The combination of Name, Kind, and Offset uniquely
	// identifies a variable in a function stack frame.
	// Using fewer of these - in particular, using only Name - does not.
	Name   string // Name of variable.
	Kind   int    // TODO(rsc): Define meaning.
	Offset int    // Frame offset. TODO(rsc): Define meaning.

	Type SymID // Go type for variable.
}

// Func contains additional per-symbol information specific to functions.
// The reader attaches a Func to a Sym only when the symbol's Kind is STEXT.
type Func struct {
	Args     int        // size in bytes of argument frame: inputs and outputs
	Frame    int        // size in bytes of local variable frame
	Leaf     bool       // function omits save of link register (ARM)
	NoSplit  bool       // function omits stack split prologue
	Var      []Var      // detail about local variables
	PCSP     Data       // PC → SP offset map
	PCFile   Data       // PC → file number map (index into File)
	PCLine   Data       // PC → line number map
	PCData   []Data     // PC → runtime support data map
	FuncData []FuncData // non-PC-specific runtime support data
	File     []string   // paths indexed by PCFile
}

// TODO: Add PCData []byte and PCDataIter (similar to liblink).

// A FuncData is a single function-specific data value.
type FuncData struct {
	Sym    SymID // symbol holding data
	Offset int64 // offset into symbol for funcdata pointer
}

// A Package is a parsed Go object file or archive defining a Go package.
215 type Package struct { 216 ImportPath string // import path denoting this package 217 Imports []string // packages imported by this package 218 SymRefs []SymID // list of symbol names and versions referred to by this pack 219 Syms []*Sym // symbols defined by this package 220 MaxVersion int // maximum Version in any SymID in Syms 221 Arch string // architecture 222 } 223 224 var ( 225 archiveHeader = []byte("!<arch>\n") 226 archiveMagic = []byte("`\n") 227 goobjHeader = []byte("go objec") // truncated to size of archiveHeader 228 229 errCorruptArchive = errors.New("corrupt archive") 230 errTruncatedArchive = errors.New("truncated archive") 231 errCorruptObject = errors.New("corrupt object file") 232 errNotObject = errors.New("unrecognized object file format") 233 ) 234 235 // An objReader is an object file reader. 236 type objReader struct { 237 p *Package 238 b *bufio.Reader 239 f io.ReadSeeker 240 err error 241 offset int64 242 dataOffset int64 243 limit int64 244 tmp [256]byte 245 pkgprefix string 246 } 247 248 // importPathToPrefix returns the prefix that will be used in the 249 // final symbol table for the given import path. 250 // We escape '%', '"', all control characters and non-ASCII bytes, 251 // and any '.' after the final slash. 252 // 253 // See ../../../cmd/ld/lib.c:/^pathtoprefix and 254 // ../../../cmd/gc/subr.c:/^pathtoprefix. 255 func importPathToPrefix(s string) string { 256 // find index of last slash, if any, or else -1. 257 // used for determining whether an index is after the last slash. 258 slash := strings.LastIndex(s, "/") 259 260 // check for chars that need escaping 261 n := 0 262 for r := 0; r < len(s); r++ { 263 if c := s[r]; c <= ' ' || (c == '.' 
&& r > slash) || c == '%' || c == '"' || c >= 0x7F { 264 n++ 265 } 266 } 267 268 // quick exit 269 if n == 0 { 270 return s 271 } 272 273 // escape 274 const hex = "0123456789abcdef" 275 p := make([]byte, 0, len(s)+2*n) 276 for r := 0; r < len(s); r++ { 277 if c := s[r]; c <= ' ' || (c == '.' && r > slash) || c == '%' || c == '"' || c >= 0x7F { 278 p = append(p, '%', hex[c>>4], hex[c&0xF]) 279 } else { 280 p = append(p, c) 281 } 282 } 283 284 return string(p) 285 } 286 287 // init initializes r to read package p from f. 288 func (r *objReader) init(f io.ReadSeeker, p *Package) { 289 r.f = f 290 r.p = p 291 r.offset, _ = f.Seek(0, io.SeekCurrent) 292 r.limit, _ = f.Seek(0, io.SeekEnd) 293 f.Seek(r.offset, io.SeekStart) 294 r.b = bufio.NewReader(f) 295 r.pkgprefix = importPathToPrefix(p.ImportPath) + "." 296 } 297 298 // error records that an error occurred. 299 // It returns only the first error, so that an error 300 // caused by an earlier error does not discard information 301 // about the earlier error. 302 func (r *objReader) error(err error) error { 303 if r.err == nil { 304 if err == io.EOF { 305 err = io.ErrUnexpectedEOF 306 } 307 r.err = err 308 } 309 // panic("corrupt") // useful for debugging 310 return r.err 311 } 312 313 // readByte reads and returns a byte from the input file. 314 // On I/O error or EOF, it records the error but returns byte 0. 315 // A sequence of 0 bytes will eventually terminate any 316 // parsing state in the object file. In particular, it ends the 317 // reading of a varint. 318 func (r *objReader) readByte() byte { 319 if r.err != nil { 320 return 0 321 } 322 if r.offset >= r.limit { 323 r.error(io.ErrUnexpectedEOF) 324 return 0 325 } 326 b, err := r.b.ReadByte() 327 if err != nil { 328 if err == io.EOF { 329 err = io.ErrUnexpectedEOF 330 } 331 r.error(err) 332 b = 0 333 } else { 334 r.offset++ 335 } 336 return b 337 } 338 339 // read reads exactly len(b) bytes from the input file. 
340 // If an error occurs, read returns the error but also 341 // records it, so it is safe for callers to ignore the result 342 // as long as delaying the report is not a problem. 343 func (r *objReader) readFull(b []byte) error { 344 if r.err != nil { 345 return r.err 346 } 347 if r.offset+int64(len(b)) > r.limit { 348 return r.error(io.ErrUnexpectedEOF) 349 } 350 n, err := io.ReadFull(r.b, b) 351 r.offset += int64(n) 352 if err != nil { 353 return r.error(err) 354 } 355 return nil 356 } 357 358 // readInt reads a zigzag varint from the input file. 359 func (r *objReader) readInt() int { 360 var u uint64 361 362 for shift := uint(0); ; shift += 7 { 363 if shift >= 64 { 364 r.error(errCorruptObject) 365 return 0 366 } 367 c := r.readByte() 368 u |= uint64(c&0x7F) << shift 369 if c&0x80 == 0 { 370 break 371 } 372 } 373 374 v := int64(u>>1) ^ (int64(u) << 63 >> 63) 375 if int64(int(v)) != v { 376 r.error(errCorruptObject) // TODO 377 return 0 378 } 379 return int(v) 380 } 381 382 // readString reads a length-delimited string from the input file. 383 func (r *objReader) readString() string { 384 n := r.readInt() 385 buf := make([]byte, n) 386 r.readFull(buf) 387 return string(buf) 388 } 389 390 // readSymID reads a SymID from the input file. 391 func (r *objReader) readSymID() SymID { 392 i := r.readInt() 393 return r.p.SymRefs[i] 394 } 395 396 func (r *objReader) readRef() { 397 name, vers := r.readString(), r.readInt() 398 399 // In a symbol name in an object file, "". denotes the 400 // prefix for the package in which the object file has been found. 401 // Expand it. 402 name = strings.Replace(name, `"".`, r.pkgprefix, -1) 403 404 // An individual object file only records version 0 (extern) or 1 (static). 405 // To make static symbols unique across all files being read, we 406 // replace version 1 with the version corresponding to the current 407 // file number. The number is incremented on each call to parseObject. 
408 if vers != 0 { 409 vers = r.p.MaxVersion 410 } 411 r.p.SymRefs = append(r.p.SymRefs, SymID{name, vers}) 412 } 413 414 // readData reads a data reference from the input file. 415 func (r *objReader) readData() Data { 416 n := r.readInt() 417 d := Data{Offset: r.dataOffset, Size: int64(n)} 418 r.dataOffset += int64(n) 419 return d 420 } 421 422 // skip skips n bytes in the input. 423 func (r *objReader) skip(n int64) { 424 if n < 0 { 425 r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip")) 426 } 427 if n < int64(len(r.tmp)) { 428 // Since the data is so small, a just reading from the buffered 429 // reader is better than flushing the buffer and seeking. 430 r.readFull(r.tmp[:n]) 431 } else if n <= int64(r.b.Buffered()) { 432 // Even though the data is not small, it has already been read. 433 // Advance the buffer instead of seeking. 434 for n > int64(len(r.tmp)) { 435 r.readFull(r.tmp[:]) 436 n -= int64(len(r.tmp)) 437 } 438 r.readFull(r.tmp[:n]) 439 } else { 440 // Seek, giving up buffered data. 441 _, err := r.f.Seek(r.offset+n, io.SeekStart) 442 if err != nil { 443 r.error(err) 444 } 445 r.offset += n 446 r.b.Reset(r.f) 447 } 448 } 449 450 // Parse parses an object file or archive from r, 451 // assuming that its import path is pkgpath. 
452 func Parse(r io.ReadSeeker, pkgpath string) (*Package, error) { 453 if pkgpath == "" { 454 pkgpath = `""` 455 } 456 p := new(Package) 457 p.ImportPath = pkgpath 458 459 var rd objReader 460 rd.init(r, p) 461 err := rd.readFull(rd.tmp[:8]) 462 if err != nil { 463 if err == io.EOF { 464 err = io.ErrUnexpectedEOF 465 } 466 return nil, err 467 } 468 469 switch { 470 default: 471 return nil, errNotObject 472 473 case bytes.Equal(rd.tmp[:8], archiveHeader): 474 if err := rd.parseArchive(); err != nil { 475 return nil, err 476 } 477 case bytes.Equal(rd.tmp[:8], goobjHeader): 478 if err := rd.parseObject(goobjHeader); err != nil { 479 return nil, err 480 } 481 } 482 483 return p, nil 484 } 485 486 // trimSpace removes trailing spaces from b and returns the corresponding string. 487 // This effectively parses the form used in archive headers. 488 func trimSpace(b []byte) string { 489 return string(bytes.TrimRight(b, " ")) 490 } 491 492 // parseArchive parses a Unix archive of Go object files. 493 // TODO(rsc): Need to skip non-Go object files. 494 // TODO(rsc): Maybe record table of contents in r.p so that 495 // linker can avoid having code to parse archives too. 496 func (r *objReader) parseArchive() error { 497 for r.offset < r.limit { 498 if err := r.readFull(r.tmp[:60]); err != nil { 499 return err 500 } 501 data := r.tmp[:60] 502 503 // Each file is preceded by this text header (slice indices in first column): 504 // 0:16 name 505 // 16:28 date 506 // 28:34 uid 507 // 34:40 gid 508 // 40:48 mode 509 // 48:58 size 510 // 58:60 magic - `\n 511 // We only care about name, size, and magic. 512 // The fields are space-padded on the right. 513 // The size is in decimal. 514 // The file data - size bytes - follows the header. 515 // Headers are 2-byte aligned, so if size is odd, an extra padding 516 // byte sits between the file data and the next header. 
517 // The file data that follows is padded to an even number of bytes: 518 // if size is odd, an extra padding byte is inserted betw the next header. 519 if len(data) < 60 { 520 return errTruncatedArchive 521 } 522 if !bytes.Equal(data[58:60], archiveMagic) { 523 return errCorruptArchive 524 } 525 name := trimSpace(data[0:16]) 526 size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64) 527 if err != nil { 528 return errCorruptArchive 529 } 530 data = data[60:] 531 fsize := size + size&1 532 if fsize < 0 || fsize < size { 533 return errCorruptArchive 534 } 535 switch name { 536 case "__.PKGDEF": 537 r.skip(size) 538 default: 539 oldLimit := r.limit 540 r.limit = r.offset + size 541 if err := r.parseObject(nil); err != nil { 542 return fmt.Errorf("parsing archive member %q: %v", name, err) 543 } 544 r.skip(r.limit - r.offset) 545 r.limit = oldLimit 546 } 547 if size&1 != 0 { 548 r.skip(1) 549 } 550 } 551 return nil 552 } 553 554 // parseObject parses a single Go object file. 555 // The prefix is the bytes already read from the file, 556 // typically in order to detect that this is an object file. 557 // The object file consists of a textual header ending in "\n!\n" 558 // and then the part we want to parse begins. 559 // The format of that part is defined in a comment at the top 560 // of src/liblink/objfile.c. 561 func (r *objReader) parseObject(prefix []byte) error { 562 r.p.MaxVersion++ 563 h := make([]byte, 0, 256) 564 h = append(h, prefix...) 565 var c1, c2, c3 byte 566 for { 567 c1, c2, c3 = c2, c3, r.readByte() 568 h = append(h, c3) 569 // The new export format can contain 0 bytes. 570 // Don't consider them errors, only look for r.err != nil. 571 if r.err != nil { 572 return errCorruptObject 573 } 574 if c1 == '\n' && c2 == '!' 
&& c3 == '\n' { 575 break 576 } 577 } 578 579 hs := strings.Fields(string(h)) 580 if len(hs) >= 4 { 581 r.p.Arch = hs[3] 582 } 583 // TODO: extract OS + build ID if/when we need it 584 585 r.readFull(r.tmp[:8]) 586 if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go17ld")) { 587 return r.error(errCorruptObject) 588 } 589 590 b := r.readByte() 591 if b != 1 { 592 return r.error(errCorruptObject) 593 } 594 595 // Direct package dependencies. 596 for { 597 s := r.readString() 598 if s == "" { 599 break 600 } 601 r.p.Imports = append(r.p.Imports, s) 602 } 603 604 r.p.SymRefs = []SymID{{"", 0}} 605 for { 606 if b := r.readByte(); b != 0xfe { 607 if b != 0xff { 608 return r.error(errCorruptObject) 609 } 610 break 611 } 612 613 r.readRef() 614 } 615 616 dataLength := r.readInt() 617 r.readInt() // n relocations - ignore 618 r.readInt() // n pcdata - ignore 619 r.readInt() // n autom - ignore 620 r.readInt() // n funcdata - ignore 621 r.readInt() // n files - ignore 622 623 r.dataOffset = r.offset 624 r.skip(int64(dataLength)) 625 626 // Symbols. 
627 for { 628 if b := r.readByte(); b != 0xfe { 629 if b != 0xff { 630 return r.error(errCorruptObject) 631 } 632 break 633 } 634 635 typ := r.readInt() 636 s := &Sym{SymID: r.readSymID()} 637 r.p.Syms = append(r.p.Syms, s) 638 s.Kind = SymKind(typ) 639 flags := r.readInt() 640 s.DupOK = flags&1 != 0 641 s.Size = r.readInt() 642 s.Type = r.readSymID() 643 s.Data = r.readData() 644 s.Reloc = make([]Reloc, r.readInt()) 645 for i := range s.Reloc { 646 rel := &s.Reloc[i] 647 rel.Offset = r.readInt() 648 rel.Size = r.readInt() 649 rel.Type = obj.RelocType(r.readInt()) 650 rel.Add = r.readInt() 651 rel.Sym = r.readSymID() 652 } 653 654 if s.Kind == STEXT { 655 f := new(Func) 656 s.Func = f 657 f.Args = r.readInt() 658 f.Frame = r.readInt() 659 flags := r.readInt() 660 f.Leaf = flags&1 != 0 661 f.NoSplit = r.readInt() != 0 662 f.Var = make([]Var, r.readInt()) 663 for i := range f.Var { 664 v := &f.Var[i] 665 v.Name = r.readSymID().Name 666 v.Offset = r.readInt() 667 v.Kind = r.readInt() 668 v.Type = r.readSymID() 669 } 670 671 f.PCSP = r.readData() 672 f.PCFile = r.readData() 673 f.PCLine = r.readData() 674 f.PCData = make([]Data, r.readInt()) 675 for i := range f.PCData { 676 f.PCData[i] = r.readData() 677 } 678 f.FuncData = make([]FuncData, r.readInt()) 679 for i := range f.FuncData { 680 f.FuncData[i].Sym = r.readSymID() 681 } 682 for i := range f.FuncData { 683 f.FuncData[i].Offset = int64(r.readInt()) // TODO 684 } 685 f.File = make([]string, r.readInt()) 686 for i := range f.File { 687 f.File[i] = r.readSymID().Name 688 } 689 } 690 } 691 692 r.readFull(r.tmp[:7]) 693 if !bytes.Equal(r.tmp[:7], []byte("\xffgo17ld")) { 694 return r.error(errCorruptObject) 695 } 696 697 return nil 698 } 699 700 func (r *Reloc) String(insnOffset uint64) string { 701 delta := r.Offset - int(insnOffset) 702 s := fmt.Sprintf("[%d:%d]%s", delta, delta+r.Size, r.Type) 703 if r.Sym.Name != "" { 704 if r.Add != 0 { 705 return fmt.Sprintf("%s:%s+%d", s, r.Sym.Name, r.Add) 706 } 707 return 
fmt.Sprintf("%s:%s", s, r.Sym.Name) 708 } 709 if r.Add != 0 { 710 return fmt.Sprintf("%s:%d", s, r.Add) 711 } 712 return s 713 }