github.com/sbinet/go@v0.0.0-20160827155028-54d7de7dd62b/src/cmd/internal/goobj/read.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package goobj implements reading of Go object files and archives. 6 // 7 // TODO(rsc): Decide where this package should live. (golang.org/issue/6932) 8 // TODO(rsc): Decide the appropriate integer types for various fields. 9 // TODO(rsc): Write tests. (File format still up in the air a little.) 10 package goobj 11 12 import ( 13 "bufio" 14 "bytes" 15 "cmd/internal/obj" 16 "errors" 17 "fmt" 18 "io" 19 "strconv" 20 "strings" 21 ) 22 23 // A SymKind describes the kind of memory represented by a symbol. 24 type SymKind int 25 26 // This list is taken from include/link.h. 27 28 // Defined SymKind values. 29 // TODO(rsc): Give idiomatic Go names. 30 // TODO(rsc): Reduce the number of symbol types in the object files. 31 const ( 32 _ SymKind = iota 33 34 // readonly, executable 35 STEXT SymKind = obj.STEXT 36 SELFRXSECT SymKind = obj.SELFRXSECT 37 38 // readonly, non-executable 39 STYPE SymKind = obj.STYPE 40 SSTRING SymKind = obj.SSTRING 41 SGOSTRING SymKind = obj.SGOSTRING 42 SGOFUNC SymKind = obj.SGOFUNC 43 SRODATA SymKind = obj.SRODATA 44 SFUNCTAB SymKind = obj.SFUNCTAB 45 STYPELINK SymKind = obj.STYPELINK 46 SITABLINK SymKind = obj.SITABLINK 47 SSYMTAB SymKind = obj.SSYMTAB // TODO: move to unmapped section 48 SPCLNTAB SymKind = obj.SPCLNTAB 49 SELFROSECT SymKind = obj.SELFROSECT 50 51 // writable, non-executable 52 SMACHOPLT SymKind = obj.SMACHOPLT 53 SELFSECT SymKind = obj.SELFSECT 54 SMACHO SymKind = obj.SMACHO // Mach-O __nl_symbol_ptr 55 SMACHOGOT SymKind = obj.SMACHOGOT 56 SWINDOWS SymKind = obj.SWINDOWS 57 SELFGOT SymKind = obj.SELFGOT 58 SNOPTRDATA SymKind = obj.SNOPTRDATA 59 SINITARR SymKind = obj.SINITARR 60 SDATA SymKind = obj.SDATA 61 SBSS SymKind = obj.SBSS 62 SNOPTRBSS SymKind = 
obj.SNOPTRBSS 63 STLSBSS SymKind = obj.STLSBSS 64 65 // not mapped 66 SXREF SymKind = obj.SXREF 67 SMACHOSYMSTR SymKind = obj.SMACHOSYMSTR 68 SMACHOSYMTAB SymKind = obj.SMACHOSYMTAB 69 SMACHOINDIRECTPLT SymKind = obj.SMACHOINDIRECTPLT 70 SMACHOINDIRECTGOT SymKind = obj.SMACHOINDIRECTGOT 71 SFILE SymKind = obj.SFILE 72 SFILEPATH SymKind = obj.SFILEPATH 73 SCONST SymKind = obj.SCONST 74 SDYNIMPORT SymKind = obj.SDYNIMPORT 75 SHOSTOBJ SymKind = obj.SHOSTOBJ 76 ) 77 78 var symKindStrings = []string{ 79 SBSS: "SBSS", 80 SCONST: "SCONST", 81 SDATA: "SDATA", 82 SDYNIMPORT: "SDYNIMPORT", 83 SELFROSECT: "SELFROSECT", 84 SELFRXSECT: "SELFRXSECT", 85 SELFSECT: "SELFSECT", 86 SFILE: "SFILE", 87 SFILEPATH: "SFILEPATH", 88 SFUNCTAB: "SFUNCTAB", 89 SGOFUNC: "SGOFUNC", 90 SGOSTRING: "SGOSTRING", 91 SHOSTOBJ: "SHOSTOBJ", 92 SINITARR: "SINITARR", 93 SMACHO: "SMACHO", 94 SMACHOGOT: "SMACHOGOT", 95 SMACHOINDIRECTGOT: "SMACHOINDIRECTGOT", 96 SMACHOINDIRECTPLT: "SMACHOINDIRECTPLT", 97 SMACHOPLT: "SMACHOPLT", 98 SMACHOSYMSTR: "SMACHOSYMSTR", 99 SMACHOSYMTAB: "SMACHOSYMTAB", 100 SNOPTRBSS: "SNOPTRBSS", 101 SNOPTRDATA: "SNOPTRDATA", 102 SPCLNTAB: "SPCLNTAB", 103 SRODATA: "SRODATA", 104 SSTRING: "SSTRING", 105 SSYMTAB: "SSYMTAB", 106 STEXT: "STEXT", 107 STLSBSS: "STLSBSS", 108 STYPE: "STYPE", 109 STYPELINK: "STYPELINK", 110 SITABLINK: "SITABLINK", 111 SWINDOWS: "SWINDOWS", 112 SXREF: "SXREF", 113 } 114 115 func (k SymKind) String() string { 116 if k < 0 || int(k) >= len(symKindStrings) { 117 return fmt.Sprintf("SymKind(%d)", k) 118 } 119 return symKindStrings[k] 120 } 121 122 // A Sym is a named symbol in an object file. 123 type Sym struct { 124 SymID // symbol identifier (name and version) 125 Kind SymKind // kind of symbol 126 DupOK bool // are duplicate definitions okay? 
127 Size int // size of corresponding data 128 Type SymID // symbol for Go type information 129 Data Data // memory image of symbol 130 Reloc []Reloc // relocations to apply to Data 131 Func *Func // additional data for functions 132 } 133 134 // A SymID - the combination of Name and Version - uniquely identifies 135 // a symbol within a package. 136 type SymID struct { 137 // Name is the name of a symbol. 138 Name string 139 140 // Version is zero for symbols with global visibility. 141 // Symbols with only file visibility (such as file-level static 142 // declarations in C) have a non-zero version distinguishing 143 // a symbol in one file from a symbol of the same name 144 // in another file 145 Version int 146 } 147 148 func (s SymID) String() string { 149 if s.Version == 0 { 150 return s.Name 151 } 152 return fmt.Sprintf("%s<%d>", s.Name, s.Version) 153 } 154 155 // A Data is a reference to data stored in an object file. 156 // It records the offset and size of the data, so that a client can 157 // read the data only if necessary. 158 type Data struct { 159 Offset int64 160 Size int64 161 } 162 163 // A Reloc describes a relocation applied to a memory image to refer 164 // to an address within a particular symbol. 165 type Reloc struct { 166 // The bytes at [Offset, Offset+Size) within the containing Sym 167 // should be updated to refer to the address Add bytes after the start 168 // of the symbol Sym. 169 Offset int 170 Size int 171 Sym SymID 172 Add int 173 174 // The Type records the form of address expected in the bytes 175 // described by the previous fields: absolute, PC-relative, and so on. 176 // TODO(rsc): The interpretation of Type is not exposed by this package. 177 Type obj.RelocType 178 } 179 180 // A Var describes a variable in a function stack frame: a declared 181 // local variable, an input argument, or an output result. 
type Var struct {
	// The combination of Name, Kind, and Offset uniquely
	// identifies a variable in a function stack frame.
	// Using fewer of these - in particular, using only Name - does not.
	Name   string // Name of variable.
	Kind   int    // TODO(rsc): Define meaning.
	Offset int    // Frame offset. TODO(rsc): Define meaning.

	Type SymID // Go type for variable.
}

// Func contains additional per-symbol information specific to functions.
// The PC-indexed tables are held as Data references (offset and size)
// rather than as decoded contents.
type Func struct {
	Args     int        // size in bytes of argument frame: inputs and outputs
	Frame    int        // size in bytes of local variable frame
	Leaf     bool       // function omits save of link register (ARM)
	NoSplit  bool       // function omits stack split prologue
	Var      []Var      // detail about local variables
	PCSP     Data       // PC → SP offset map
	PCFile   Data       // PC → file number map (index into File)
	PCLine   Data       // PC → line number map
	PCData   []Data     // PC → runtime support data map
	FuncData []FuncData // non-PC-specific runtime support data
	File     []string   // paths indexed by PCFile
}

// TODO: Add PCData []byte and PCDataIter (similar to liblink).

// A FuncData is a single function-specific data value.
type FuncData struct {
	Sym    SymID // symbol holding data
	Offset int64 // offset into symbol for funcdata pointer
}

// A Package is a parsed Go object file or archive defining a Go package.
217 type Package struct { 218 ImportPath string // import path denoting this package 219 Imports []string // packages imported by this package 220 SymRefs []SymID // list of symbol names and versions referred to by this pack 221 Syms []*Sym // symbols defined by this package 222 MaxVersion int // maximum Version in any SymID in Syms 223 Arch string // architecture 224 } 225 226 var ( 227 archiveHeader = []byte("!<arch>\n") 228 archiveMagic = []byte("`\n") 229 goobjHeader = []byte("go objec") // truncated to size of archiveHeader 230 231 errCorruptArchive = errors.New("corrupt archive") 232 errTruncatedArchive = errors.New("truncated archive") 233 errCorruptObject = errors.New("corrupt object file") 234 errNotObject = errors.New("unrecognized object file format") 235 ) 236 237 // An objReader is an object file reader. 238 type objReader struct { 239 p *Package 240 b *bufio.Reader 241 f io.ReadSeeker 242 err error 243 offset int64 244 dataOffset int64 245 limit int64 246 tmp [256]byte 247 pkgprefix string 248 } 249 250 // importPathToPrefix returns the prefix that will be used in the 251 // final symbol table for the given import path. 252 // We escape '%', '"', all control characters and non-ASCII bytes, 253 // and any '.' after the final slash. 254 // 255 // See ../../../cmd/ld/lib.c:/^pathtoprefix and 256 // ../../../cmd/gc/subr.c:/^pathtoprefix. 257 func importPathToPrefix(s string) string { 258 // find index of last slash, if any, or else -1. 259 // used for determining whether an index is after the last slash. 260 slash := strings.LastIndex(s, "/") 261 262 // check for chars that need escaping 263 n := 0 264 for r := 0; r < len(s); r++ { 265 if c := s[r]; c <= ' ' || (c == '.' 
&& r > slash) || c == '%' || c == '"' || c >= 0x7F { 266 n++ 267 } 268 } 269 270 // quick exit 271 if n == 0 { 272 return s 273 } 274 275 // escape 276 const hex = "0123456789abcdef" 277 p := make([]byte, 0, len(s)+2*n) 278 for r := 0; r < len(s); r++ { 279 if c := s[r]; c <= ' ' || (c == '.' && r > slash) || c == '%' || c == '"' || c >= 0x7F { 280 p = append(p, '%', hex[c>>4], hex[c&0xF]) 281 } else { 282 p = append(p, c) 283 } 284 } 285 286 return string(p) 287 } 288 289 // init initializes r to read package p from f. 290 func (r *objReader) init(f io.ReadSeeker, p *Package) { 291 r.f = f 292 r.p = p 293 r.offset, _ = f.Seek(0, io.SeekCurrent) 294 r.limit, _ = f.Seek(0, io.SeekEnd) 295 f.Seek(r.offset, io.SeekStart) 296 r.b = bufio.NewReader(f) 297 r.pkgprefix = importPathToPrefix(p.ImportPath) + "." 298 } 299 300 // error records that an error occurred. 301 // It returns only the first error, so that an error 302 // caused by an earlier error does not discard information 303 // about the earlier error. 304 func (r *objReader) error(err error) error { 305 if r.err == nil { 306 if err == io.EOF { 307 err = io.ErrUnexpectedEOF 308 } 309 r.err = err 310 } 311 // panic("corrupt") // useful for debugging 312 return r.err 313 } 314 315 // readByte reads and returns a byte from the input file. 316 // On I/O error or EOF, it records the error but returns byte 0. 317 // A sequence of 0 bytes will eventually terminate any 318 // parsing state in the object file. In particular, it ends the 319 // reading of a varint. 320 func (r *objReader) readByte() byte { 321 if r.err != nil { 322 return 0 323 } 324 if r.offset >= r.limit { 325 r.error(io.ErrUnexpectedEOF) 326 return 0 327 } 328 b, err := r.b.ReadByte() 329 if err != nil { 330 if err == io.EOF { 331 err = io.ErrUnexpectedEOF 332 } 333 r.error(err) 334 b = 0 335 } else { 336 r.offset++ 337 } 338 return b 339 } 340 341 // read reads exactly len(b) bytes from the input file. 
342 // If an error occurs, read returns the error but also 343 // records it, so it is safe for callers to ignore the result 344 // as long as delaying the report is not a problem. 345 func (r *objReader) readFull(b []byte) error { 346 if r.err != nil { 347 return r.err 348 } 349 if r.offset+int64(len(b)) > r.limit { 350 return r.error(io.ErrUnexpectedEOF) 351 } 352 n, err := io.ReadFull(r.b, b) 353 r.offset += int64(n) 354 if err != nil { 355 return r.error(err) 356 } 357 return nil 358 } 359 360 // readInt reads a zigzag varint from the input file. 361 func (r *objReader) readInt() int { 362 var u uint64 363 364 for shift := uint(0); ; shift += 7 { 365 if shift >= 64 { 366 r.error(errCorruptObject) 367 return 0 368 } 369 c := r.readByte() 370 u |= uint64(c&0x7F) << shift 371 if c&0x80 == 0 { 372 break 373 } 374 } 375 376 v := int64(u>>1) ^ (int64(u) << 63 >> 63) 377 if int64(int(v)) != v { 378 r.error(errCorruptObject) // TODO 379 return 0 380 } 381 return int(v) 382 } 383 384 // readString reads a length-delimited string from the input file. 385 func (r *objReader) readString() string { 386 n := r.readInt() 387 buf := make([]byte, n) 388 r.readFull(buf) 389 return string(buf) 390 } 391 392 // readSymID reads a SymID from the input file. 393 func (r *objReader) readSymID() SymID { 394 i := r.readInt() 395 return r.p.SymRefs[i] 396 } 397 398 func (r *objReader) readRef() { 399 name, vers := r.readString(), r.readInt() 400 401 // In a symbol name in an object file, "". denotes the 402 // prefix for the package in which the object file has been found. 403 // Expand it. 404 name = strings.Replace(name, `"".`, r.pkgprefix, -1) 405 406 // An individual object file only records version 0 (extern) or 1 (static). 407 // To make static symbols unique across all files being read, we 408 // replace version 1 with the version corresponding to the current 409 // file number. The number is incremented on each call to parseObject. 
410 if vers != 0 { 411 vers = r.p.MaxVersion 412 } 413 r.p.SymRefs = append(r.p.SymRefs, SymID{name, vers}) 414 } 415 416 // readData reads a data reference from the input file. 417 func (r *objReader) readData() Data { 418 n := r.readInt() 419 d := Data{Offset: r.dataOffset, Size: int64(n)} 420 r.dataOffset += int64(n) 421 return d 422 } 423 424 // skip skips n bytes in the input. 425 func (r *objReader) skip(n int64) { 426 if n < 0 { 427 r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip")) 428 } 429 if n < int64(len(r.tmp)) { 430 // Since the data is so small, a just reading from the buffered 431 // reader is better than flushing the buffer and seeking. 432 r.readFull(r.tmp[:n]) 433 } else if n <= int64(r.b.Buffered()) { 434 // Even though the data is not small, it has already been read. 435 // Advance the buffer instead of seeking. 436 for n > int64(len(r.tmp)) { 437 r.readFull(r.tmp[:]) 438 n -= int64(len(r.tmp)) 439 } 440 r.readFull(r.tmp[:n]) 441 } else { 442 // Seek, giving up buffered data. 443 _, err := r.f.Seek(r.offset+n, io.SeekStart) 444 if err != nil { 445 r.error(err) 446 } 447 r.offset += n 448 r.b.Reset(r.f) 449 } 450 } 451 452 // Parse parses an object file or archive from r, 453 // assuming that its import path is pkgpath. 
454 func Parse(r io.ReadSeeker, pkgpath string) (*Package, error) { 455 if pkgpath == "" { 456 pkgpath = `""` 457 } 458 p := new(Package) 459 p.ImportPath = pkgpath 460 461 var rd objReader 462 rd.init(r, p) 463 err := rd.readFull(rd.tmp[:8]) 464 if err != nil { 465 if err == io.EOF { 466 err = io.ErrUnexpectedEOF 467 } 468 return nil, err 469 } 470 471 switch { 472 default: 473 return nil, errNotObject 474 475 case bytes.Equal(rd.tmp[:8], archiveHeader): 476 if err := rd.parseArchive(); err != nil { 477 return nil, err 478 } 479 case bytes.Equal(rd.tmp[:8], goobjHeader): 480 if err := rd.parseObject(goobjHeader); err != nil { 481 return nil, err 482 } 483 } 484 485 return p, nil 486 } 487 488 // trimSpace removes trailing spaces from b and returns the corresponding string. 489 // This effectively parses the form used in archive headers. 490 func trimSpace(b []byte) string { 491 return string(bytes.TrimRight(b, " ")) 492 } 493 494 // parseArchive parses a Unix archive of Go object files. 495 // TODO(rsc): Need to skip non-Go object files. 496 // TODO(rsc): Maybe record table of contents in r.p so that 497 // linker can avoid having code to parse archives too. 498 func (r *objReader) parseArchive() error { 499 for r.offset < r.limit { 500 if err := r.readFull(r.tmp[:60]); err != nil { 501 return err 502 } 503 data := r.tmp[:60] 504 505 // Each file is preceded by this text header (slice indices in first column): 506 // 0:16 name 507 // 16:28 date 508 // 28:34 uid 509 // 34:40 gid 510 // 40:48 mode 511 // 48:58 size 512 // 58:60 magic - `\n 513 // We only care about name, size, and magic. 514 // The fields are space-padded on the right. 515 // The size is in decimal. 516 // The file data - size bytes - follows the header. 517 // Headers are 2-byte aligned, so if size is odd, an extra padding 518 // byte sits between the file data and the next header. 
519 // The file data that follows is padded to an even number of bytes: 520 // if size is odd, an extra padding byte is inserted betw the next header. 521 if len(data) < 60 { 522 return errTruncatedArchive 523 } 524 if !bytes.Equal(data[58:60], archiveMagic) { 525 return errCorruptArchive 526 } 527 name := trimSpace(data[0:16]) 528 size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64) 529 if err != nil { 530 return errCorruptArchive 531 } 532 data = data[60:] 533 fsize := size + size&1 534 if fsize < 0 || fsize < size { 535 return errCorruptArchive 536 } 537 switch name { 538 case "__.PKGDEF": 539 r.skip(size) 540 default: 541 oldLimit := r.limit 542 r.limit = r.offset + size 543 if err := r.parseObject(nil); err != nil { 544 return fmt.Errorf("parsing archive member %q: %v", name, err) 545 } 546 r.skip(r.limit - r.offset) 547 r.limit = oldLimit 548 } 549 if size&1 != 0 { 550 r.skip(1) 551 } 552 } 553 return nil 554 } 555 556 // parseObject parses a single Go object file. 557 // The prefix is the bytes already read from the file, 558 // typically in order to detect that this is an object file. 559 // The object file consists of a textual header ending in "\n!\n" 560 // and then the part we want to parse begins. 561 // The format of that part is defined in a comment at the top 562 // of src/liblink/objfile.c. 563 func (r *objReader) parseObject(prefix []byte) error { 564 r.p.MaxVersion++ 565 h := make([]byte, 0, 256) 566 h = append(h, prefix...) 567 var c1, c2, c3 byte 568 for { 569 c1, c2, c3 = c2, c3, r.readByte() 570 h = append(h, c3) 571 // The new export format can contain 0 bytes. 572 // Don't consider them errors, only look for r.err != nil. 573 if r.err != nil { 574 return errCorruptObject 575 } 576 if c1 == '\n' && c2 == '!' 
&& c3 == '\n' { 577 break 578 } 579 } 580 581 hs := strings.Fields(string(h)) 582 if len(hs) >= 4 { 583 r.p.Arch = hs[3] 584 } 585 // TODO: extract OS + build ID if/when we need it 586 587 r.readFull(r.tmp[:8]) 588 if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go17ld")) { 589 return r.error(errCorruptObject) 590 } 591 592 b := r.readByte() 593 if b != 1 { 594 return r.error(errCorruptObject) 595 } 596 597 // Direct package dependencies. 598 for { 599 s := r.readString() 600 if s == "" { 601 break 602 } 603 r.p.Imports = append(r.p.Imports, s) 604 } 605 606 r.p.SymRefs = []SymID{{"", 0}} 607 for { 608 if b := r.readByte(); b != 0xfe { 609 if b != 0xff { 610 return r.error(errCorruptObject) 611 } 612 break 613 } 614 615 r.readRef() 616 } 617 618 dataLength := r.readInt() 619 r.readInt() // n relocations - ignore 620 r.readInt() // n pcdata - ignore 621 r.readInt() // n autom - ignore 622 r.readInt() // n funcdata - ignore 623 r.readInt() // n files - ignore 624 625 r.dataOffset = r.offset 626 r.skip(int64(dataLength)) 627 628 // Symbols. 
629 for { 630 if b := r.readByte(); b != 0xfe { 631 if b != 0xff { 632 return r.error(errCorruptObject) 633 } 634 break 635 } 636 637 typ := r.readInt() 638 s := &Sym{SymID: r.readSymID()} 639 r.p.Syms = append(r.p.Syms, s) 640 s.Kind = SymKind(typ) 641 flags := r.readInt() 642 s.DupOK = flags&1 != 0 643 s.Size = r.readInt() 644 s.Type = r.readSymID() 645 s.Data = r.readData() 646 s.Reloc = make([]Reloc, r.readInt()) 647 for i := range s.Reloc { 648 rel := &s.Reloc[i] 649 rel.Offset = r.readInt() 650 rel.Size = r.readInt() 651 rel.Type = obj.RelocType(r.readInt()) 652 rel.Add = r.readInt() 653 rel.Sym = r.readSymID() 654 } 655 656 if s.Kind == STEXT { 657 f := new(Func) 658 s.Func = f 659 f.Args = r.readInt() 660 f.Frame = r.readInt() 661 flags := r.readInt() 662 f.Leaf = flags&1 != 0 663 f.NoSplit = r.readInt() != 0 664 f.Var = make([]Var, r.readInt()) 665 for i := range f.Var { 666 v := &f.Var[i] 667 v.Name = r.readSymID().Name 668 v.Offset = r.readInt() 669 v.Kind = r.readInt() 670 v.Type = r.readSymID() 671 } 672 673 f.PCSP = r.readData() 674 f.PCFile = r.readData() 675 f.PCLine = r.readData() 676 f.PCData = make([]Data, r.readInt()) 677 for i := range f.PCData { 678 f.PCData[i] = r.readData() 679 } 680 f.FuncData = make([]FuncData, r.readInt()) 681 for i := range f.FuncData { 682 f.FuncData[i].Sym = r.readSymID() 683 } 684 for i := range f.FuncData { 685 f.FuncData[i].Offset = int64(r.readInt()) // TODO 686 } 687 f.File = make([]string, r.readInt()) 688 for i := range f.File { 689 f.File[i] = r.readSymID().Name 690 } 691 } 692 } 693 694 r.readFull(r.tmp[:7]) 695 if !bytes.Equal(r.tmp[:7], []byte("\xffgo17ld")) { 696 return r.error(errCorruptObject) 697 } 698 699 return nil 700 } 701 702 func (r *Reloc) String(insnOffset uint64) string { 703 delta := r.Offset - int(insnOffset) 704 s := fmt.Sprintf("[%d:%d]%s", delta, delta+r.Size, r.Type) 705 if r.Sym.Name != "" { 706 if r.Add != 0 { 707 return fmt.Sprintf("%s:%s+%d", s, r.Sym.Name, r.Add) 708 } 709 return 
fmt.Sprintf("%s:%s", s, r.Sym.Name) 710 } 711 if r.Add != 0 { 712 return fmt.Sprintf("%s:%d", s, r.Add) 713 } 714 return s 715 }