github.com/zach-klippenstein/go@v0.0.0-20150108044943-fcfbeb3adf58/src/cmd/internal/goobj/read.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package goobj implements reading of Go object files and archives. 6 // 7 // TODO(rsc): Decide where this package should live. (golang.org/issue/6932) 8 // TODO(rsc): Decide the appropriate integer types for various fields. 9 // TODO(rsc): Write tests. (File format still up in the air a little.) 10 package goobj 11 12 import ( 13 "bufio" 14 "bytes" 15 "errors" 16 "fmt" 17 "io" 18 "strconv" 19 "strings" 20 ) 21 22 // A SymKind describes the kind of memory represented by a symbol. 23 type SymKind int 24 25 // This list is taken from include/link.h. 26 27 // Defined SymKind values. 28 // TODO(rsc): Give idiomatic Go names. 29 // TODO(rsc): Reduce the number of symbol types in the object files. 30 const ( 31 _ SymKind = iota 32 33 // readonly, executable 34 STEXT 35 SELFRXSECT 36 37 // readonly, non-executable 38 STYPE 39 SSTRING 40 SGOSTRING 41 SGOFUNC 42 SRODATA 43 SFUNCTAB 44 STYPELINK 45 SSYMTAB // TODO: move to unmapped section 46 SPCLNTAB 47 SELFROSECT 48 49 // writable, non-executable 50 SMACHOPLT 51 SELFSECT 52 SMACHO // Mach-O __nl_symbol_ptr 53 SMACHOGOT 54 SNOPTRDATA 55 SINITARR 56 SDATA 57 SWINDOWS 58 SBSS 59 SNOPTRBSS 60 STLSBSS 61 62 // not mapped 63 SXREF 64 SMACHOSYMSTR 65 SMACHOSYMTAB 66 SMACHOINDIRECTPLT 67 SMACHOINDIRECTGOT 68 SFILE 69 SFILEPATH 70 SCONST 71 SDYNIMPORT 72 SHOSTOBJ 73 ) 74 75 var symKindStrings = []string{ 76 SBSS: "SBSS", 77 SCONST: "SCONST", 78 SDATA: "SDATA", 79 SDYNIMPORT: "SDYNIMPORT", 80 SELFROSECT: "SELFROSECT", 81 SELFRXSECT: "SELFRXSECT", 82 SELFSECT: "SELFSECT", 83 SFILE: "SFILE", 84 SFILEPATH: "SFILEPATH", 85 SFUNCTAB: "SFUNCTAB", 86 SGOFUNC: "SGOFUNC", 87 SGOSTRING: "SGOSTRING", 88 SHOSTOBJ: "SHOSTOBJ", 89 SINITARR: "SINITARR", 90 SMACHO: "SMACHO", 91 SMACHOGOT: "SMACHOGOT", 92 SMACHOINDIRECTGOT: "SMACHOINDIRECTGOT", 93 SMACHOINDIRECTPLT: "SMACHOINDIRECTPLT", 94 SMACHOPLT: "SMACHOPLT", 95 SMACHOSYMSTR: "SMACHOSYMSTR", 96 SMACHOSYMTAB: "SMACHOSYMTAB", 97 SNOPTRBSS: "SNOPTRBSS", 98 SNOPTRDATA: "SNOPTRDATA", 99 SPCLNTAB: "SPCLNTAB", 100 SRODATA: "SRODATA", 101 SSTRING: "SSTRING", 102 SSYMTAB: "SSYMTAB", 103 STEXT: "STEXT", 104 STLSBSS: "STLSBSS", 105 STYPE: "STYPE", 106 STYPELINK: "STYPELINK", 107 SWINDOWS: "SWINDOWS", 108 SXREF: "SXREF", 109 } 110 111 func (k SymKind) String() string { 112 if k < 0 || int(k) >= len(symKindStrings) { 113 return fmt.Sprintf("SymKind(%d)", k) 114 } 115 return symKindStrings[k] 116 } 117 118 // A Sym is a named symbol in an object file. 119 type Sym struct { 120 SymID // symbol identifier (name and version) 121 Kind SymKind // kind of symbol 122 DupOK bool // are duplicate definitions okay? 123 Size int // size of corresponding data 124 Type SymID // symbol for Go type information 125 Data Data // memory image of symbol 126 Reloc []Reloc // relocations to apply to Data 127 Func *Func // additional data for functions 128 } 129 130 // A SymID - the combination of Name and Version - uniquely identifies 131 // a symbol within a package. 132 type SymID struct { 133 // Name is the name of a symbol. 134 Name string 135 136 // Version is zero for symbols with global visibility. 137 // Symbols with only file visibility (such as file-level static 138 // declarations in C) have a non-zero version distinguishing 139 // a symbol in one file from a symbol of the same name 140 // in another file 141 Version int 142 } 143 144 func (s SymID) String() string { 145 if s.Version == 0 { 146 return s.Name 147 } 148 return fmt.Sprintf("%s<%d>", s.Name, s.Version) 149 } 150 151 // A Data is a reference to data stored in an object file. 152 // It records the offset and size of the data, so that a client can 153 // read the data only if necessary. 154 type Data struct { 155 Offset int64 156 Size int64 157 } 158 159 // A Reloc describes a relocation applied to a memory image to refer 160 // to an address within a particular symbol. 161 type Reloc struct { 162 // The bytes at [Offset, Offset+Size) within the memory image 163 // should be updated to refer to the address Add bytes after the start 164 // of the symbol Sym. 165 Offset int 166 Size int 167 Sym SymID 168 Add int 169 170 // The Type records the form of address expected in the bytes 171 // described by the previous fields: absolute, PC-relative, and so on. 172 // TODO(rsc): The interpretation of Type is not exposed by this package. 173 Type int 174 } 175 176 // A Var describes a variable in a function stack frame: a declared 177 // local variable, an input argument, or an output result. 178 type Var struct { 179 // The combination of Name, Kind, and Offset uniquely 180 // identifies a variable in a function stack frame. 181 // Using fewer of these - in particular, using only Name - does not. 182 Name string // Name of variable. 183 Kind int // TODO(rsc): Define meaning. 184 Offset int // Frame offset. TODO(rsc): Define meaning. 185 186 Type SymID // Go type for variable. 187 } 188 189 // Func contains additional per-symbol information specific to functions. 190 type Func struct { 191 Args int // size in bytes of argument frame: inputs and outputs 192 Frame int // size in bytes of local variable frame 193 Leaf bool // function omits save of link register (ARM) 194 NoSplit bool // function omits stack split prologue 195 Var []Var // detail about local variables 196 PCSP Data // PC → SP offset map 197 PCFile Data // PC → file number map (index into File) 198 PCLine Data // PC → line number map 199 PCData []Data // PC → runtime support data map 200 FuncData []FuncData // non-PC-specific runtime support data 201 File []string // paths indexed by PCFile 202 } 203 204 // TODO: Add PCData []byte and PCDataIter (similar to liblink). 205 206 // A FuncData is a single function-specific data value. 207 type FuncData struct { 208 Sym SymID // symbol holding data 209 Offset int64 // offset into symbol for funcdata pointer 210 } 211 212 // A Package is a parsed Go object file or archive defining a Go package. 213 type Package struct { 214 ImportPath string // import path denoting this package 215 Imports []string // packages imported by this package 216 Syms []*Sym // symbols defined by this package 217 MaxVersion int // maximum Version in any SymID in Syms 218 } 219 220 var ( 221 archiveHeader = []byte("!<arch>\n") 222 archiveMagic = []byte("`\n") 223 goobjHeader = []byte("go objec") // truncated to size of archiveHeader 224 225 errCorruptArchive = errors.New("corrupt archive") 226 errTruncatedArchive = errors.New("truncated archive") 227 errNotArchive = errors.New("unrecognized archive format") 228 229 errCorruptObject = errors.New("corrupt object file") 230 errTruncatedObject = errors.New("truncated object file") 231 errNotObject = errors.New("unrecognized object file format") 232 ) 233 234 // An objReader is an object file reader. 235 type objReader struct { 236 p *Package 237 b *bufio.Reader 238 f io.ReadSeeker 239 err error 240 offset int64 241 limit int64 242 tmp [256]byte 243 pkg string 244 pkgprefix string 245 } 246 247 // importPathToPrefix returns the prefix that will be used in the 248 // final symbol table for the given import path. 249 // We escape '%', '"', all control characters and non-ASCII bytes, 250 // and any '.' after the final slash. 251 // 252 // See ../../../cmd/ld/lib.c:/^pathtoprefix and 253 // ../../../cmd/gc/subr.c:/^pathtoprefix. 254 func importPathToPrefix(s string) string { 255 // find index of last slash, if any, or else -1. 256 // used for determining whether an index is after the last slash. 257 slash := strings.LastIndex(s, "/") 258 259 // check for chars that need escaping 260 n := 0 261 for r := 0; r < len(s); r++ { 262 if c := s[r]; c <= ' ' || (c == '.' && r > slash) || c == '%' || c == '"' || c >= 0x7F { 263 n++ 264 } 265 } 266 267 // quick exit 268 if n == 0 { 269 return s 270 } 271 272 // escape 273 const hex = "0123456789abcdef" 274 p := make([]byte, 0, len(s)+2*n) 275 for r := 0; r < len(s); r++ { 276 if c := s[r]; c <= ' ' || (c == '.' && r > slash) || c == '%' || c == '"' || c >= 0x7F { 277 p = append(p, '%', hex[c>>4], hex[c&0xF]) 278 } else { 279 p = append(p, c) 280 } 281 } 282 283 return string(p) 284 } 285 286 // init initializes r to read package p from f. 287 func (r *objReader) init(f io.ReadSeeker, p *Package) { 288 r.f = f 289 r.p = p 290 r.offset, _ = f.Seek(0, 1) 291 r.limit, _ = f.Seek(0, 2) 292 f.Seek(r.offset, 0) 293 r.b = bufio.NewReader(f) 294 r.pkgprefix = importPathToPrefix(p.ImportPath) + "." 295 } 296 297 // error records that an error occurred. 298 // It returns only the first error, so that an error 299 // caused by an earlier error does not discard information 300 // about the earlier error. 301 func (r *objReader) error(err error) error { 302 if r.err == nil { 303 if err == io.EOF { 304 err = io.ErrUnexpectedEOF 305 } 306 r.err = err 307 } 308 // panic("corrupt") // useful for debugging 309 return r.err 310 } 311 312 // readByte reads and returns a byte from the input file. 313 // On I/O error or EOF, it records the error but returns byte 0. 314 // A sequence of 0 bytes will eventually terminate any 315 // parsing state in the object file. In particular, it ends the 316 // reading of a varint. 317 func (r *objReader) readByte() byte { 318 if r.err != nil { 319 return 0 320 } 321 if r.offset >= r.limit { 322 r.error(io.ErrUnexpectedEOF) 323 return 0 324 } 325 b, err := r.b.ReadByte() 326 if err != nil { 327 if err == io.EOF { 328 err = io.ErrUnexpectedEOF 329 } 330 r.error(err) 331 b = 0 332 } else { 333 r.offset++ 334 } 335 return b 336 } 337 338 // read reads exactly len(b) bytes from the input file. 339 // If an error occurs, read returns the error but also 340 // records it, so it is safe for callers to ignore the result 341 // as long as delaying the report is not a problem. 342 func (r *objReader) readFull(b []byte) error { 343 if r.err != nil { 344 return r.err 345 } 346 if r.offset+int64(len(b)) > r.limit { 347 return r.error(io.ErrUnexpectedEOF) 348 } 349 n, err := io.ReadFull(r.b, b) 350 r.offset += int64(n) 351 if err != nil { 352 return r.error(err) 353 } 354 return nil 355 } 356 357 // readInt reads a zigzag varint from the input file. 358 func (r *objReader) readInt() int { 359 var u uint64 360 361 for shift := uint(0); ; shift += 7 { 362 if shift >= 64 { 363 r.error(errCorruptObject) 364 return 0 365 } 366 c := r.readByte() 367 u |= uint64(c&0x7F) << shift 368 if c&0x80 == 0 { 369 break 370 } 371 } 372 373 v := int64(u>>1) ^ (int64(u) << 63 >> 63) 374 if int64(int(v)) != v { 375 r.error(errCorruptObject) // TODO 376 return 0 377 } 378 return int(v) 379 } 380 381 // readString reads a length-delimited string from the input file. 382 func (r *objReader) readString() string { 383 n := r.readInt() 384 buf := make([]byte, n) 385 r.readFull(buf) 386 return string(buf) 387 } 388 389 // readSymID reads a SymID from the input file. 390 func (r *objReader) readSymID() SymID { 391 name, vers := r.readString(), r.readInt() 392 393 // In a symbol name in an object file, "". denotes the 394 // prefix for the package in which the object file has been found. 395 // Expand it. 396 name = strings.Replace(name, `"".`, r.pkgprefix, -1) 397 398 // An individual object file only records version 0 (extern) or 1 (static). 399 // To make static symbols unique across all files being read, we 400 // replace version 1 with the version corresponding to the current 401 // file number. The number is incremented on each call to parseObject. 402 if vers != 0 { 403 vers = r.p.MaxVersion 404 } 405 406 return SymID{name, vers} 407 } 408 409 // readData reads a data reference from the input file. 410 func (r *objReader) readData() Data { 411 n := r.readInt() 412 d := Data{Offset: r.offset, Size: int64(n)} 413 r.skip(int64(n)) 414 return d 415 } 416 417 // skip skips n bytes in the input. 418 func (r *objReader) skip(n int64) { 419 if n < 0 { 420 r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip")) 421 } 422 if n < int64(len(r.tmp)) { 423 // Since the data is so small, a just reading from the buffered 424 // reader is better than flushing the buffer and seeking. 425 r.readFull(r.tmp[:n]) 426 } else if n <= int64(r.b.Buffered()) { 427 // Even though the data is not small, it has already been read. 428 // Advance the buffer instead of seeking. 429 for n > int64(len(r.tmp)) { 430 r.readFull(r.tmp[:]) 431 n -= int64(len(r.tmp)) 432 } 433 r.readFull(r.tmp[:n]) 434 } else { 435 // Seek, giving up buffered data. 436 _, err := r.f.Seek(r.offset+n, 0) 437 if err != nil { 438 r.error(err) 439 } 440 r.offset += n 441 r.b.Reset(r.f) 442 } 443 } 444 445 // Parse parses an object file or archive from r, 446 // assuming that its import path is pkgpath. 447 func Parse(r io.ReadSeeker, pkgpath string) (*Package, error) { 448 if pkgpath == "" { 449 pkgpath = `""` 450 } 451 p := new(Package) 452 p.ImportPath = pkgpath 453 454 var rd objReader 455 rd.init(r, p) 456 err := rd.readFull(rd.tmp[:8]) 457 if err != nil { 458 if err == io.EOF { 459 err = io.ErrUnexpectedEOF 460 } 461 return nil, err 462 } 463 464 switch { 465 default: 466 return nil, errNotObject 467 468 case bytes.Equal(rd.tmp[:8], archiveHeader): 469 if err := rd.parseArchive(); err != nil { 470 return nil, err 471 } 472 case bytes.Equal(rd.tmp[:8], goobjHeader): 473 if err := rd.parseObject(goobjHeader); err != nil { 474 return nil, err 475 } 476 } 477 478 return p, nil 479 } 480 481 // trimSpace removes trailing spaces from b and returns the corresponding string. 482 // This effectively parses the form used in archive headers. 483 func trimSpace(b []byte) string { 484 return string(bytes.TrimRight(b, " ")) 485 } 486 487 // parseArchive parses a Unix archive of Go object files. 488 // TODO(rsc): Need to skip non-Go object files. 489 // TODO(rsc): Maybe record table of contents in r.p so that 490 // linker can avoid having code to parse archives too. 491 func (r *objReader) parseArchive() error { 492 for r.offset < r.limit { 493 if err := r.readFull(r.tmp[:60]); err != nil { 494 return err 495 } 496 data := r.tmp[:60] 497 498 // Each file is preceded by this text header (slice indices in first column): 499 // 0:16 name 500 // 16:28 date 501 // 28:34 uid 502 // 34:40 gid 503 // 40:48 mode 504 // 48:58 size 505 // 58:60 magic - `\n 506 // We only care about name, size, and magic. 507 // The fields are space-padded on the right. 508 // The size is in decimal. 509 // The file data - size bytes - follows the header. 510 // Headers are 2-byte aligned, so if size is odd, an extra padding 511 // byte sits between the file data and the next header. 512 // The file data that follows is padded to an even number of bytes: 513 // if size is odd, an extra padding byte is inserted betw the next header. 514 if len(data) < 60 { 515 return errTruncatedArchive 516 } 517 if !bytes.Equal(data[58:60], archiveMagic) { 518 return errCorruptArchive 519 } 520 name := trimSpace(data[0:16]) 521 size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64) 522 if err != nil { 523 return errCorruptArchive 524 } 525 data = data[60:] 526 fsize := size + size&1 527 if fsize < 0 || fsize < size { 528 return errCorruptArchive 529 } 530 switch name { 531 case "__.SYMDEF", "__.GOSYMDEF", "__.PKGDEF": 532 r.skip(size) 533 default: 534 oldLimit := r.limit 535 r.limit = r.offset + size 536 if err := r.parseObject(nil); err != nil { 537 return fmt.Errorf("parsing archive member %q: %v", name, err) 538 } 539 r.skip(r.limit - r.offset) 540 r.limit = oldLimit 541 } 542 if size&1 != 0 { 543 r.skip(1) 544 } 545 } 546 return nil 547 } 548 549 // parseObject parses a single Go object file. 550 // The prefix is the bytes already read from the file, 551 // typically in order to detect that this is an object file. 552 // The object file consists of a textual header ending in "\n!\n" 553 // and then the part we want to parse begins. 554 // The format of that part is defined in a comment at the top 555 // of src/liblink/objfile.c. 556 func (r *objReader) parseObject(prefix []byte) error { 557 // TODO(rsc): Maybe use prefix and the initial input to 558 // record the header line from the file, which would 559 // give the architecture and other version information. 560 561 r.p.MaxVersion++ 562 var c1, c2, c3 byte 563 for { 564 c1, c2, c3 = c2, c3, r.readByte() 565 if c3 == 0 { // NUL or EOF, either is bad 566 return errCorruptObject 567 } 568 if c1 == '\n' && c2 == '!' && c3 == '\n' { 569 break 570 } 571 } 572 573 r.readFull(r.tmp[:8]) 574 if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go13ld")) { 575 return r.error(errCorruptObject) 576 } 577 578 b := r.readByte() 579 if b != 1 { 580 return r.error(errCorruptObject) 581 } 582 583 // Direct package dependencies. 584 for { 585 s := r.readString() 586 if s == "" { 587 break 588 } 589 r.p.Imports = append(r.p.Imports, s) 590 } 591 592 // Symbols. 593 for { 594 if b := r.readByte(); b != 0xfe { 595 if b != 0xff { 596 return r.error(errCorruptObject) 597 } 598 break 599 } 600 601 typ := r.readInt() 602 s := &Sym{SymID: r.readSymID()} 603 r.p.Syms = append(r.p.Syms, s) 604 s.Kind = SymKind(typ) 605 flags := r.readInt() 606 s.DupOK = flags&1 != 0 607 s.Size = r.readInt() 608 s.Type = r.readSymID() 609 s.Data = r.readData() 610 s.Reloc = make([]Reloc, r.readInt()) 611 for i := range s.Reloc { 612 rel := &s.Reloc[i] 613 rel.Offset = r.readInt() 614 rel.Size = r.readInt() 615 rel.Type = r.readInt() 616 rel.Add = r.readInt() 617 r.readInt() // Xadd - ignored 618 rel.Sym = r.readSymID() 619 r.readSymID() // Xsym - ignored 620 } 621 622 if s.Kind == STEXT { 623 f := new(Func) 624 s.Func = f 625 f.Args = r.readInt() 626 f.Frame = r.readInt() 627 flags := r.readInt() 628 f.Leaf = flags&1 != 0 629 f.NoSplit = r.readInt() != 0 630 f.Var = make([]Var, r.readInt()) 631 for i := range f.Var { 632 v := &f.Var[i] 633 v.Name = r.readSymID().Name 634 v.Offset = r.readInt() 635 v.Kind = r.readInt() 636 v.Type = r.readSymID() 637 } 638 639 f.PCSP = r.readData() 640 f.PCFile = r.readData() 641 f.PCLine = r.readData() 642 f.PCData = make([]Data, r.readInt()) 643 for i := range f.PCData { 644 f.PCData[i] = r.readData() 645 } 646 f.FuncData = make([]FuncData, r.readInt()) 647 for i := range f.FuncData { 648 f.FuncData[i].Sym = r.readSymID() 649 } 650 for i := range f.FuncData { 651 f.FuncData[i].Offset = int64(r.readInt()) // TODO 652 } 653 f.File = make([]string, r.readInt()) 654 for i := range f.File { 655 f.File[i] = r.readSymID().Name 656 } 657 } 658 } 659 660 r.readFull(r.tmp[:7]) 661 if !bytes.Equal(r.tmp[:7], []byte("\xffgo13ld")) { 662 return r.error(errCorruptObject) 663 } 664 665 return nil 666 }