github.com/mattn/go@v0.0.0-20171011075504-07f7db3ea99f/src/cmd/internal/goobj/read.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package goobj implements reading of Go object files and archives. 6 // 7 // TODO(rsc): Decide where this package should live. (golang.org/issue/6932) 8 // TODO(rsc): Decide the appropriate integer types for various fields. 9 package goobj 10 11 import ( 12 "bufio" 13 "bytes" 14 "cmd/internal/objabi" 15 "errors" 16 "fmt" 17 "io" 18 "os" 19 "strconv" 20 "strings" 21 ) 22 23 // A Sym is a named symbol in an object file. 24 type Sym struct { 25 SymID // symbol identifier (name and version) 26 Kind objabi.SymKind // kind of symbol 27 DupOK bool // are duplicate definitions okay? 28 Size int // size of corresponding data 29 Type SymID // symbol for Go type information 30 Data Data // memory image of symbol 31 Reloc []Reloc // relocations to apply to Data 32 Func *Func // additional data for functions 33 } 34 35 // A SymID - the combination of Name and Version - uniquely identifies 36 // a symbol within a package. 37 type SymID struct { 38 // Name is the name of a symbol. 39 Name string 40 41 // Version is zero for symbols with global visibility. 42 // Symbols with only file visibility (such as file-level static 43 // declarations in C) have a non-zero version distinguishing 44 // a symbol in one file from a symbol of the same name 45 // in another file 46 Version int 47 } 48 49 func (s SymID) String() string { 50 if s.Version == 0 { 51 return s.Name 52 } 53 return fmt.Sprintf("%s<%d>", s.Name, s.Version) 54 } 55 56 // A Data is a reference to data stored in an object file. 57 // It records the offset and size of the data, so that a client can 58 // read the data only if necessary. 59 type Data struct { 60 Offset int64 61 Size int64 62 } 63 64 // A Reloc describes a relocation applied to a memory image to refer 65 // to an address within a particular symbol. 66 type Reloc struct { 67 // The bytes at [Offset, Offset+Size) within the containing Sym 68 // should be updated to refer to the address Add bytes after the start 69 // of the symbol Sym. 70 Offset int 71 Size int 72 Sym SymID 73 Add int 74 75 // The Type records the form of address expected in the bytes 76 // described by the previous fields: absolute, PC-relative, and so on. 77 // TODO(rsc): The interpretation of Type is not exposed by this package. 78 Type objabi.RelocType 79 } 80 81 // A Var describes a variable in a function stack frame: a declared 82 // local variable, an input argument, or an output result. 83 type Var struct { 84 // The combination of Name, Kind, and Offset uniquely 85 // identifies a variable in a function stack frame. 86 // Using fewer of these - in particular, using only Name - does not. 87 Name string // Name of variable. 88 Kind int // TODO(rsc): Define meaning. 89 Offset int // Frame offset. TODO(rsc): Define meaning. 90 91 Type SymID // Go type for variable. 92 } 93 94 // Func contains additional per-symbol information specific to functions. 95 type Func struct { 96 Args int // size in bytes of argument frame: inputs and outputs 97 Frame int // size in bytes of local variable frame 98 Leaf bool // function omits save of link register (ARM) 99 NoSplit bool // function omits stack split prologue 100 Var []Var // detail about local variables 101 PCSP Data // PC → SP offset map 102 PCFile Data // PC → file number map (index into File) 103 PCLine Data // PC → line number map 104 PCInline Data // PC → inline tree index map 105 PCData []Data // PC → runtime support data map 106 FuncData []FuncData // non-PC-specific runtime support data 107 File []string // paths indexed by PCFile 108 InlTree []InlinedCall 109 } 110 111 // TODO: Add PCData []byte and PCDataIter (similar to liblink). 112 113 // A FuncData is a single function-specific data value. 114 type FuncData struct { 115 Sym SymID // symbol holding data 116 Offset int64 // offset into symbol for funcdata pointer 117 } 118 119 // An InlinedCall is a node in an InlTree. 120 // See cmd/internal/obj.InlTree for details. 121 type InlinedCall struct { 122 Parent int 123 File string 124 Line int 125 Func SymID 126 } 127 128 // A Package is a parsed Go object file or archive defining a Go package. 129 type Package struct { 130 ImportPath string // import path denoting this package 131 Imports []string // packages imported by this package 132 SymRefs []SymID // list of symbol names and versions referred to by this pack 133 Syms []*Sym // symbols defined by this package 134 MaxVersion int // maximum Version in any SymID in Syms 135 Arch string // architecture 136 Native []*NativeReader // native object data (e.g. ELF) 137 } 138 139 type NativeReader struct { 140 Name string 141 io.ReaderAt 142 } 143 144 var ( 145 archiveHeader = []byte("!<arch>\n") 146 archiveMagic = []byte("`\n") 147 goobjHeader = []byte("go objec") // truncated to size of archiveHeader 148 149 errCorruptArchive = errors.New("corrupt archive") 150 errTruncatedArchive = errors.New("truncated archive") 151 errCorruptObject = errors.New("corrupt object file") 152 errNotObject = errors.New("unrecognized object file format") 153 ) 154 155 // An objReader is an object file reader. 156 type objReader struct { 157 p *Package 158 b *bufio.Reader 159 f *os.File 160 err error 161 offset int64 162 dataOffset int64 163 limit int64 164 tmp [256]byte 165 pkgprefix string 166 } 167 168 // init initializes r to read package p from f. 169 func (r *objReader) init(f *os.File, p *Package) { 170 r.f = f 171 r.p = p 172 r.offset, _ = f.Seek(0, io.SeekCurrent) 173 r.limit, _ = f.Seek(0, io.SeekEnd) 174 f.Seek(r.offset, io.SeekStart) 175 r.b = bufio.NewReader(f) 176 r.pkgprefix = objabi.PathToPrefix(p.ImportPath) + "." 177 } 178 179 // error records that an error occurred. 180 // It returns only the first error, so that an error 181 // caused by an earlier error does not discard information 182 // about the earlier error. 183 func (r *objReader) error(err error) error { 184 if r.err == nil { 185 if err == io.EOF { 186 err = io.ErrUnexpectedEOF 187 } 188 r.err = err 189 } 190 // panic("corrupt") // useful for debugging 191 return r.err 192 } 193 194 // peek returns the next n bytes without advancing the reader. 195 func (r *objReader) peek(n int) ([]byte, error) { 196 if r.err != nil { 197 return nil, r.err 198 } 199 if r.offset >= r.limit { 200 r.error(io.ErrUnexpectedEOF) 201 return nil, r.err 202 } 203 b, err := r.b.Peek(n) 204 if err != nil { 205 if err != bufio.ErrBufferFull { 206 r.error(err) 207 } 208 } 209 return b, err 210 } 211 212 // readByte reads and returns a byte from the input file. 213 // On I/O error or EOF, it records the error but returns byte 0. 214 // A sequence of 0 bytes will eventually terminate any 215 // parsing state in the object file. In particular, it ends the 216 // reading of a varint. 217 func (r *objReader) readByte() byte { 218 if r.err != nil { 219 return 0 220 } 221 if r.offset >= r.limit { 222 r.error(io.ErrUnexpectedEOF) 223 return 0 224 } 225 b, err := r.b.ReadByte() 226 if err != nil { 227 if err == io.EOF { 228 err = io.ErrUnexpectedEOF 229 } 230 r.error(err) 231 b = 0 232 } else { 233 r.offset++ 234 } 235 return b 236 } 237 238 // read reads exactly len(b) bytes from the input file. 239 // If an error occurs, read returns the error but also 240 // records it, so it is safe for callers to ignore the result 241 // as long as delaying the report is not a problem. 242 func (r *objReader) readFull(b []byte) error { 243 if r.err != nil { 244 return r.err 245 } 246 if r.offset+int64(len(b)) > r.limit { 247 return r.error(io.ErrUnexpectedEOF) 248 } 249 n, err := io.ReadFull(r.b, b) 250 r.offset += int64(n) 251 if err != nil { 252 return r.error(err) 253 } 254 return nil 255 } 256 257 // readInt reads a zigzag varint from the input file. 258 func (r *objReader) readInt() int { 259 var u uint64 260 261 for shift := uint(0); ; shift += 7 { 262 if shift >= 64 { 263 r.error(errCorruptObject) 264 return 0 265 } 266 c := r.readByte() 267 u |= uint64(c&0x7F) << shift 268 if c&0x80 == 0 { 269 break 270 } 271 } 272 273 v := int64(u>>1) ^ (int64(u) << 63 >> 63) 274 if int64(int(v)) != v { 275 r.error(errCorruptObject) // TODO 276 return 0 277 } 278 return int(v) 279 } 280 281 // readString reads a length-delimited string from the input file. 282 func (r *objReader) readString() string { 283 n := r.readInt() 284 buf := make([]byte, n) 285 r.readFull(buf) 286 return string(buf) 287 } 288 289 // readSymID reads a SymID from the input file. 290 func (r *objReader) readSymID() SymID { 291 i := r.readInt() 292 return r.p.SymRefs[i] 293 } 294 295 func (r *objReader) readRef() { 296 name, vers := r.readString(), r.readInt() 297 298 // In a symbol name in an object file, "". denotes the 299 // prefix for the package in which the object file has been found. 300 // Expand it. 301 name = strings.Replace(name, `"".`, r.pkgprefix, -1) 302 303 // An individual object file only records version 0 (extern) or 1 (static). 304 // To make static symbols unique across all files being read, we 305 // replace version 1 with the version corresponding to the current 306 // file number. The number is incremented on each call to parseObject. 307 if vers != 0 { 308 vers = r.p.MaxVersion 309 } 310 r.p.SymRefs = append(r.p.SymRefs, SymID{name, vers}) 311 } 312 313 // readData reads a data reference from the input file. 314 func (r *objReader) readData() Data { 315 n := r.readInt() 316 d := Data{Offset: r.dataOffset, Size: int64(n)} 317 r.dataOffset += int64(n) 318 return d 319 } 320 321 // skip skips n bytes in the input. 322 func (r *objReader) skip(n int64) { 323 if n < 0 { 324 r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip")) 325 } 326 if n < int64(len(r.tmp)) { 327 // Since the data is so small, a just reading from the buffered 328 // reader is better than flushing the buffer and seeking. 329 r.readFull(r.tmp[:n]) 330 } else if n <= int64(r.b.Buffered()) { 331 // Even though the data is not small, it has already been read. 332 // Advance the buffer instead of seeking. 333 for n > int64(len(r.tmp)) { 334 r.readFull(r.tmp[:]) 335 n -= int64(len(r.tmp)) 336 } 337 r.readFull(r.tmp[:n]) 338 } else { 339 // Seek, giving up buffered data. 340 _, err := r.f.Seek(r.offset+n, io.SeekStart) 341 if err != nil { 342 r.error(err) 343 } 344 r.offset += n 345 r.b.Reset(r.f) 346 } 347 } 348 349 // Parse parses an object file or archive from f, 350 // assuming that its import path is pkgpath. 351 func Parse(f *os.File, pkgpath string) (*Package, error) { 352 if pkgpath == "" { 353 pkgpath = `""` 354 } 355 p := new(Package) 356 p.ImportPath = pkgpath 357 358 var rd objReader 359 rd.init(f, p) 360 err := rd.readFull(rd.tmp[:8]) 361 if err != nil { 362 if err == io.EOF { 363 err = io.ErrUnexpectedEOF 364 } 365 return nil, err 366 } 367 368 switch { 369 default: 370 return nil, errNotObject 371 372 case bytes.Equal(rd.tmp[:8], archiveHeader): 373 if err := rd.parseArchive(); err != nil { 374 return nil, err 375 } 376 case bytes.Equal(rd.tmp[:8], goobjHeader): 377 if err := rd.parseObject(goobjHeader); err != nil { 378 return nil, err 379 } 380 } 381 382 return p, nil 383 } 384 385 // trimSpace removes trailing spaces from b and returns the corresponding string. 386 // This effectively parses the form used in archive headers. 387 func trimSpace(b []byte) string { 388 return string(bytes.TrimRight(b, " ")) 389 } 390 391 // parseArchive parses a Unix archive of Go object files. 392 func (r *objReader) parseArchive() error { 393 for r.offset < r.limit { 394 if err := r.readFull(r.tmp[:60]); err != nil { 395 return err 396 } 397 data := r.tmp[:60] 398 399 // Each file is preceded by this text header (slice indices in first column): 400 // 0:16 name 401 // 16:28 date 402 // 28:34 uid 403 // 34:40 gid 404 // 40:48 mode 405 // 48:58 size 406 // 58:60 magic - `\n 407 // We only care about name, size, and magic. 408 // The fields are space-padded on the right. 409 // The size is in decimal. 410 // The file data - size bytes - follows the header. 411 // Headers are 2-byte aligned, so if size is odd, an extra padding 412 // byte sits between the file data and the next header. 413 // The file data that follows is padded to an even number of bytes: 414 // if size is odd, an extra padding byte is inserted betw the next header. 415 if len(data) < 60 { 416 return errTruncatedArchive 417 } 418 if !bytes.Equal(data[58:60], archiveMagic) { 419 return errCorruptArchive 420 } 421 name := trimSpace(data[0:16]) 422 size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64) 423 if err != nil { 424 return errCorruptArchive 425 } 426 data = data[60:] 427 fsize := size + size&1 428 if fsize < 0 || fsize < size { 429 return errCorruptArchive 430 } 431 switch name { 432 case "__.PKGDEF": 433 r.skip(size) 434 default: 435 oldLimit := r.limit 436 r.limit = r.offset + size 437 438 p, err := r.peek(8) 439 if err != nil { 440 return err 441 } 442 if bytes.Equal(p, goobjHeader) { 443 if err := r.parseObject(nil); err != nil { 444 return fmt.Errorf("parsing archive member %q: %v", name, err) 445 } 446 } else { 447 r.p.Native = append(r.p.Native, &NativeReader{ 448 Name: name, 449 ReaderAt: io.NewSectionReader(r.f, r.offset, size), 450 }) 451 } 452 453 r.skip(r.limit - r.offset) 454 r.limit = oldLimit 455 } 456 if size&1 != 0 { 457 r.skip(1) 458 } 459 } 460 return nil 461 } 462 463 // parseObject parses a single Go object file. 464 // The prefix is the bytes already read from the file, 465 // typically in order to detect that this is an object file. 466 // The object file consists of a textual header ending in "\n!\n" 467 // and then the part we want to parse begins. 468 // The format of that part is defined in a comment at the top 469 // of src/liblink/objfile.c. 470 func (r *objReader) parseObject(prefix []byte) error { 471 r.p.MaxVersion++ 472 h := make([]byte, 0, 256) 473 h = append(h, prefix...) 474 var c1, c2, c3 byte 475 for { 476 c1, c2, c3 = c2, c3, r.readByte() 477 h = append(h, c3) 478 // The new export format can contain 0 bytes. 479 // Don't consider them errors, only look for r.err != nil. 480 if r.err != nil { 481 return errCorruptObject 482 } 483 if c1 == '\n' && c2 == '!' && c3 == '\n' { 484 break 485 } 486 } 487 488 hs := strings.Fields(string(h)) 489 if len(hs) >= 4 { 490 r.p.Arch = hs[3] 491 } 492 // TODO: extract OS + build ID if/when we need it 493 494 r.readFull(r.tmp[:8]) 495 if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go19ld")) { 496 return r.error(errCorruptObject) 497 } 498 499 b := r.readByte() 500 if b != 1 { 501 return r.error(errCorruptObject) 502 } 503 504 // Direct package dependencies. 505 for { 506 s := r.readString() 507 if s == "" { 508 break 509 } 510 r.p.Imports = append(r.p.Imports, s) 511 } 512 513 r.p.SymRefs = []SymID{{"", 0}} 514 for { 515 if b := r.readByte(); b != 0xfe { 516 if b != 0xff { 517 return r.error(errCorruptObject) 518 } 519 break 520 } 521 522 r.readRef() 523 } 524 525 dataLength := r.readInt() 526 r.readInt() // n relocations - ignore 527 r.readInt() // n pcdata - ignore 528 r.readInt() // n autom - ignore 529 r.readInt() // n funcdata - ignore 530 r.readInt() // n files - ignore 531 532 r.dataOffset = r.offset 533 r.skip(int64(dataLength)) 534 535 // Symbols. 536 for { 537 if b := r.readByte(); b != 0xfe { 538 if b != 0xff { 539 return r.error(errCorruptObject) 540 } 541 break 542 } 543 544 typ := r.readByte() 545 s := &Sym{SymID: r.readSymID()} 546 r.p.Syms = append(r.p.Syms, s) 547 s.Kind = objabi.SymKind(typ) 548 flags := r.readInt() 549 s.DupOK = flags&1 != 0 550 s.Size = r.readInt() 551 s.Type = r.readSymID() 552 s.Data = r.readData() 553 s.Reloc = make([]Reloc, r.readInt()) 554 for i := range s.Reloc { 555 rel := &s.Reloc[i] 556 rel.Offset = r.readInt() 557 rel.Size = r.readInt() 558 rel.Type = objabi.RelocType(r.readInt()) 559 rel.Add = r.readInt() 560 rel.Sym = r.readSymID() 561 } 562 563 if s.Kind == objabi.STEXT { 564 f := new(Func) 565 s.Func = f 566 f.Args = r.readInt() 567 f.Frame = r.readInt() 568 flags := r.readInt() 569 f.Leaf = flags&(1<<0) != 0 570 f.NoSplit = r.readInt() != 0 571 f.Var = make([]Var, r.readInt()) 572 for i := range f.Var { 573 v := &f.Var[i] 574 v.Name = r.readSymID().Name 575 v.Offset = r.readInt() 576 v.Kind = r.readInt() 577 v.Type = r.readSymID() 578 } 579 580 f.PCSP = r.readData() 581 f.PCFile = r.readData() 582 f.PCLine = r.readData() 583 f.PCInline = r.readData() 584 f.PCData = make([]Data, r.readInt()) 585 for i := range f.PCData { 586 f.PCData[i] = r.readData() 587 } 588 f.FuncData = make([]FuncData, r.readInt()) 589 for i := range f.FuncData { 590 f.FuncData[i].Sym = r.readSymID() 591 } 592 for i := range f.FuncData { 593 f.FuncData[i].Offset = int64(r.readInt()) // TODO 594 } 595 f.File = make([]string, r.readInt()) 596 for i := range f.File { 597 f.File[i] = r.readSymID().Name 598 } 599 f.InlTree = make([]InlinedCall, r.readInt()) 600 for i := range f.InlTree { 601 f.InlTree[i].Parent = r.readInt() 602 f.InlTree[i].File = r.readSymID().Name 603 f.InlTree[i].Line = r.readInt() 604 f.InlTree[i].Func = r.readSymID() 605 } 606 } 607 } 608 609 r.readFull(r.tmp[:7]) 610 if !bytes.Equal(r.tmp[:7], []byte("\xffgo19ld")) { 611 return r.error(errCorruptObject) 612 } 613 614 return nil 615 } 616 617 func (r *Reloc) String(insnOffset uint64) string { 618 delta := r.Offset - int(insnOffset) 619 s := fmt.Sprintf("[%d:%d]%s", delta, delta+r.Size, r.Type) 620 if r.Sym.Name != "" { 621 if r.Add != 0 { 622 return fmt.Sprintf("%s:%s+%d", s, r.Sym.Name, r.Add) 623 } 624 return fmt.Sprintf("%s:%s", s, r.Sym.Name) 625 } 626 if r.Add != 0 { 627 return fmt.Sprintf("%s:%d", s, r.Add) 628 } 629 return s 630 }