github.com/kdevb0x/go@v0.0.0-20180115030120-39687051e9e7/src/cmd/internal/goobj/read.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package goobj implements reading of Go object files and archives. 6 // 7 // TODO(rsc): Decide where this package should live. (golang.org/issue/6932) 8 // TODO(rsc): Decide the appropriate integer types for various fields. 9 package goobj 10 11 import ( 12 "bufio" 13 "bytes" 14 "cmd/internal/objabi" 15 "errors" 16 "fmt" 17 "io" 18 "os" 19 "strconv" 20 "strings" 21 ) 22 23 // A Sym is a named symbol in an object file. 24 type Sym struct { 25 SymID // symbol identifier (name and version) 26 Kind objabi.SymKind // kind of symbol 27 DupOK bool // are duplicate definitions okay? 28 Size int64 // size of corresponding data 29 Type SymID // symbol for Go type information 30 Data Data // memory image of symbol 31 Reloc []Reloc // relocations to apply to Data 32 Func *Func // additional data for functions 33 } 34 35 // A SymID - the combination of Name and Version - uniquely identifies 36 // a symbol within a package. 37 type SymID struct { 38 // Name is the name of a symbol. 39 Name string 40 41 // Version is zero for symbols with global visibility. 42 // Symbols with only file visibility (such as file-level static 43 // declarations in C) have a non-zero version distinguishing 44 // a symbol in one file from a symbol of the same name 45 // in another file 46 Version int64 47 } 48 49 func (s SymID) String() string { 50 if s.Version == 0 { 51 return s.Name 52 } 53 return fmt.Sprintf("%s<%d>", s.Name, s.Version) 54 } 55 56 // A Data is a reference to data stored in an object file. 57 // It records the offset and size of the data, so that a client can 58 // read the data only if necessary. 59 type Data struct { 60 Offset int64 61 Size int64 62 } 63 64 // A Reloc describes a relocation applied to a memory image to refer 65 // to an address within a particular symbol. 66 type Reloc struct { 67 // The bytes at [Offset, Offset+Size) within the containing Sym 68 // should be updated to refer to the address Add bytes after the start 69 // of the symbol Sym. 70 Offset int64 71 Size int64 72 Sym SymID 73 Add int64 74 75 // The Type records the form of address expected in the bytes 76 // described by the previous fields: absolute, PC-relative, and so on. 77 // TODO(rsc): The interpretation of Type is not exposed by this package. 78 Type objabi.RelocType 79 } 80 81 // A Var describes a variable in a function stack frame: a declared 82 // local variable, an input argument, or an output result. 83 type Var struct { 84 // The combination of Name, Kind, and Offset uniquely 85 // identifies a variable in a function stack frame. 86 // Using fewer of these - in particular, using only Name - does not. 87 Name string // Name of variable. 88 Kind int64 // TODO(rsc): Define meaning. 89 Offset int64 // Frame offset. TODO(rsc): Define meaning. 90 91 Type SymID // Go type for variable. 92 } 93 94 // Func contains additional per-symbol information specific to functions. 95 type Func struct { 96 Args int64 // size in bytes of argument frame: inputs and outputs 97 Frame int64 // size in bytes of local variable frame 98 Leaf bool // function omits save of link register (ARM) 99 NoSplit bool // function omits stack split prologue 100 Var []Var // detail about local variables 101 PCSP Data // PC → SP offset map 102 PCFile Data // PC → file number map (index into File) 103 PCLine Data // PC → line number map 104 PCInline Data // PC → inline tree index map 105 PCData []Data // PC → runtime support data map 106 FuncData []FuncData // non-PC-specific runtime support data 107 File []string // paths indexed by PCFile 108 InlTree []InlinedCall 109 } 110 111 // TODO: Add PCData []byte and PCDataIter (similar to liblink). 112 113 // A FuncData is a single function-specific data value. 114 type FuncData struct { 115 Sym SymID // symbol holding data 116 Offset int64 // offset into symbol for funcdata pointer 117 } 118 119 // An InlinedCall is a node in an InlTree. 120 // See cmd/internal/obj.InlTree for details. 121 type InlinedCall struct { 122 Parent int64 123 File string 124 Line int64 125 Func SymID 126 } 127 128 // A Package is a parsed Go object file or archive defining a Go package. 129 type Package struct { 130 ImportPath string // import path denoting this package 131 Imports []string // packages imported by this package 132 SymRefs []SymID // list of symbol names and versions referred to by this pack 133 Syms []*Sym // symbols defined by this package 134 MaxVersion int64 // maximum Version in any SymID in Syms 135 Arch string // architecture 136 Native []*NativeReader // native object data (e.g. ELF) 137 } 138 139 type NativeReader struct { 140 Name string 141 io.ReaderAt 142 } 143 144 var ( 145 archiveHeader = []byte("!<arch>\n") 146 archiveMagic = []byte("`\n") 147 goobjHeader = []byte("go objec") // truncated to size of archiveHeader 148 149 errCorruptArchive = errors.New("corrupt archive") 150 errTruncatedArchive = errors.New("truncated archive") 151 errCorruptObject = errors.New("corrupt object file") 152 errNotObject = errors.New("unrecognized object file format") 153 ) 154 155 // An objReader is an object file reader. 156 type objReader struct { 157 p *Package 158 b *bufio.Reader 159 f *os.File 160 err error 161 offset int64 162 dataOffset int64 163 limit int64 164 tmp [256]byte 165 pkgprefix string 166 } 167 168 // init initializes r to read package p from f. 169 func (r *objReader) init(f *os.File, p *Package) { 170 r.f = f 171 r.p = p 172 r.offset, _ = f.Seek(0, io.SeekCurrent) 173 r.limit, _ = f.Seek(0, io.SeekEnd) 174 f.Seek(r.offset, io.SeekStart) 175 r.b = bufio.NewReader(f) 176 r.pkgprefix = objabi.PathToPrefix(p.ImportPath) + "." 177 } 178 179 // error records that an error occurred. 180 // It returns only the first error, so that an error 181 // caused by an earlier error does not discard information 182 // about the earlier error. 183 func (r *objReader) error(err error) error { 184 if r.err == nil { 185 if err == io.EOF { 186 err = io.ErrUnexpectedEOF 187 } 188 r.err = err 189 } 190 // panic("corrupt") // useful for debugging 191 return r.err 192 } 193 194 // peek returns the next n bytes without advancing the reader. 195 func (r *objReader) peek(n int) ([]byte, error) { 196 if r.err != nil { 197 return nil, r.err 198 } 199 if r.offset >= r.limit { 200 r.error(io.ErrUnexpectedEOF) 201 return nil, r.err 202 } 203 b, err := r.b.Peek(n) 204 if err != nil { 205 if err != bufio.ErrBufferFull { 206 r.error(err) 207 } 208 } 209 return b, err 210 } 211 212 // readByte reads and returns a byte from the input file. 213 // On I/O error or EOF, it records the error but returns byte 0. 214 // A sequence of 0 bytes will eventually terminate any 215 // parsing state in the object file. In particular, it ends the 216 // reading of a varint. 217 func (r *objReader) readByte() byte { 218 if r.err != nil { 219 return 0 220 } 221 if r.offset >= r.limit { 222 r.error(io.ErrUnexpectedEOF) 223 return 0 224 } 225 b, err := r.b.ReadByte() 226 if err != nil { 227 if err == io.EOF { 228 err = io.ErrUnexpectedEOF 229 } 230 r.error(err) 231 b = 0 232 } else { 233 r.offset++ 234 } 235 return b 236 } 237 238 // read reads exactly len(b) bytes from the input file. 239 // If an error occurs, read returns the error but also 240 // records it, so it is safe for callers to ignore the result 241 // as long as delaying the report is not a problem. 242 func (r *objReader) readFull(b []byte) error { 243 if r.err != nil { 244 return r.err 245 } 246 if r.offset+int64(len(b)) > r.limit { 247 return r.error(io.ErrUnexpectedEOF) 248 } 249 n, err := io.ReadFull(r.b, b) 250 r.offset += int64(n) 251 if err != nil { 252 return r.error(err) 253 } 254 return nil 255 } 256 257 // readInt reads a zigzag varint from the input file. 258 func (r *objReader) readInt() int64 { 259 var u uint64 260 261 for shift := uint(0); ; shift += 7 { 262 if shift >= 64 { 263 r.error(errCorruptObject) 264 return 0 265 } 266 c := r.readByte() 267 u |= uint64(c&0x7F) << shift 268 if c&0x80 == 0 { 269 break 270 } 271 } 272 273 return int64(u>>1) ^ (int64(u) << 63 >> 63) 274 } 275 276 // readString reads a length-delimited string from the input file. 277 func (r *objReader) readString() string { 278 n := r.readInt() 279 buf := make([]byte, n) 280 r.readFull(buf) 281 return string(buf) 282 } 283 284 // readSymID reads a SymID from the input file. 285 func (r *objReader) readSymID() SymID { 286 i := r.readInt() 287 return r.p.SymRefs[i] 288 } 289 290 func (r *objReader) readRef() { 291 name, vers := r.readString(), r.readInt() 292 293 // In a symbol name in an object file, "". denotes the 294 // prefix for the package in which the object file has been found. 295 // Expand it. 296 name = strings.Replace(name, `"".`, r.pkgprefix, -1) 297 298 // An individual object file only records version 0 (extern) or 1 (static). 299 // To make static symbols unique across all files being read, we 300 // replace version 1 with the version corresponding to the current 301 // file number. The number is incremented on each call to parseObject. 302 if vers != 0 { 303 vers = r.p.MaxVersion 304 } 305 r.p.SymRefs = append(r.p.SymRefs, SymID{name, vers}) 306 } 307 308 // readData reads a data reference from the input file. 309 func (r *objReader) readData() Data { 310 n := r.readInt() 311 d := Data{Offset: r.dataOffset, Size: n} 312 r.dataOffset += n 313 return d 314 } 315 316 // skip skips n bytes in the input. 317 func (r *objReader) skip(n int64) { 318 if n < 0 { 319 r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip")) 320 } 321 if n < int64(len(r.tmp)) { 322 // Since the data is so small, a just reading from the buffered 323 // reader is better than flushing the buffer and seeking. 324 r.readFull(r.tmp[:n]) 325 } else if n <= int64(r.b.Buffered()) { 326 // Even though the data is not small, it has already been read. 327 // Advance the buffer instead of seeking. 328 for n > int64(len(r.tmp)) { 329 r.readFull(r.tmp[:]) 330 n -= int64(len(r.tmp)) 331 } 332 r.readFull(r.tmp[:n]) 333 } else { 334 // Seek, giving up buffered data. 335 _, err := r.f.Seek(r.offset+n, io.SeekStart) 336 if err != nil { 337 r.error(err) 338 } 339 r.offset += n 340 r.b.Reset(r.f) 341 } 342 } 343 344 // Parse parses an object file or archive from f, 345 // assuming that its import path is pkgpath. 346 func Parse(f *os.File, pkgpath string) (*Package, error) { 347 if pkgpath == "" { 348 pkgpath = `""` 349 } 350 p := new(Package) 351 p.ImportPath = pkgpath 352 353 var rd objReader 354 rd.init(f, p) 355 err := rd.readFull(rd.tmp[:8]) 356 if err != nil { 357 if err == io.EOF { 358 err = io.ErrUnexpectedEOF 359 } 360 return nil, err 361 } 362 363 switch { 364 default: 365 return nil, errNotObject 366 367 case bytes.Equal(rd.tmp[:8], archiveHeader): 368 if err := rd.parseArchive(); err != nil { 369 return nil, err 370 } 371 case bytes.Equal(rd.tmp[:8], goobjHeader): 372 if err := rd.parseObject(goobjHeader); err != nil { 373 return nil, err 374 } 375 } 376 377 return p, nil 378 } 379 380 // trimSpace removes trailing spaces from b and returns the corresponding string. 381 // This effectively parses the form used in archive headers. 382 func trimSpace(b []byte) string { 383 return string(bytes.TrimRight(b, " ")) 384 } 385 386 // parseArchive parses a Unix archive of Go object files. 387 func (r *objReader) parseArchive() error { 388 for r.offset < r.limit { 389 if err := r.readFull(r.tmp[:60]); err != nil { 390 return err 391 } 392 data := r.tmp[:60] 393 394 // Each file is preceded by this text header (slice indices in first column): 395 // 0:16 name 396 // 16:28 date 397 // 28:34 uid 398 // 34:40 gid 399 // 40:48 mode 400 // 48:58 size 401 // 58:60 magic - `\n 402 // We only care about name, size, and magic. 403 // The fields are space-padded on the right. 404 // The size is in decimal. 405 // The file data - size bytes - follows the header. 406 // Headers are 2-byte aligned, so if size is odd, an extra padding 407 // byte sits between the file data and the next header. 408 // The file data that follows is padded to an even number of bytes: 409 // if size is odd, an extra padding byte is inserted betw the next header. 410 if len(data) < 60 { 411 return errTruncatedArchive 412 } 413 if !bytes.Equal(data[58:60], archiveMagic) { 414 return errCorruptArchive 415 } 416 name := trimSpace(data[0:16]) 417 size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64) 418 if err != nil { 419 return errCorruptArchive 420 } 421 data = data[60:] 422 fsize := size + size&1 423 if fsize < 0 || fsize < size { 424 return errCorruptArchive 425 } 426 switch name { 427 case "__.PKGDEF": 428 r.skip(size) 429 default: 430 oldLimit := r.limit 431 r.limit = r.offset + size 432 433 p, err := r.peek(8) 434 if err != nil { 435 return err 436 } 437 if bytes.Equal(p, goobjHeader) { 438 if err := r.parseObject(nil); err != nil { 439 return fmt.Errorf("parsing archive member %q: %v", name, err) 440 } 441 } else { 442 r.p.Native = append(r.p.Native, &NativeReader{ 443 Name: name, 444 ReaderAt: io.NewSectionReader(r.f, r.offset, size), 445 }) 446 } 447 448 r.skip(r.limit - r.offset) 449 r.limit = oldLimit 450 } 451 if size&1 != 0 { 452 r.skip(1) 453 } 454 } 455 return nil 456 } 457 458 // parseObject parses a single Go object file. 459 // The prefix is the bytes already read from the file, 460 // typically in order to detect that this is an object file. 461 // The object file consists of a textual header ending in "\n!\n" 462 // and then the part we want to parse begins. 463 // The format of that part is defined in a comment at the top 464 // of src/liblink/objfile.c. 465 func (r *objReader) parseObject(prefix []byte) error { 466 r.p.MaxVersion++ 467 h := make([]byte, 0, 256) 468 h = append(h, prefix...) 469 var c1, c2, c3 byte 470 for { 471 c1, c2, c3 = c2, c3, r.readByte() 472 h = append(h, c3) 473 // The new export format can contain 0 bytes. 474 // Don't consider them errors, only look for r.err != nil. 475 if r.err != nil { 476 return errCorruptObject 477 } 478 if c1 == '\n' && c2 == '!' && c3 == '\n' { 479 break 480 } 481 } 482 483 hs := strings.Fields(string(h)) 484 if len(hs) >= 4 { 485 r.p.Arch = hs[3] 486 } 487 // TODO: extract OS + build ID if/when we need it 488 489 r.readFull(r.tmp[:8]) 490 if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go19ld")) { 491 return r.error(errCorruptObject) 492 } 493 494 b := r.readByte() 495 if b != 1 { 496 return r.error(errCorruptObject) 497 } 498 499 // Direct package dependencies. 500 for { 501 s := r.readString() 502 if s == "" { 503 break 504 } 505 r.p.Imports = append(r.p.Imports, s) 506 } 507 508 r.p.SymRefs = []SymID{{"", 0}} 509 for { 510 if b := r.readByte(); b != 0xfe { 511 if b != 0xff { 512 return r.error(errCorruptObject) 513 } 514 break 515 } 516 517 r.readRef() 518 } 519 520 dataLength := r.readInt() 521 r.readInt() // n relocations - ignore 522 r.readInt() // n pcdata - ignore 523 r.readInt() // n autom - ignore 524 r.readInt() // n funcdata - ignore 525 r.readInt() // n files - ignore 526 527 r.dataOffset = r.offset 528 r.skip(dataLength) 529 530 // Symbols. 531 for { 532 if b := r.readByte(); b != 0xfe { 533 if b != 0xff { 534 return r.error(errCorruptObject) 535 } 536 break 537 } 538 539 typ := r.readByte() 540 s := &Sym{SymID: r.readSymID()} 541 r.p.Syms = append(r.p.Syms, s) 542 s.Kind = objabi.SymKind(typ) 543 flags := r.readInt() 544 s.DupOK = flags&1 != 0 545 s.Size = r.readInt() 546 s.Type = r.readSymID() 547 s.Data = r.readData() 548 s.Reloc = make([]Reloc, r.readInt()) 549 for i := range s.Reloc { 550 rel := &s.Reloc[i] 551 rel.Offset = r.readInt() 552 rel.Size = r.readInt() 553 rel.Type = objabi.RelocType(r.readInt()) 554 rel.Add = r.readInt() 555 rel.Sym = r.readSymID() 556 } 557 558 if s.Kind == objabi.STEXT { 559 f := new(Func) 560 s.Func = f 561 f.Args = r.readInt() 562 f.Frame = r.readInt() 563 flags := r.readInt() 564 f.Leaf = flags&(1<<0) != 0 565 f.NoSplit = r.readInt() != 0 566 f.Var = make([]Var, r.readInt()) 567 for i := range f.Var { 568 v := &f.Var[i] 569 v.Name = r.readSymID().Name 570 v.Offset = r.readInt() 571 v.Kind = r.readInt() 572 v.Type = r.readSymID() 573 } 574 575 f.PCSP = r.readData() 576 f.PCFile = r.readData() 577 f.PCLine = r.readData() 578 f.PCInline = r.readData() 579 f.PCData = make([]Data, r.readInt()) 580 for i := range f.PCData { 581 f.PCData[i] = r.readData() 582 } 583 f.FuncData = make([]FuncData, r.readInt()) 584 for i := range f.FuncData { 585 f.FuncData[i].Sym = r.readSymID() 586 } 587 for i := range f.FuncData { 588 f.FuncData[i].Offset = int64(r.readInt()) // TODO 589 } 590 f.File = make([]string, r.readInt()) 591 for i := range f.File { 592 f.File[i] = r.readSymID().Name 593 } 594 f.InlTree = make([]InlinedCall, r.readInt()) 595 for i := range f.InlTree { 596 f.InlTree[i].Parent = r.readInt() 597 f.InlTree[i].File = r.readSymID().Name 598 f.InlTree[i].Line = r.readInt() 599 f.InlTree[i].Func = r.readSymID() 600 } 601 } 602 } 603 604 r.readFull(r.tmp[:7]) 605 if !bytes.Equal(r.tmp[:7], []byte("\xffgo19ld")) { 606 return r.error(errCorruptObject) 607 } 608 609 return nil 610 } 611 612 func (r *Reloc) String(insnOffset uint64) string { 613 delta := r.Offset - int64(insnOffset) 614 s := fmt.Sprintf("[%d:%d]%s", delta, delta+r.Size, r.Type) 615 if r.Sym.Name != "" { 616 if r.Add != 0 { 617 return fmt.Sprintf("%s:%s+%d", s, r.Sym.Name, r.Add) 618 } 619 return fmt.Sprintf("%s:%s", s, r.Sym.Name) 620 } 621 if r.Add != 0 { 622 return fmt.Sprintf("%s:%d", s, r.Add) 623 } 624 return s 625 }