github.com/slayercat/go@v0.0.0-20170428012452-c51559813f61/src/cmd/internal/goobj/read.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package goobj implements reading of Go object files and archives. 6 // 7 // TODO(rsc): Decide where this package should live. (golang.org/issue/6932) 8 // TODO(rsc): Decide the appropriate integer types for various fields. 9 // TODO(rsc): Write tests. (File format still up in the air a little.) 10 package goobj 11 12 import ( 13 "bufio" 14 "bytes" 15 "cmd/internal/objabi" 16 "errors" 17 "fmt" 18 "io" 19 "strconv" 20 "strings" 21 ) 22 23 // A Sym is a named symbol in an object file. 24 type Sym struct { 25 SymID // symbol identifier (name and version) 26 Kind objabi.SymKind // kind of symbol 27 DupOK bool // are duplicate definitions okay? 28 Size int // size of corresponding data 29 Type SymID // symbol for Go type information 30 Data Data // memory image of symbol 31 Reloc []Reloc // relocations to apply to Data 32 Func *Func // additional data for functions 33 } 34 35 // A SymID - the combination of Name and Version - uniquely identifies 36 // a symbol within a package. 37 type SymID struct { 38 // Name is the name of a symbol. 39 Name string 40 41 // Version is zero for symbols with global visibility. 42 // Symbols with only file visibility (such as file-level static 43 // declarations in C) have a non-zero version distinguishing 44 // a symbol in one file from a symbol of the same name 45 // in another file 46 Version int 47 } 48 49 func (s SymID) String() string { 50 if s.Version == 0 { 51 return s.Name 52 } 53 return fmt.Sprintf("%s<%d>", s.Name, s.Version) 54 } 55 56 // A Data is a reference to data stored in an object file. 57 // It records the offset and size of the data, so that a client can 58 // read the data only if necessary. 59 type Data struct { 60 Offset int64 61 Size int64 62 } 63 64 // A Reloc describes a relocation applied to a memory image to refer 65 // to an address within a particular symbol. 66 type Reloc struct { 67 // The bytes at [Offset, Offset+Size) within the containing Sym 68 // should be updated to refer to the address Add bytes after the start 69 // of the symbol Sym. 70 Offset int 71 Size int 72 Sym SymID 73 Add int 74 75 // The Type records the form of address expected in the bytes 76 // described by the previous fields: absolute, PC-relative, and so on. 77 // TODO(rsc): The interpretation of Type is not exposed by this package. 78 Type objabi.RelocType 79 } 80 81 // A Var describes a variable in a function stack frame: a declared 82 // local variable, an input argument, or an output result. 83 type Var struct { 84 // The combination of Name, Kind, and Offset uniquely 85 // identifies a variable in a function stack frame. 86 // Using fewer of these - in particular, using only Name - does not. 87 Name string // Name of variable. 88 Kind int // TODO(rsc): Define meaning. 89 Offset int // Frame offset. TODO(rsc): Define meaning. 90 91 Type SymID // Go type for variable. 92 } 93 94 // Func contains additional per-symbol information specific to functions. 95 type Func struct { 96 Args int // size in bytes of argument frame: inputs and outputs 97 Frame int // size in bytes of local variable frame 98 Leaf bool // function omits save of link register (ARM) 99 NoSplit bool // function omits stack split prologue 100 Var []Var // detail about local variables 101 PCSP Data // PC → SP offset map 102 PCFile Data // PC → file number map (index into File) 103 PCLine Data // PC → line number map 104 PCInline Data // PC → inline tree index map 105 PCData []Data // PC → runtime support data map 106 FuncData []FuncData // non-PC-specific runtime support data 107 File []string // paths indexed by PCFile 108 InlTree []InlinedCall 109 } 110 111 // TODO: Add PCData []byte and PCDataIter (similar to liblink). 112 113 // A FuncData is a single function-specific data value. 114 type FuncData struct { 115 Sym SymID // symbol holding data 116 Offset int64 // offset into symbol for funcdata pointer 117 } 118 119 // An InlinedCall is a node in an InlTree. 120 // See cmd/internal/obj.InlTree for details. 121 type InlinedCall struct { 122 Parent int 123 File string 124 Line int 125 Func SymID 126 } 127 128 // A Package is a parsed Go object file or archive defining a Go package. 129 type Package struct { 130 ImportPath string // import path denoting this package 131 Imports []string // packages imported by this package 132 SymRefs []SymID // list of symbol names and versions referred to by this pack 133 Syms []*Sym // symbols defined by this package 134 MaxVersion int // maximum Version in any SymID in Syms 135 Arch string // architecture 136 } 137 138 var ( 139 archiveHeader = []byte("!<arch>\n") 140 archiveMagic = []byte("`\n") 141 goobjHeader = []byte("go objec") // truncated to size of archiveHeader 142 143 errCorruptArchive = errors.New("corrupt archive") 144 errTruncatedArchive = errors.New("truncated archive") 145 errCorruptObject = errors.New("corrupt object file") 146 errNotObject = errors.New("unrecognized object file format") 147 ) 148 149 // An objReader is an object file reader. 150 type objReader struct { 151 p *Package 152 b *bufio.Reader 153 f io.ReadSeeker 154 err error 155 offset int64 156 dataOffset int64 157 limit int64 158 tmp [256]byte 159 pkgprefix string 160 } 161 162 // init initializes r to read package p from f. 163 func (r *objReader) init(f io.ReadSeeker, p *Package) { 164 r.f = f 165 r.p = p 166 r.offset, _ = f.Seek(0, io.SeekCurrent) 167 r.limit, _ = f.Seek(0, io.SeekEnd) 168 f.Seek(r.offset, io.SeekStart) 169 r.b = bufio.NewReader(f) 170 r.pkgprefix = objabi.PathToPrefix(p.ImportPath) + "." 171 } 172 173 // error records that an error occurred. 174 // It returns only the first error, so that an error 175 // caused by an earlier error does not discard information 176 // about the earlier error. 177 func (r *objReader) error(err error) error { 178 if r.err == nil { 179 if err == io.EOF { 180 err = io.ErrUnexpectedEOF 181 } 182 r.err = err 183 } 184 // panic("corrupt") // useful for debugging 185 return r.err 186 } 187 188 // readByte reads and returns a byte from the input file. 189 // On I/O error or EOF, it records the error but returns byte 0. 190 // A sequence of 0 bytes will eventually terminate any 191 // parsing state in the object file. In particular, it ends the 192 // reading of a varint. 193 func (r *objReader) readByte() byte { 194 if r.err != nil { 195 return 0 196 } 197 if r.offset >= r.limit { 198 r.error(io.ErrUnexpectedEOF) 199 return 0 200 } 201 b, err := r.b.ReadByte() 202 if err != nil { 203 if err == io.EOF { 204 err = io.ErrUnexpectedEOF 205 } 206 r.error(err) 207 b = 0 208 } else { 209 r.offset++ 210 } 211 return b 212 } 213 214 // read reads exactly len(b) bytes from the input file. 215 // If an error occurs, read returns the error but also 216 // records it, so it is safe for callers to ignore the result 217 // as long as delaying the report is not a problem. 218 func (r *objReader) readFull(b []byte) error { 219 if r.err != nil { 220 return r.err 221 } 222 if r.offset+int64(len(b)) > r.limit { 223 return r.error(io.ErrUnexpectedEOF) 224 } 225 n, err := io.ReadFull(r.b, b) 226 r.offset += int64(n) 227 if err != nil { 228 return r.error(err) 229 } 230 return nil 231 } 232 233 // readInt reads a zigzag varint from the input file. 234 func (r *objReader) readInt() int { 235 var u uint64 236 237 for shift := uint(0); ; shift += 7 { 238 if shift >= 64 { 239 r.error(errCorruptObject) 240 return 0 241 } 242 c := r.readByte() 243 u |= uint64(c&0x7F) << shift 244 if c&0x80 == 0 { 245 break 246 } 247 } 248 249 v := int64(u>>1) ^ (int64(u) << 63 >> 63) 250 if int64(int(v)) != v { 251 r.error(errCorruptObject) // TODO 252 return 0 253 } 254 return int(v) 255 } 256 257 // readString reads a length-delimited string from the input file. 258 func (r *objReader) readString() string { 259 n := r.readInt() 260 buf := make([]byte, n) 261 r.readFull(buf) 262 return string(buf) 263 } 264 265 // readSymID reads a SymID from the input file. 266 func (r *objReader) readSymID() SymID { 267 i := r.readInt() 268 return r.p.SymRefs[i] 269 } 270 271 func (r *objReader) readRef() { 272 name, vers := r.readString(), r.readInt() 273 274 // In a symbol name in an object file, "". denotes the 275 // prefix for the package in which the object file has been found. 276 // Expand it. 277 name = strings.Replace(name, `"".`, r.pkgprefix, -1) 278 279 // An individual object file only records version 0 (extern) or 1 (static). 280 // To make static symbols unique across all files being read, we 281 // replace version 1 with the version corresponding to the current 282 // file number. The number is incremented on each call to parseObject. 283 if vers != 0 { 284 vers = r.p.MaxVersion 285 } 286 r.p.SymRefs = append(r.p.SymRefs, SymID{name, vers}) 287 } 288 289 // readData reads a data reference from the input file. 290 func (r *objReader) readData() Data { 291 n := r.readInt() 292 d := Data{Offset: r.dataOffset, Size: int64(n)} 293 r.dataOffset += int64(n) 294 return d 295 } 296 297 // skip skips n bytes in the input. 298 func (r *objReader) skip(n int64) { 299 if n < 0 { 300 r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip")) 301 } 302 if n < int64(len(r.tmp)) { 303 // Since the data is so small, a just reading from the buffered 304 // reader is better than flushing the buffer and seeking. 305 r.readFull(r.tmp[:n]) 306 } else if n <= int64(r.b.Buffered()) { 307 // Even though the data is not small, it has already been read. 308 // Advance the buffer instead of seeking. 309 for n > int64(len(r.tmp)) { 310 r.readFull(r.tmp[:]) 311 n -= int64(len(r.tmp)) 312 } 313 r.readFull(r.tmp[:n]) 314 } else { 315 // Seek, giving up buffered data. 316 _, err := r.f.Seek(r.offset+n, io.SeekStart) 317 if err != nil { 318 r.error(err) 319 } 320 r.offset += n 321 r.b.Reset(r.f) 322 } 323 } 324 325 // Parse parses an object file or archive from r, 326 // assuming that its import path is pkgpath. 327 func Parse(r io.ReadSeeker, pkgpath string) (*Package, error) { 328 if pkgpath == "" { 329 pkgpath = `""` 330 } 331 p := new(Package) 332 p.ImportPath = pkgpath 333 334 var rd objReader 335 rd.init(r, p) 336 err := rd.readFull(rd.tmp[:8]) 337 if err != nil { 338 if err == io.EOF { 339 err = io.ErrUnexpectedEOF 340 } 341 return nil, err 342 } 343 344 switch { 345 default: 346 return nil, errNotObject 347 348 case bytes.Equal(rd.tmp[:8], archiveHeader): 349 if err := rd.parseArchive(); err != nil { 350 return nil, err 351 } 352 case bytes.Equal(rd.tmp[:8], goobjHeader): 353 if err := rd.parseObject(goobjHeader); err != nil { 354 return nil, err 355 } 356 } 357 358 return p, nil 359 } 360 361 // trimSpace removes trailing spaces from b and returns the corresponding string. 362 // This effectively parses the form used in archive headers. 363 func trimSpace(b []byte) string { 364 return string(bytes.TrimRight(b, " ")) 365 } 366 367 // parseArchive parses a Unix archive of Go object files. 368 // TODO(rsc): Need to skip non-Go object files. 369 // TODO(rsc): Maybe record table of contents in r.p so that 370 // linker can avoid having code to parse archives too. 371 func (r *objReader) parseArchive() error { 372 for r.offset < r.limit { 373 if err := r.readFull(r.tmp[:60]); err != nil { 374 return err 375 } 376 data := r.tmp[:60] 377 378 // Each file is preceded by this text header (slice indices in first column): 379 // 0:16 name 380 // 16:28 date 381 // 28:34 uid 382 // 34:40 gid 383 // 40:48 mode 384 // 48:58 size 385 // 58:60 magic - `\n 386 // We only care about name, size, and magic. 387 // The fields are space-padded on the right. 388 // The size is in decimal. 389 // The file data - size bytes - follows the header. 390 // Headers are 2-byte aligned, so if size is odd, an extra padding 391 // byte sits between the file data and the next header. 392 // The file data that follows is padded to an even number of bytes: 393 // if size is odd, an extra padding byte is inserted betw the next header. 394 if len(data) < 60 { 395 return errTruncatedArchive 396 } 397 if !bytes.Equal(data[58:60], archiveMagic) { 398 return errCorruptArchive 399 } 400 name := trimSpace(data[0:16]) 401 size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64) 402 if err != nil { 403 return errCorruptArchive 404 } 405 data = data[60:] 406 fsize := size + size&1 407 if fsize < 0 || fsize < size { 408 return errCorruptArchive 409 } 410 switch name { 411 case "__.PKGDEF": 412 r.skip(size) 413 default: 414 oldLimit := r.limit 415 r.limit = r.offset + size 416 if err := r.parseObject(nil); err != nil { 417 return fmt.Errorf("parsing archive member %q: %v", name, err) 418 } 419 r.skip(r.limit - r.offset) 420 r.limit = oldLimit 421 } 422 if size&1 != 0 { 423 r.skip(1) 424 } 425 } 426 return nil 427 } 428 429 // parseObject parses a single Go object file. 430 // The prefix is the bytes already read from the file, 431 // typically in order to detect that this is an object file. 432 // The object file consists of a textual header ending in "\n!\n" 433 // and then the part we want to parse begins. 434 // The format of that part is defined in a comment at the top 435 // of src/liblink/objfile.c. 436 func (r *objReader) parseObject(prefix []byte) error { 437 r.p.MaxVersion++ 438 h := make([]byte, 0, 256) 439 h = append(h, prefix...) 440 var c1, c2, c3 byte 441 for { 442 c1, c2, c3 = c2, c3, r.readByte() 443 h = append(h, c3) 444 // The new export format can contain 0 bytes. 445 // Don't consider them errors, only look for r.err != nil. 446 if r.err != nil { 447 return errCorruptObject 448 } 449 if c1 == '\n' && c2 == '!' && c3 == '\n' { 450 break 451 } 452 } 453 454 hs := strings.Fields(string(h)) 455 if len(hs) >= 4 { 456 r.p.Arch = hs[3] 457 } 458 // TODO: extract OS + build ID if/when we need it 459 460 r.readFull(r.tmp[:8]) 461 if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go19ld")) { 462 return r.error(errCorruptObject) 463 } 464 465 b := r.readByte() 466 if b != 1 { 467 return r.error(errCorruptObject) 468 } 469 470 // Direct package dependencies. 471 for { 472 s := r.readString() 473 if s == "" { 474 break 475 } 476 r.p.Imports = append(r.p.Imports, s) 477 } 478 479 r.p.SymRefs = []SymID{{"", 0}} 480 for { 481 if b := r.readByte(); b != 0xfe { 482 if b != 0xff { 483 return r.error(errCorruptObject) 484 } 485 break 486 } 487 488 r.readRef() 489 } 490 491 dataLength := r.readInt() 492 r.readInt() // n relocations - ignore 493 r.readInt() // n pcdata - ignore 494 r.readInt() // n autom - ignore 495 r.readInt() // n funcdata - ignore 496 r.readInt() // n files - ignore 497 498 r.dataOffset = r.offset 499 r.skip(int64(dataLength)) 500 501 // Symbols. 502 for { 503 if b := r.readByte(); b != 0xfe { 504 if b != 0xff { 505 return r.error(errCorruptObject) 506 } 507 break 508 } 509 510 typ := r.readInt() 511 s := &Sym{SymID: r.readSymID()} 512 r.p.Syms = append(r.p.Syms, s) 513 s.Kind = objabi.SymKind(typ) 514 flags := r.readInt() 515 s.DupOK = flags&1 != 0 516 s.Size = r.readInt() 517 s.Type = r.readSymID() 518 s.Data = r.readData() 519 s.Reloc = make([]Reloc, r.readInt()) 520 for i := range s.Reloc { 521 rel := &s.Reloc[i] 522 rel.Offset = r.readInt() 523 rel.Size = r.readInt() 524 rel.Type = objabi.RelocType(r.readInt()) 525 rel.Add = r.readInt() 526 rel.Sym = r.readSymID() 527 } 528 529 if s.Kind == objabi.STEXT { 530 f := new(Func) 531 s.Func = f 532 f.Args = r.readInt() 533 f.Frame = r.readInt() 534 flags := r.readInt() 535 f.Leaf = flags&1 != 0 536 f.NoSplit = r.readInt() != 0 537 f.Var = make([]Var, r.readInt()) 538 for i := range f.Var { 539 v := &f.Var[i] 540 v.Name = r.readSymID().Name 541 v.Offset = r.readInt() 542 v.Kind = r.readInt() 543 v.Type = r.readSymID() 544 } 545 546 f.PCSP = r.readData() 547 f.PCFile = r.readData() 548 f.PCLine = r.readData() 549 f.PCInline = r.readData() 550 f.PCData = make([]Data, r.readInt()) 551 for i := range f.PCData { 552 f.PCData[i] = r.readData() 553 } 554 f.FuncData = make([]FuncData, r.readInt()) 555 for i := range f.FuncData { 556 f.FuncData[i].Sym = r.readSymID() 557 } 558 for i := range f.FuncData { 559 f.FuncData[i].Offset = int64(r.readInt()) // TODO 560 } 561 f.File = make([]string, r.readInt()) 562 for i := range f.File { 563 f.File[i] = r.readSymID().Name 564 } 565 f.InlTree = make([]InlinedCall, r.readInt()) 566 for i := range f.InlTree { 567 f.InlTree[i].Parent = r.readInt() 568 f.InlTree[i].File = r.readSymID().Name 569 f.InlTree[i].Line = r.readInt() 570 f.InlTree[i].Func = r.readSymID() 571 } 572 } 573 } 574 575 r.readFull(r.tmp[:7]) 576 if !bytes.Equal(r.tmp[:7], []byte("\xffgo19ld")) { 577 return r.error(errCorruptObject) 578 } 579 580 return nil 581 } 582 583 func (r *Reloc) String(insnOffset uint64) string { 584 delta := r.Offset - int(insnOffset) 585 s := fmt.Sprintf("[%d:%d]%s", delta, delta+r.Size, r.Type) 586 if r.Sym.Name != "" { 587 if r.Add != 0 { 588 return fmt.Sprintf("%s:%s+%d", s, r.Sym.Name, r.Add) 589 } 590 return fmt.Sprintf("%s:%s", s, r.Sym.Name) 591 } 592 if r.Add != 0 { 593 return fmt.Sprintf("%s:%d", s, r.Add) 594 } 595 return s 596 }