github.com/gernest/nezuko@v0.1.2/internal/goobj/read.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package goobj implements reading of Go object files and archives. 6 // 7 // TODO(rsc): Decide where this package should live. (golang.org/issue/6932) 8 // TODO(rsc): Decide the appropriate integer types for various fields. 9 package goobj 10 11 import ( 12 "bufio" 13 "bytes" 14 "errors" 15 "fmt" 16 "github.com/gernest/nezuko/internal/objabi" 17 "io" 18 "os" 19 "strconv" 20 "strings" 21 ) 22 23 // A Sym is a named symbol in an object file. 24 type Sym struct { 25 SymID // symbol identifier (name and version) 26 Kind objabi.SymKind // kind of symbol 27 DupOK bool // are duplicate definitions okay? 28 Size int64 // size of corresponding data 29 Type SymID // symbol for Go type information 30 Data Data // memory image of symbol 31 Reloc []Reloc // relocations to apply to Data 32 Func *Func // additional data for functions 33 } 34 35 // A SymID - the combination of Name and Version - uniquely identifies 36 // a symbol within a package. 37 type SymID struct { 38 // Name is the name of a symbol. 39 Name string 40 41 // Version is zero for symbols with global visibility. 42 // Symbols with only file visibility (such as file-level static 43 // declarations in C) have a non-zero version distinguishing 44 // a symbol in one file from a symbol of the same name 45 // in another file 46 Version int64 47 } 48 49 func (s SymID) String() string { 50 if s.Version == 0 { 51 return s.Name 52 } 53 return fmt.Sprintf("%s<%d>", s.Name, s.Version) 54 } 55 56 // A Data is a reference to data stored in an object file. 57 // It records the offset and size of the data, so that a client can 58 // read the data only if necessary. 
type Data struct {
	Offset int64 // offset at which the data begins
	Size   int64 // length of the data
}

// A Reloc describes a relocation applied to a memory image to refer
// to an address within a particular symbol.
type Reloc struct {
	// The bytes at [Offset, Offset+Size) within the containing Sym
	// should be updated to refer to the address Add bytes after the start
	// of the symbol Sym.
	Offset int64
	Size   int64
	Sym    SymID
	Add    int64

	// The Type records the form of address expected in the bytes
	// described by the previous fields: absolute, PC-relative, and so on.
	// TODO(rsc): The interpretation of Type is not exposed by this package.
	Type objabi.RelocType
}

// A Var describes a variable in a function stack frame: a declared
// local variable, an input argument, or an output result.
type Var struct {
	// The combination of Name, Kind, and Offset uniquely
	// identifies a variable in a function stack frame.
	// Using fewer of these - in particular, using only Name - does not.
	Name   string // Name of variable.
	Kind   int64  // TODO(rsc): Define meaning.
	Offset int64  // Frame offset. TODO(rsc): Define meaning.

	Type SymID // Go type for variable.
}

// Func contains additional per-symbol information specific to functions.
95 type Func struct { 96 Args int64 // size in bytes of argument frame: inputs and outputs 97 Frame int64 // size in bytes of local variable frame 98 Leaf bool // function omits save of link register (ARM) 99 NoSplit bool // function omits stack split prologue 100 Var []Var // detail about local variables 101 PCSP Data // PC → SP offset map 102 PCFile Data // PC → file number map (index into File) 103 PCLine Data // PC → line number map 104 PCInline Data // PC → inline tree index map 105 PCData []Data // PC → runtime support data map 106 FuncData []FuncData // non-PC-specific runtime support data 107 File []string // paths indexed by PCFile 108 InlTree []InlinedCall 109 } 110 111 // TODO: Add PCData []byte and PCDataIter (similar to liblink). 112 113 // A FuncData is a single function-specific data value. 114 type FuncData struct { 115 Sym SymID // symbol holding data 116 Offset int64 // offset into symbol for funcdata pointer 117 } 118 119 // An InlinedCall is a node in an InlTree. 120 // See github.com/gernest/nezuko/internal/obj.InlTree for details. 121 type InlinedCall struct { 122 Parent int64 123 File string 124 Line int64 125 Func SymID 126 ParentPC int64 127 } 128 129 // A Package is a parsed Go object file or archive defining a Go package. 130 type Package struct { 131 ImportPath string // import path denoting this package 132 Imports []string // packages imported by this package 133 SymRefs []SymID // list of symbol names and versions referred to by this pack 134 Syms []*Sym // symbols defined by this package 135 MaxVersion int64 // maximum Version in any SymID in Syms 136 Arch string // architecture 137 Native []*NativeReader // native object data (e.g. 
ELF) 138 } 139 140 type NativeReader struct { 141 Name string 142 io.ReaderAt 143 } 144 145 var ( 146 archiveHeader = []byte("!<arch>\n") 147 archiveMagic = []byte("`\n") 148 goobjHeader = []byte("go objec") // truncated to size of archiveHeader 149 150 errCorruptArchive = errors.New("corrupt archive") 151 errTruncatedArchive = errors.New("truncated archive") 152 errCorruptObject = errors.New("corrupt object file") 153 errNotObject = errors.New("unrecognized object file format") 154 ) 155 156 // An objReader is an object file reader. 157 type objReader struct { 158 p *Package 159 b *bufio.Reader 160 f *os.File 161 err error 162 offset int64 163 dataOffset int64 164 limit int64 165 tmp [256]byte 166 pkgprefix string 167 } 168 169 // init initializes r to read package p from f. 170 func (r *objReader) init(f *os.File, p *Package) { 171 r.f = f 172 r.p = p 173 r.offset, _ = f.Seek(0, io.SeekCurrent) 174 r.limit, _ = f.Seek(0, io.SeekEnd) 175 f.Seek(r.offset, io.SeekStart) 176 r.b = bufio.NewReader(f) 177 r.pkgprefix = objabi.PathToPrefix(p.ImportPath) + "." 178 } 179 180 // error records that an error occurred. 181 // It returns only the first error, so that an error 182 // caused by an earlier error does not discard information 183 // about the earlier error. 184 func (r *objReader) error(err error) error { 185 if r.err == nil { 186 if err == io.EOF { 187 err = io.ErrUnexpectedEOF 188 } 189 r.err = err 190 } 191 // panic("corrupt") // useful for debugging 192 return r.err 193 } 194 195 // peek returns the next n bytes without advancing the reader. 196 func (r *objReader) peek(n int) ([]byte, error) { 197 if r.err != nil { 198 return nil, r.err 199 } 200 if r.offset >= r.limit { 201 r.error(io.ErrUnexpectedEOF) 202 return nil, r.err 203 } 204 b, err := r.b.Peek(n) 205 if err != nil { 206 if err != bufio.ErrBufferFull { 207 r.error(err) 208 } 209 } 210 return b, err 211 } 212 213 // readByte reads and returns a byte from the input file. 
214 // On I/O error or EOF, it records the error but returns byte 0. 215 // A sequence of 0 bytes will eventually terminate any 216 // parsing state in the object file. In particular, it ends the 217 // reading of a varint. 218 func (r *objReader) readByte() byte { 219 if r.err != nil { 220 return 0 221 } 222 if r.offset >= r.limit { 223 r.error(io.ErrUnexpectedEOF) 224 return 0 225 } 226 b, err := r.b.ReadByte() 227 if err != nil { 228 if err == io.EOF { 229 err = io.ErrUnexpectedEOF 230 } 231 r.error(err) 232 b = 0 233 } else { 234 r.offset++ 235 } 236 return b 237 } 238 239 // read reads exactly len(b) bytes from the input file. 240 // If an error occurs, read returns the error but also 241 // records it, so it is safe for callers to ignore the result 242 // as long as delaying the report is not a problem. 243 func (r *objReader) readFull(b []byte) error { 244 if r.err != nil { 245 return r.err 246 } 247 if r.offset+int64(len(b)) > r.limit { 248 return r.error(io.ErrUnexpectedEOF) 249 } 250 n, err := io.ReadFull(r.b, b) 251 r.offset += int64(n) 252 if err != nil { 253 return r.error(err) 254 } 255 return nil 256 } 257 258 // readInt reads a zigzag varint from the input file. 259 func (r *objReader) readInt() int64 { 260 var u uint64 261 262 for shift := uint(0); ; shift += 7 { 263 if shift >= 64 { 264 r.error(errCorruptObject) 265 return 0 266 } 267 c := r.readByte() 268 u |= uint64(c&0x7F) << shift 269 if c&0x80 == 0 { 270 break 271 } 272 } 273 274 return int64(u>>1) ^ (int64(u) << 63 >> 63) 275 } 276 277 // readString reads a length-delimited string from the input file. 278 func (r *objReader) readString() string { 279 n := r.readInt() 280 buf := make([]byte, n) 281 r.readFull(buf) 282 return string(buf) 283 } 284 285 // readSymID reads a SymID from the input file. 
286 func (r *objReader) readSymID() SymID { 287 i := r.readInt() 288 return r.p.SymRefs[i] 289 } 290 291 func (r *objReader) readRef() { 292 name, abiOrStatic := r.readString(), r.readInt() 293 294 // In a symbol name in an object file, "". denotes the 295 // prefix for the package in which the object file has been found. 296 // Expand it. 297 name = strings.ReplaceAll(name, `"".`, r.pkgprefix) 298 299 // The ABI field records either the ABI or -1 for static symbols. 300 // 301 // To distinguish different static symbols with the same name, 302 // we use the symbol "version". Version 0 corresponds to 303 // global symbols, and each file has a unique version > 0 for 304 // all of its static symbols. The version is incremented on 305 // each call to parseObject. 306 // 307 // For global symbols, we currently ignore the ABI. 308 // 309 // TODO(austin): Record the ABI in SymID. Since this is a 310 // public API, we'll have to keep Version as 0 and record the 311 // ABI in a new field (which differs from how the linker does 312 // this, but that's okay). Show the ABI in things like 313 // objdump. 314 var vers int64 315 if abiOrStatic == -1 { 316 // Static symbol 317 vers = r.p.MaxVersion 318 } 319 r.p.SymRefs = append(r.p.SymRefs, SymID{name, vers}) 320 } 321 322 // readData reads a data reference from the input file. 323 func (r *objReader) readData() Data { 324 n := r.readInt() 325 d := Data{Offset: r.dataOffset, Size: n} 326 r.dataOffset += n 327 return d 328 } 329 330 // skip skips n bytes in the input. 331 func (r *objReader) skip(n int64) { 332 if n < 0 { 333 r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip")) 334 } 335 if n < int64(len(r.tmp)) { 336 // Since the data is so small, a just reading from the buffered 337 // reader is better than flushing the buffer and seeking. 338 r.readFull(r.tmp[:n]) 339 } else if n <= int64(r.b.Buffered()) { 340 // Even though the data is not small, it has already been read. 
341 // Advance the buffer instead of seeking. 342 for n > int64(len(r.tmp)) { 343 r.readFull(r.tmp[:]) 344 n -= int64(len(r.tmp)) 345 } 346 r.readFull(r.tmp[:n]) 347 } else { 348 // Seek, giving up buffered data. 349 _, err := r.f.Seek(r.offset+n, io.SeekStart) 350 if err != nil { 351 r.error(err) 352 } 353 r.offset += n 354 r.b.Reset(r.f) 355 } 356 } 357 358 // Parse parses an object file or archive from f, 359 // assuming that its import path is pkgpath. 360 func Parse(f *os.File, pkgpath string) (*Package, error) { 361 if pkgpath == "" { 362 pkgpath = `""` 363 } 364 p := new(Package) 365 p.ImportPath = pkgpath 366 367 var rd objReader 368 rd.init(f, p) 369 err := rd.readFull(rd.tmp[:8]) 370 if err != nil { 371 if err == io.EOF { 372 err = io.ErrUnexpectedEOF 373 } 374 return nil, err 375 } 376 377 switch { 378 default: 379 return nil, errNotObject 380 381 case bytes.Equal(rd.tmp[:8], archiveHeader): 382 if err := rd.parseArchive(); err != nil { 383 return nil, err 384 } 385 case bytes.Equal(rd.tmp[:8], goobjHeader): 386 if err := rd.parseObject(goobjHeader); err != nil { 387 return nil, err 388 } 389 } 390 391 return p, nil 392 } 393 394 // trimSpace removes trailing spaces from b and returns the corresponding string. 395 // This effectively parses the form used in archive headers. 396 func trimSpace(b []byte) string { 397 return string(bytes.TrimRight(b, " ")) 398 } 399 400 // parseArchive parses a Unix archive of Go object files. 401 func (r *objReader) parseArchive() error { 402 for r.offset < r.limit { 403 if err := r.readFull(r.tmp[:60]); err != nil { 404 return err 405 } 406 data := r.tmp[:60] 407 408 // Each file is preceded by this text header (slice indices in first column): 409 // 0:16 name 410 // 16:28 date 411 // 28:34 uid 412 // 34:40 gid 413 // 40:48 mode 414 // 48:58 size 415 // 58:60 magic - `\n 416 // We only care about name, size, and magic. 417 // The fields are space-padded on the right. 418 // The size is in decimal. 
419 // The file data - size bytes - follows the header. 420 // Headers are 2-byte aligned, so if size is odd, an extra padding 421 // byte sits between the file data and the next header. 422 // The file data that follows is padded to an even number of bytes: 423 // if size is odd, an extra padding byte is inserted betw the next header. 424 if len(data) < 60 { 425 return errTruncatedArchive 426 } 427 if !bytes.Equal(data[58:60], archiveMagic) { 428 return errCorruptArchive 429 } 430 name := trimSpace(data[0:16]) 431 size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64) 432 if err != nil { 433 return errCorruptArchive 434 } 435 data = data[60:] 436 fsize := size + size&1 437 if fsize < 0 || fsize < size { 438 return errCorruptArchive 439 } 440 switch name { 441 case "__.PKGDEF": 442 r.skip(size) 443 default: 444 oldLimit := r.limit 445 r.limit = r.offset + size 446 447 p, err := r.peek(8) 448 if err != nil { 449 return err 450 } 451 if bytes.Equal(p, goobjHeader) { 452 if err := r.parseObject(nil); err != nil { 453 return fmt.Errorf("parsing archive member %q: %v", name, err) 454 } 455 } else { 456 r.p.Native = append(r.p.Native, &NativeReader{ 457 Name: name, 458 ReaderAt: io.NewSectionReader(r.f, r.offset, size), 459 }) 460 } 461 462 r.skip(r.limit - r.offset) 463 r.limit = oldLimit 464 } 465 if size&1 != 0 { 466 r.skip(1) 467 } 468 } 469 return nil 470 } 471 472 // parseObject parses a single Go object file. 473 // The prefix is the bytes already read from the file, 474 // typically in order to detect that this is an object file. 475 // The object file consists of a textual header ending in "\n!\n" 476 // and then the part we want to parse begins. 477 // The format of that part is defined in a comment at the top 478 // of src/liblink/objfile.c. 479 func (r *objReader) parseObject(prefix []byte) error { 480 r.p.MaxVersion++ 481 h := make([]byte, 0, 256) 482 h = append(h, prefix...) 
483 var c1, c2, c3 byte 484 for { 485 c1, c2, c3 = c2, c3, r.readByte() 486 h = append(h, c3) 487 // The new export format can contain 0 bytes. 488 // Don't consider them errors, only look for r.err != nil. 489 if r.err != nil { 490 return errCorruptObject 491 } 492 if c1 == '\n' && c2 == '!' && c3 == '\n' { 493 break 494 } 495 } 496 497 hs := strings.Fields(string(h)) 498 if len(hs) >= 4 { 499 r.p.Arch = hs[3] 500 } 501 // TODO: extract OS + build ID if/when we need it 502 503 r.readFull(r.tmp[:8]) 504 if !bytes.Equal(r.tmp[:8], []byte("\x00go112ld")) { 505 return r.error(errCorruptObject) 506 } 507 508 b := r.readByte() 509 if b != 1 { 510 return r.error(errCorruptObject) 511 } 512 513 // Direct package dependencies. 514 for { 515 s := r.readString() 516 if s == "" { 517 break 518 } 519 r.p.Imports = append(r.p.Imports, s) 520 } 521 522 r.p.SymRefs = []SymID{{"", 0}} 523 for { 524 if b := r.readByte(); b != 0xfe { 525 if b != 0xff { 526 return r.error(errCorruptObject) 527 } 528 break 529 } 530 531 r.readRef() 532 } 533 534 dataLength := r.readInt() 535 r.readInt() // n relocations - ignore 536 r.readInt() // n pcdata - ignore 537 r.readInt() // n autom - ignore 538 r.readInt() // n funcdata - ignore 539 r.readInt() // n files - ignore 540 541 r.dataOffset = r.offset 542 r.skip(dataLength) 543 544 // Symbols. 
545 for { 546 if b := r.readByte(); b != 0xfe { 547 if b != 0xff { 548 return r.error(errCorruptObject) 549 } 550 break 551 } 552 553 typ := r.readByte() 554 s := &Sym{SymID: r.readSymID()} 555 r.p.Syms = append(r.p.Syms, s) 556 s.Kind = objabi.SymKind(typ) 557 flags := r.readInt() 558 s.DupOK = flags&1 != 0 559 s.Size = r.readInt() 560 s.Type = r.readSymID() 561 s.Data = r.readData() 562 s.Reloc = make([]Reloc, r.readInt()) 563 for i := range s.Reloc { 564 rel := &s.Reloc[i] 565 rel.Offset = r.readInt() 566 rel.Size = r.readInt() 567 rel.Type = objabi.RelocType(r.readInt()) 568 rel.Add = r.readInt() 569 rel.Sym = r.readSymID() 570 } 571 572 if s.Kind == objabi.STEXT { 573 f := new(Func) 574 s.Func = f 575 f.Args = r.readInt() 576 f.Frame = r.readInt() 577 flags := r.readInt() 578 f.Leaf = flags&(1<<0) != 0 579 f.NoSplit = r.readInt() != 0 580 f.Var = make([]Var, r.readInt()) 581 for i := range f.Var { 582 v := &f.Var[i] 583 v.Name = r.readSymID().Name 584 v.Offset = r.readInt() 585 v.Kind = r.readInt() 586 v.Type = r.readSymID() 587 } 588 589 f.PCSP = r.readData() 590 f.PCFile = r.readData() 591 f.PCLine = r.readData() 592 f.PCInline = r.readData() 593 f.PCData = make([]Data, r.readInt()) 594 for i := range f.PCData { 595 f.PCData[i] = r.readData() 596 } 597 f.FuncData = make([]FuncData, r.readInt()) 598 for i := range f.FuncData { 599 f.FuncData[i].Sym = r.readSymID() 600 } 601 for i := range f.FuncData { 602 f.FuncData[i].Offset = r.readInt() // TODO 603 } 604 f.File = make([]string, r.readInt()) 605 for i := range f.File { 606 f.File[i] = r.readSymID().Name 607 } 608 f.InlTree = make([]InlinedCall, r.readInt()) 609 for i := range f.InlTree { 610 f.InlTree[i].Parent = r.readInt() 611 f.InlTree[i].File = r.readSymID().Name 612 f.InlTree[i].Line = r.readInt() 613 f.InlTree[i].Func = r.readSymID() 614 f.InlTree[i].ParentPC = r.readInt() 615 } 616 } 617 } 618 619 r.readFull(r.tmp[:7]) 620 if !bytes.Equal(r.tmp[:7], []byte("go112ld")) { 621 return 
r.error(errCorruptObject) 622 } 623 624 return nil 625 } 626 627 func (r *Reloc) String(insnOffset uint64) string { 628 delta := r.Offset - int64(insnOffset) 629 s := fmt.Sprintf("[%d:%d]%s", delta, delta+r.Size, r.Type) 630 if r.Sym.Name != "" { 631 if r.Add != 0 { 632 return fmt.Sprintf("%s:%s+%d", s, r.Sym.Name, r.Add) 633 } 634 return fmt.Sprintf("%s:%s", s, r.Sym.Name) 635 } 636 if r.Add != 0 { 637 return fmt.Sprintf("%s:%d", s, r.Add) 638 } 639 return s 640 }