github.com/tidwall/go@v0.0.0-20170415222209-6694a6888b7d/src/cmd/link/internal/ld/objfile.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package ld 6 7 // Reading of Go object files. 8 // 9 // Originally, Go object files were Plan 9 object files, but no longer. 10 // Now they are more like standard object files, in that each symbol is defined 11 // by an associated memory image (bytes) and a list of relocations to apply 12 // during linking. We do not (yet?) use a standard file format, however. 13 // For now, the format is chosen to be as simple as possible to read and write. 14 // It may change for reasons of efficiency, or we may even switch to a 15 // standard file format if there are compelling benefits to doing so. 16 // See golang.org/s/go13linker for more background. 17 // 18 // The file format is: 19 // 20 // - magic header: "\x00\x00go19ld" 21 // - byte 1 - version number 22 // - sequence of strings giving dependencies (imported packages) 23 // - empty string (marks end of sequence) 24 // - sequence of symbol references used by the defined symbols 25 // - byte 0xff (marks end of sequence) 26 // - sequence of integer lengths: 27 // - total data length 28 // - total number of relocations 29 // - total number of pcdata 30 // - total number of automatics 31 // - total number of funcdata 32 // - total number of files 33 // - data, the content of the defined symbols 34 // - sequence of defined symbols 35 // - byte 0xff (marks end of sequence) 36 // - magic footer: "\xff\xffgo19ld" 37 // 38 // All integers are stored in a zigzag varint format. 39 // See golang.org/s/go12symtab for a definition. 40 // 41 // Data blocks and strings are both stored as an integer 42 // followed by that many bytes. 43 // 44 // A symbol reference is a string name followed by a version. 45 // 46 // A symbol points to other symbols using an index into the symbol 47 // reference sequence. Index 0 corresponds to a nil Object* pointer. 48 // In the symbol layout described below "symref index" stands for this 49 // index. 50 // 51 // Each symbol is laid out as the following fields (taken from Object*): 52 // 53 // - byte 0xfe (sanity check for synchronization) 54 // - type [int] 55 // - name & version [symref index] 56 // - flags [int] 57 // 1<<0 dupok 58 // 1<<1 local 59 // 1<<2 add to typelink table 60 // - size [int] 61 // - gotype [symref index] 62 // - p [data block] 63 // - nr [int] 64 // - r [nr relocations, sorted by off] 65 // 66 // If type == STEXT, there are a few more fields: 67 // 68 // - args [int] 69 // - locals [int] 70 // - nosplit [int] 71 // - flags [int] 72 // 1<<0 leaf 73 // 1<<1 C function 74 // 1<<2 function may call reflect.Type.Method 75 // - nlocal [int] 76 // - local [nlocal automatics] 77 // - pcln [pcln table] 78 // 79 // Each relocation has the encoding: 80 // 81 // - off [int] 82 // - siz [int] 83 // - type [int] 84 // - add [int] 85 // - sym [symref index] 86 // 87 // Each local has the encoding: 88 // 89 // - asym [symref index] 90 // - offset [int] 91 // - type [int] 92 // - gotype [symref index] 93 // 94 // The pcln table has the encoding: 95 // 96 // - pcsp [data block] 97 // - pcfile [data block] 98 // - pcline [data block] 99 // - pcinline [data block] 100 // - npcdata [int] 101 // - pcdata [npcdata data blocks] 102 // - nfuncdata [int] 103 // - funcdata [nfuncdata symref index] 104 // - funcdatasym [nfuncdata ints] 105 // - nfile [int] 106 // - file [nfile symref index] 107 // - ninlinedcall [int] 108 // - inlinedcall [ninlinedcall int symref int symref] 109 // 110 // The file layout and meaning of type integers are architecture-independent. 111 // 112 // TODO(rsc): The file format is good for a first pass but needs work. 113 // - There are SymID in the object file that should really just be strings. 114 115 import ( 116 "bufio" 117 "bytes" 118 "cmd/internal/bio" 119 "cmd/internal/dwarf" 120 "cmd/internal/obj" 121 "crypto/sha1" 122 "encoding/base64" 123 "io" 124 "log" 125 "strconv" 126 "strings" 127 ) 128 129 const ( 130 startmagic = "\x00\x00go19ld" 131 endmagic = "\xff\xffgo19ld" 132 ) 133 134 var emptyPkg = []byte(`"".`) 135 136 // objReader reads Go object files. 137 type objReader struct { 138 rd *bufio.Reader 139 ctxt *Link 140 lib *Library 141 pn string 142 dupSym *Symbol 143 localSymVersion int 144 145 // rdBuf is used by readString and readSymName as scratch for reading strings. 146 rdBuf []byte 147 148 // List of symbol references for the file being read. 149 refs []*Symbol 150 data []byte 151 reloc []Reloc 152 pcdata []Pcdata 153 autom []Auto 154 funcdata []*Symbol 155 funcdataoff []int64 156 file []*Symbol 157 } 158 159 func LoadObjFile(ctxt *Link, f *bio.Reader, lib *Library, length int64, pn string) { 160 161 start := f.Offset() 162 r := &objReader{ 163 rd: f.Reader, 164 lib: lib, 165 ctxt: ctxt, 166 pn: pn, 167 dupSym: &Symbol{Name: ".dup"}, 168 localSymVersion: ctxt.Syms.IncVersion(), 169 } 170 r.loadObjFile() 171 if f.Offset() != start+length { 172 log.Fatalf("%s: unexpected end at %d, want %d", pn, f.Offset(), start+length) 173 } 174 } 175 176 func (r *objReader) loadObjFile() { 177 pkg := pathtoprefix(r.lib.Pkg) 178 179 // Magic header 180 var buf [8]uint8 181 r.readFull(buf[:]) 182 if string(buf[:]) != startmagic { 183 log.Fatalf("%s: invalid file start %x %x %x %x %x %x %x %x", r.pn, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]) 184 } 185 186 // Version 187 c, err := r.rd.ReadByte() 188 if err != nil || c != 1 { 189 log.Fatalf("%s: invalid file version number %d", r.pn, c) 190 } 191 192 // Autolib 193 for { 194 lib := r.readString() 195 if lib == "" { 196 break 197 } 198 l := addlib(r.ctxt, pkg, r.pn, lib) 199 if l != nil { 200 r.lib.imports = append(r.lib.imports, l) 201 } 202 } 203 204 // Symbol references 205 r.refs = []*Symbol{nil} // zeroth ref is nil 206 for { 207 c, err := r.rd.Peek(1) 208 if err != nil { 209 log.Fatalf("%s: peeking: %v", r.pn, err) 210 } 211 if c[0] == 0xff { 212 r.rd.ReadByte() 213 break 214 } 215 r.readRef() 216 } 217 218 // Lengths 219 r.readSlices() 220 221 // Data section 222 r.readFull(r.data) 223 224 // Defined symbols 225 for { 226 c, err := r.rd.Peek(1) 227 if err != nil { 228 log.Fatalf("%s: peeking: %v", r.pn, err) 229 } 230 if c[0] == 0xff { 231 break 232 } 233 r.readSym() 234 } 235 236 // Magic footer 237 buf = [8]uint8{} 238 r.readFull(buf[:]) 239 if string(buf[:]) != endmagic { 240 log.Fatalf("%s: invalid file end", r.pn) 241 } 242 } 243 244 func (r *objReader) readSlices() { 245 n := r.readInt() 246 r.data = make([]byte, n) 247 n = r.readInt() 248 r.reloc = make([]Reloc, n) 249 n = r.readInt() 250 r.pcdata = make([]Pcdata, n) 251 n = r.readInt() 252 r.autom = make([]Auto, n) 253 n = r.readInt() 254 r.funcdata = make([]*Symbol, n) 255 r.funcdataoff = make([]int64, n) 256 n = r.readInt() 257 r.file = make([]*Symbol, n) 258 } 259 260 // Symbols are prefixed so their content doesn't get confused with the magic footer. 261 const symPrefix = 0xfe 262 263 func (r *objReader) readSym() { 264 if c, err := r.rd.ReadByte(); c != symPrefix || err != nil { 265 log.Fatalln("readSym out of sync") 266 } 267 t := obj.SymKind(r.readInt()) 268 s := r.readSymIndex() 269 flags := r.readInt() 270 dupok := flags&1 != 0 271 local := flags&2 != 0 272 makeTypelink := flags&4 != 0 273 size := r.readInt() 274 typ := r.readSymIndex() 275 data := r.readData() 276 nreloc := r.readInt() 277 pkg := pathtoprefix(r.lib.Pkg) 278 isdup := false 279 280 var dup *Symbol 281 if s.Type != 0 && s.Type != obj.SXREF { 282 if (t == obj.SDATA || t == obj.SBSS || t == obj.SNOPTRBSS) && len(data) == 0 && nreloc == 0 { 283 if s.Size < int64(size) { 284 s.Size = int64(size) 285 } 286 if typ != nil && s.Gotype == nil { 287 s.Gotype = typ 288 } 289 return 290 } 291 292 if (s.Type == obj.SDATA || s.Type == obj.SBSS || s.Type == obj.SNOPTRBSS) && len(s.P) == 0 && len(s.R) == 0 { 293 goto overwrite 294 } 295 if s.Type != obj.SBSS && s.Type != obj.SNOPTRBSS && !dupok && !s.Attr.DuplicateOK() { 296 log.Fatalf("duplicate symbol %s (types %d and %d) in %s and %s", s.Name, s.Type, t, s.File, r.pn) 297 } 298 if len(s.P) > 0 { 299 dup = s 300 s = r.dupSym 301 isdup = true 302 } 303 } 304 305 overwrite: 306 s.File = pkg 307 if dupok { 308 s.Attr |= AttrDuplicateOK 309 } 310 if t == obj.SXREF { 311 log.Fatalf("bad sxref") 312 } 313 if t == 0 { 314 log.Fatalf("missing type for %s in %s", s.Name, r.pn) 315 } 316 if t == obj.SBSS && (s.Type == obj.SRODATA || s.Type == obj.SNOPTRBSS) { 317 t = s.Type 318 } 319 s.Type = t 320 if s.Size < int64(size) { 321 s.Size = int64(size) 322 } 323 s.Attr.Set(AttrLocal, local) 324 s.Attr.Set(AttrMakeTypelink, makeTypelink) 325 if typ != nil { 326 s.Gotype = typ 327 } 328 if isdup && typ != nil { // if bss sym defined multiple times, take type from any one def 329 dup.Gotype = typ 330 } 331 s.P = data 332 if nreloc > 0 { 333 s.R = r.reloc[:nreloc:nreloc] 334 if !isdup { 335 r.reloc = r.reloc[nreloc:] 336 } 337 338 for i := 0; i < nreloc; i++ { 339 s.R[i] = Reloc{ 340 Off: r.readInt32(), 341 Siz: r.readUint8(), 342 Type: obj.RelocType(r.readInt32()), 343 Add: r.readInt64(), 344 Sym: r.readSymIndex(), 345 } 346 } 347 } 348 349 if s.Type == obj.STEXT { 350 s.FuncInfo = new(FuncInfo) 351 pc := s.FuncInfo 352 353 pc.Args = r.readInt32() 354 pc.Locals = r.readInt32() 355 if r.readUint8() != 0 { 356 s.Attr |= AttrNoSplit 357 } 358 flags := r.readInt() 359 if flags&(1<<2) != 0 { 360 s.Attr |= AttrReflectMethod 361 } 362 n := r.readInt() 363 pc.Autom = r.autom[:n:n] 364 if !isdup { 365 r.autom = r.autom[n:] 366 } 367 368 for i := 0; i < n; i++ { 369 pc.Autom[i] = Auto{ 370 Asym: r.readSymIndex(), 371 Aoffset: r.readInt32(), 372 Name: r.readInt16(), 373 Gotype: r.readSymIndex(), 374 } 375 } 376 377 pc.Pcsp.P = r.readData() 378 pc.Pcfile.P = r.readData() 379 pc.Pcline.P = r.readData() 380 pc.Pcinline.P = r.readData() 381 n = r.readInt() 382 pc.Pcdata = r.pcdata[:n:n] 383 if !isdup { 384 r.pcdata = r.pcdata[n:] 385 } 386 for i := 0; i < n; i++ { 387 pc.Pcdata[i].P = r.readData() 388 } 389 n = r.readInt() 390 pc.Funcdata = r.funcdata[:n:n] 391 pc.Funcdataoff = r.funcdataoff[:n:n] 392 if !isdup { 393 r.funcdata = r.funcdata[n:] 394 r.funcdataoff = r.funcdataoff[n:] 395 } 396 for i := 0; i < n; i++ { 397 pc.Funcdata[i] = r.readSymIndex() 398 } 399 for i := 0; i < n; i++ { 400 pc.Funcdataoff[i] = r.readInt64() 401 } 402 n = r.readInt() 403 pc.File = r.file[:n:n] 404 if !isdup { 405 r.file = r.file[n:] 406 } 407 for i := 0; i < n; i++ { 408 pc.File[i] = r.readSymIndex() 409 } 410 n = r.readInt() 411 pc.InlTree = make([]InlinedCall, n) 412 for i := 0; i < n; i++ { 413 pc.InlTree[i].Parent = r.readInt32() 414 pc.InlTree[i].File = r.readSymIndex() 415 pc.InlTree[i].Line = r.readInt32() 416 pc.InlTree[i].Func = r.readSymIndex() 417 } 418 419 if !dupok { 420 if s.Attr.OnList() { 421 log.Fatalf("symbol %s listed multiple times", s.Name) 422 } 423 s.Attr |= AttrOnList 424 r.lib.textp = append(r.lib.textp, s) 425 } else { 426 // there may ba a dup in another package 427 // put into a temp list and add to text later 428 if !isdup { 429 r.lib.dupTextSyms = append(r.lib.dupTextSyms, s) 430 } else { 431 r.lib.dupTextSyms = append(r.lib.dupTextSyms, dup) 432 } 433 } 434 } 435 if s.Type == obj.SDWARFINFO { 436 r.patchDWARFName(s) 437 } 438 } 439 440 func (r *objReader) patchDWARFName(s *Symbol) { 441 // This is kind of ugly. Really the package name should not 442 // even be included here. 443 if s.Size < 1 || s.P[0] != dwarf.DW_ABRV_FUNCTION { 444 return 445 } 446 e := bytes.IndexByte(s.P, 0) 447 if e == -1 { 448 return 449 } 450 p := bytes.Index(s.P[:e], emptyPkg) 451 if p == -1 { 452 return 453 } 454 pkgprefix := []byte(pathtoprefix(r.lib.Pkg) + ".") 455 patched := bytes.Replace(s.P[:e], emptyPkg, pkgprefix, -1) 456 457 s.P = append(patched, s.P[e:]...) 458 delta := int64(len(s.P)) - s.Size 459 s.Size = int64(len(s.P)) 460 for i := range s.R { 461 r := &s.R[i] 462 if r.Off > int32(e) { 463 r.Off += int32(delta) 464 } 465 } 466 } 467 468 func (r *objReader) readFull(b []byte) { 469 _, err := io.ReadFull(r.rd, b) 470 if err != nil { 471 log.Fatalf("%s: error reading %s", r.pn, err) 472 } 473 } 474 475 func (r *objReader) readRef() { 476 if c, err := r.rd.ReadByte(); c != symPrefix || err != nil { 477 log.Fatalf("readSym out of sync") 478 } 479 name := r.readSymName() 480 v := r.readInt() 481 if v != 0 && v != 1 { 482 log.Fatalf("invalid symbol version %d", v) 483 } 484 if v == 1 { 485 v = r.localSymVersion 486 } 487 s := r.ctxt.Syms.Lookup(name, v) 488 r.refs = append(r.refs, s) 489 490 if s == nil || v != 0 { 491 return 492 } 493 if s.Name[0] == '$' && len(s.Name) > 5 && s.Type == 0 && len(s.P) == 0 { 494 x, err := strconv.ParseUint(s.Name[5:], 16, 64) 495 if err != nil { 496 log.Panicf("failed to parse $-symbol %s: %v", s.Name, err) 497 } 498 s.Type = obj.SRODATA 499 s.Attr |= AttrLocal 500 switch s.Name[:5] { 501 case "$f32.": 502 if uint64(uint32(x)) != x { 503 log.Panicf("$-symbol %s too large: %d", s.Name, x) 504 } 505 Adduint32(r.ctxt, s, uint32(x)) 506 case "$f64.", "$i64.": 507 Adduint64(r.ctxt, s, x) 508 default: 509 log.Panicf("unrecognized $-symbol: %s", s.Name) 510 } 511 s.Attr.Set(AttrReachable, false) 512 } 513 if strings.HasPrefix(s.Name, "runtime.gcbits.") { 514 s.Attr |= AttrLocal 515 } 516 } 517 518 func (r *objReader) readInt64() int64 { 519 uv := uint64(0) 520 for shift := uint(0); ; shift += 7 { 521 if shift >= 64 { 522 log.Fatalf("corrupt input") 523 } 524 c, err := r.rd.ReadByte() 525 if err != nil { 526 log.Fatalln("error reading input: ", err) 527 } 528 uv |= uint64(c&0x7F) << shift 529 if c&0x80 == 0 { 530 break 531 } 532 } 533 534 return int64(uv>>1) ^ (int64(uv<<63) >> 63) 535 } 536 537 func (r *objReader) readInt() int { 538 n := r.readInt64() 539 if int64(int(n)) != n { 540 log.Panicf("%v out of range for int", n) 541 } 542 return int(n) 543 } 544 545 func (r *objReader) readInt32() int32 { 546 n := r.readInt64() 547 if int64(int32(n)) != n { 548 log.Panicf("%v out of range for int32", n) 549 } 550 return int32(n) 551 } 552 553 func (r *objReader) readInt16() int16 { 554 n := r.readInt64() 555 if int64(int16(n)) != n { 556 log.Panicf("%v out of range for int16", n) 557 } 558 return int16(n) 559 } 560 561 func (r *objReader) readUint8() uint8 { 562 n := r.readInt64() 563 if int64(uint8(n)) != n { 564 log.Panicf("%v out of range for uint8", n) 565 } 566 return uint8(n) 567 } 568 569 func (r *objReader) readString() string { 570 n := r.readInt() 571 if cap(r.rdBuf) < n { 572 r.rdBuf = make([]byte, 2*n) 573 } 574 r.readFull(r.rdBuf[:n]) 575 return string(r.rdBuf[:n]) 576 } 577 578 func (r *objReader) readData() []byte { 579 n := r.readInt() 580 p := r.data[:n:n] 581 r.data = r.data[n:] 582 return p 583 } 584 585 // readSymName reads a symbol name, replacing all "". with pkg. 586 func (r *objReader) readSymName() string { 587 pkg := pathtoprefix(r.lib.Pkg) 588 n := r.readInt() 589 if n == 0 { 590 r.readInt64() 591 return "" 592 } 593 if cap(r.rdBuf) < n { 594 r.rdBuf = make([]byte, 2*n) 595 } 596 origName, err := r.rd.Peek(n) 597 if err == bufio.ErrBufferFull { 598 // Long symbol names are rare but exist. One source is type 599 // symbols for types with long string forms. See #15104. 600 origName = make([]byte, n) 601 r.readFull(origName) 602 } else if err != nil { 603 log.Fatalf("%s: error reading symbol: %v", r.pn, err) 604 } 605 adjName := r.rdBuf[:0] 606 for { 607 i := bytes.Index(origName, emptyPkg) 608 if i == -1 { 609 s := string(append(adjName, origName...)) 610 // Read past the peeked origName, now that we're done with it, 611 // using the rfBuf (also no longer used) as the scratch space. 612 // TODO: use bufio.Reader.Discard if available instead? 613 if err == nil { 614 r.readFull(r.rdBuf[:n]) 615 } 616 r.rdBuf = adjName[:0] // in case 2*n wasn't enough 617 618 if Buildmode == BuildmodeShared || *FlagLinkshared { 619 // These types are included in the symbol 620 // table when dynamically linking. To keep 621 // binary size down, we replace the names 622 // with SHA-1 prefixes. 623 // 624 // Keep the type.. prefix, which parts of the 625 // linker (like the DWARF generator) know means 626 // the symbol is not decodable. 627 // 628 // Leave type.runtime. symbols alone, because 629 // other parts of the linker manipulates them, 630 // and also symbols whose names would not be 631 // shortened by this process. 632 if len(s) > 14 && strings.HasPrefix(s, "type.") && !strings.HasPrefix(s, "type.runtime.") { 633 hash := sha1.Sum([]byte(s)) 634 prefix := "type." 635 if s[5] == '.' { 636 prefix = "type.." 637 } 638 s = prefix + base64.StdEncoding.EncodeToString(hash[:6]) 639 } 640 } 641 return s 642 } 643 adjName = append(adjName, origName[:i]...) 644 adjName = append(adjName, pkg...) 645 adjName = append(adjName, '.') 646 origName = origName[i+len(emptyPkg):] 647 } 648 } 649 650 // Reads the index of a symbol reference and resolves it to a symbol 651 func (r *objReader) readSymIndex() *Symbol { 652 i := r.readInt() 653 return r.refs[i] 654 }