// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ld

// Reading of Go object files.
//
// Originally, Go object files were Plan 9 object files, but no longer.
// Now they are more like standard object files, in that each symbol is defined
// by an associated memory image (bytes) and a list of relocations to apply
// during linking. We do not (yet?) use a standard file format, however.
// For now, the format is chosen to be as simple as possible to read and write.
// It may change for reasons of efficiency, or we may even switch to a
// standard file format if there are compelling benefits to doing so.
// See golang.org/s/go13linker for more background.
//
// The file format is:
//
//	- magic header: "\x00\x00go17ld"
//	- byte 1 - version number
//	- sequence of strings giving dependencies (imported packages)
//	- empty string (marks end of sequence)
//	- sequence of symbol references used by the defined symbols
//	- byte 0xff (marks end of sequence)
//	- sequence of integer lengths:
//		- total data length
//		- total number of relocations
//		- total number of pcdata
//		- total number of automatics
//		- total number of funcdata
//		- total number of files
//	- data, the content of the defined symbols
//	- sequence of defined symbols
//	- magic footer: "\xff\xffgo17ld"
//	  (its leading 0xff byte is what marks the end of the symbol
//	  sequence; there is no separate terminator byte before it)
//
// All integers are stored in a zigzag varint format.
// See golang.org/s/go12symtab for a definition.
//
// Data blocks and strings are both stored as an integer
// followed by that many bytes.
//
// A symbol reference is a byte 0xfe (sanity check for synchronization)
// followed by a string name and a version.
//
// A symbol points to other symbols using an index into the symbol
// reference sequence. Index 0 corresponds to a nil Object* pointer.
// In the symbol layout described below "symref index" stands for this
// index.
//
// Each symbol is laid out as the following fields (taken from Object*):
//
//	- byte 0xfe (sanity check for synchronization)
//	- type [int]
//	- name & version [symref index]
//	- flags [int]
//		1<<0 dupok
//		1<<1 local
//	- size [int]
//	- gotype [symref index]
//	- p [data block]
//	- nr [int]
//	- r [nr relocations, sorted by off]
//
// If type == STEXT, there are a few more fields:
//
//	- args [int]
//	- locals [int]
//	- nosplit [int]
//	- flags [int]
//		1<<0 leaf
//		1<<1 C function
//		1<<2 function may call reflect.Type.Method
//	- nlocal [int]
//	- local [nlocal automatics]
//	- pcln [pcln table]
//
// Each relocation has the encoding:
//
//	- off [int]
//	- siz [int]
//	- type [int]
//	- add [int]
//	- sym [symref index]
//
// Each local has the encoding:
//
//	- asym [symref index]
//	- offset [int]
//	- type [int]
//	- gotype [symref index]
//
// The pcln table has the encoding:
//
//	- pcsp [data block]
//	- pcfile [data block]
//	- pcline [data block]
//	- npcdata [int]
//	- pcdata [npcdata data blocks]
//	- nfuncdata [int]
//	- funcdata [nfuncdata symref index]
//	- funcdatasym [nfuncdata ints]
//	- nfile [int]
//	- file [nfile symref index]
//
// The file layout and meaning of type integers are architecture-independent.
//
// TODO(rsc): The file format is good for a first pass but needs work.
//	- There are SymID in the object file that should really just be strings.
109 110 import ( 111 "bufio" 112 "bytes" 113 "cmd/internal/bio" 114 "cmd/internal/dwarf" 115 "cmd/internal/obj" 116 "crypto/sha1" 117 "encoding/base64" 118 "io" 119 "log" 120 "strconv" 121 "strings" 122 ) 123 124 const ( 125 startmagic = "\x00\x00go17ld" 126 endmagic = "\xff\xffgo17ld" 127 ) 128 129 var emptyPkg = []byte(`"".`) 130 131 // objReader reads Go object files. 132 type objReader struct { 133 rd *bufio.Reader 134 ctxt *Link 135 pkg string 136 pn string 137 // List of symbol references for the file being read. 138 dupSym *Symbol 139 140 // rdBuf is used by readString and readSymName as scratch for reading strings. 141 rdBuf []byte 142 143 refs []*Symbol 144 data []byte 145 reloc []Reloc 146 pcdata []Pcdata 147 autom []Auto 148 funcdata []*Symbol 149 funcdataoff []int64 150 file []*Symbol 151 } 152 153 func LoadObjFile(ctxt *Link, f *bio.Reader, pkg string, length int64, pn string) { 154 start := f.Offset() 155 r := &objReader{ 156 rd: f.Reader, 157 pkg: pkg, 158 ctxt: ctxt, 159 pn: pn, 160 dupSym: &Symbol{Name: ".dup"}, 161 } 162 r.loadObjFile() 163 if f.Offset() != start+length { 164 log.Fatalf("%s: unexpected end at %d, want %d", pn, f.Offset(), start+length) 165 } 166 } 167 168 func (r *objReader) loadObjFile() { 169 // Increment context version, versions are used to differentiate static files in different packages 170 r.ctxt.IncVersion() 171 172 // Magic header 173 var buf [8]uint8 174 r.readFull(buf[:]) 175 if string(buf[:]) != startmagic { 176 log.Fatalf("%s: invalid file start %x %x %x %x %x %x %x %x", r.pn, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]) 177 } 178 179 // Version 180 c, err := r.rd.ReadByte() 181 if err != nil || c != 1 { 182 log.Fatalf("%s: invalid file version number %d", r.pn, c) 183 } 184 185 // Autolib 186 for { 187 lib := r.readString() 188 if lib == "" { 189 break 190 } 191 addlib(r.ctxt, r.pkg, r.pn, lib) 192 } 193 194 // Symbol references 195 r.refs = []*Symbol{nil} // zeroth ref is nil 196 for { 197 c, err 
:= r.rd.Peek(1) 198 if err != nil { 199 log.Fatalf("%s: peeking: %v", r.pn, err) 200 } 201 if c[0] == 0xff { 202 r.rd.ReadByte() 203 break 204 } 205 r.readRef() 206 } 207 208 // Lengths 209 r.readSlices() 210 211 // Data section 212 r.readFull(r.data) 213 214 // Defined symbols 215 for { 216 c, err := r.rd.Peek(1) 217 if err != nil { 218 log.Fatalf("%s: peeking: %v", r.pn, err) 219 } 220 if c[0] == 0xff { 221 break 222 } 223 r.readSym() 224 } 225 226 // Magic footer 227 buf = [8]uint8{} 228 r.readFull(buf[:]) 229 if string(buf[:]) != endmagic { 230 log.Fatalf("%s: invalid file end", r.pn) 231 } 232 } 233 234 func (r *objReader) readSlices() { 235 n := r.readInt() 236 r.data = make([]byte, n) 237 n = r.readInt() 238 r.reloc = make([]Reloc, n) 239 n = r.readInt() 240 r.pcdata = make([]Pcdata, n) 241 n = r.readInt() 242 r.autom = make([]Auto, n) 243 n = r.readInt() 244 r.funcdata = make([]*Symbol, n) 245 r.funcdataoff = make([]int64, n) 246 n = r.readInt() 247 r.file = make([]*Symbol, n) 248 } 249 250 // Symbols are prefixed so their content doesn't get confused with the magic footer. 
251 const symPrefix = 0xfe 252 253 func (r *objReader) readSym() { 254 if c, err := r.rd.ReadByte(); c != symPrefix || err != nil { 255 log.Fatalln("readSym out of sync") 256 } 257 t := r.readInt() 258 s := r.readSymIndex() 259 flags := r.readInt() 260 dupok := flags&1 != 0 261 local := flags&2 != 0 262 size := r.readInt() 263 typ := r.readSymIndex() 264 data := r.readData() 265 nreloc := r.readInt() 266 isdup := false 267 268 var dup *Symbol 269 if s.Type != 0 && s.Type != obj.SXREF { 270 if (t == obj.SDATA || t == obj.SBSS || t == obj.SNOPTRBSS) && len(data) == 0 && nreloc == 0 { 271 if s.Size < int64(size) { 272 s.Size = int64(size) 273 } 274 if typ != nil && s.Gotype == nil { 275 s.Gotype = typ 276 } 277 return 278 } 279 280 if (s.Type == obj.SDATA || s.Type == obj.SBSS || s.Type == obj.SNOPTRBSS) && len(s.P) == 0 && len(s.R) == 0 { 281 goto overwrite 282 } 283 if s.Type != obj.SBSS && s.Type != obj.SNOPTRBSS && !dupok && !s.Attr.DuplicateOK() { 284 log.Fatalf("duplicate symbol %s (types %d and %d) in %s and %s", s.Name, s.Type, t, s.File, r.pn) 285 } 286 if len(s.P) > 0 { 287 dup = s 288 s = r.dupSym 289 isdup = true 290 } 291 } 292 293 overwrite: 294 s.File = r.pkg 295 if dupok { 296 s.Attr |= AttrDuplicateOK 297 } 298 if t == obj.SXREF { 299 log.Fatalf("bad sxref") 300 } 301 if t == 0 { 302 log.Fatalf("missing type for %s in %s", s.Name, r.pn) 303 } 304 if t == obj.SBSS && (s.Type == obj.SRODATA || s.Type == obj.SNOPTRBSS) { 305 t = int(s.Type) 306 } 307 s.Type = int16(t) 308 if s.Size < int64(size) { 309 s.Size = int64(size) 310 } 311 s.Attr.Set(AttrLocal, local) 312 if typ != nil { 313 s.Gotype = typ 314 } 315 if isdup && typ != nil { // if bss sym defined multiple times, take type from any one def 316 dup.Gotype = typ 317 } 318 s.P = data 319 if nreloc > 0 { 320 s.R = r.reloc[:nreloc:nreloc] 321 if !isdup { 322 r.reloc = r.reloc[nreloc:] 323 } 324 325 for i := 0; i < nreloc; i++ { 326 s.R[i] = Reloc{ 327 Off: r.readInt32(), 328 Siz: r.readUint8(), 329 
Type: obj.RelocType(r.readInt32()), 330 Add: r.readInt64(), 331 Sym: r.readSymIndex(), 332 } 333 } 334 } 335 336 if s.Type == obj.STEXT { 337 s.FuncInfo = new(FuncInfo) 338 pc := s.FuncInfo 339 340 pc.Args = r.readInt32() 341 pc.Locals = r.readInt32() 342 if r.readUint8() != 0 { 343 s.Attr |= AttrNoSplit 344 } 345 flags := r.readInt() 346 if flags&(1<<2) != 0 { 347 s.Attr |= AttrReflectMethod 348 } 349 n := r.readInt() 350 pc.Autom = r.autom[:n:n] 351 if !isdup { 352 r.autom = r.autom[n:] 353 } 354 355 for i := 0; i < n; i++ { 356 pc.Autom[i] = Auto{ 357 Asym: r.readSymIndex(), 358 Aoffset: r.readInt32(), 359 Name: r.readInt16(), 360 Gotype: r.readSymIndex(), 361 } 362 } 363 364 pc.Pcsp.P = r.readData() 365 pc.Pcfile.P = r.readData() 366 pc.Pcline.P = r.readData() 367 n = r.readInt() 368 pc.Pcdata = r.pcdata[:n:n] 369 if !isdup { 370 r.pcdata = r.pcdata[n:] 371 } 372 for i := 0; i < n; i++ { 373 pc.Pcdata[i].P = r.readData() 374 } 375 n = r.readInt() 376 pc.Funcdata = r.funcdata[:n:n] 377 pc.Funcdataoff = r.funcdataoff[:n:n] 378 if !isdup { 379 r.funcdata = r.funcdata[n:] 380 r.funcdataoff = r.funcdataoff[n:] 381 } 382 for i := 0; i < n; i++ { 383 pc.Funcdata[i] = r.readSymIndex() 384 } 385 for i := 0; i < n; i++ { 386 pc.Funcdataoff[i] = r.readInt64() 387 } 388 n = r.readInt() 389 pc.File = r.file[:n:n] 390 if !isdup { 391 r.file = r.file[n:] 392 } 393 for i := 0; i < n; i++ { 394 pc.File[i] = r.readSymIndex() 395 } 396 397 if !isdup { 398 if s.Attr.OnList() { 399 log.Fatalf("symbol %s listed multiple times", s.Name) 400 } 401 s.Attr |= AttrOnList 402 r.ctxt.Textp = append(r.ctxt.Textp, s) 403 } 404 } 405 if s.Type == obj.SDWARFINFO { 406 r.patchDWARFName(s) 407 } 408 } 409 410 func (r *objReader) patchDWARFName(s *Symbol) { 411 // This is kind of ugly. Really the package name should not 412 // even be included here. 
413 if s.Size < 1 || s.P[0] != dwarf.DW_ABRV_FUNCTION { 414 return 415 } 416 e := bytes.IndexByte(s.P, 0) 417 if e == -1 { 418 return 419 } 420 p := bytes.Index(s.P[:e], emptyPkg) 421 if p == -1 { 422 return 423 } 424 pkgprefix := []byte(r.pkg + ".") 425 patched := bytes.Replace(s.P[:e], emptyPkg, pkgprefix, -1) 426 427 s.P = append(patched, s.P[e:]...) 428 delta := int64(len(s.P)) - s.Size 429 s.Size = int64(len(s.P)) 430 for i := range s.R { 431 r := &s.R[i] 432 if r.Off > int32(e) { 433 r.Off += int32(delta) 434 } 435 } 436 } 437 438 func (r *objReader) readFull(b []byte) { 439 _, err := io.ReadFull(r.rd, b) 440 if err != nil { 441 log.Fatalf("%s: error reading %s", r.pn, err) 442 } 443 } 444 445 func (r *objReader) readRef() { 446 if c, err := r.rd.ReadByte(); c != symPrefix || err != nil { 447 log.Fatalf("readSym out of sync") 448 } 449 name := r.readSymName() 450 v := r.readInt() 451 if v != 0 && v != 1 { 452 log.Fatalf("invalid symbol version %d", v) 453 } 454 if v == 1 { 455 v = r.ctxt.Version 456 } 457 s := Linklookup(r.ctxt, name, v) 458 r.refs = append(r.refs, s) 459 460 if s == nil || v != 0 { 461 return 462 } 463 if s.Name[0] == '$' && len(s.Name) > 5 && s.Type == 0 && len(s.P) == 0 { 464 x, err := strconv.ParseUint(s.Name[5:], 16, 64) 465 if err != nil { 466 log.Panicf("failed to parse $-symbol %s: %v", s.Name, err) 467 } 468 s.Type = obj.SRODATA 469 s.Attr |= AttrLocal 470 switch s.Name[:5] { 471 case "$f32.": 472 if uint64(uint32(x)) != x { 473 log.Panicf("$-symbol %s too large: %d", s.Name, x) 474 } 475 Adduint32(r.ctxt, s, uint32(x)) 476 case "$f64.", "$i64.": 477 Adduint64(r.ctxt, s, x) 478 default: 479 log.Panicf("unrecognized $-symbol: %s", s.Name) 480 } 481 s.Attr.Set(AttrReachable, false) 482 } 483 if strings.HasPrefix(s.Name, "runtime.gcbits.") { 484 s.Attr |= AttrLocal 485 } 486 } 487 488 func (r *objReader) readInt64() int64 { 489 uv := uint64(0) 490 for shift := uint(0); ; shift += 7 { 491 if shift >= 64 { 492 log.Fatalf("corrupt input") 
493 } 494 c, err := r.rd.ReadByte() 495 if err != nil { 496 log.Fatalln("error reading input: ", err) 497 } 498 uv |= uint64(c&0x7F) << shift 499 if c&0x80 == 0 { 500 break 501 } 502 } 503 504 return int64(uv>>1) ^ (int64(uv<<63) >> 63) 505 } 506 507 func (r *objReader) readInt() int { 508 n := r.readInt64() 509 if int64(int(n)) != n { 510 log.Panicf("%v out of range for int", n) 511 } 512 return int(n) 513 } 514 515 func (r *objReader) readInt32() int32 { 516 n := r.readInt64() 517 if int64(int32(n)) != n { 518 log.Panicf("%v out of range for int32", n) 519 } 520 return int32(n) 521 } 522 523 func (r *objReader) readInt16() int16 { 524 n := r.readInt64() 525 if int64(int16(n)) != n { 526 log.Panicf("%v out of range for int16", n) 527 } 528 return int16(n) 529 } 530 531 func (r *objReader) readUint8() uint8 { 532 n := r.readInt64() 533 if int64(uint8(n)) != n { 534 log.Panicf("%v out of range for uint8", n) 535 } 536 return uint8(n) 537 } 538 539 func (r *objReader) readString() string { 540 n := r.readInt() 541 if cap(r.rdBuf) < n { 542 r.rdBuf = make([]byte, 2*n) 543 } 544 r.readFull(r.rdBuf[:n]) 545 return string(r.rdBuf[:n]) 546 } 547 548 func (r *objReader) readData() []byte { 549 n := r.readInt() 550 p := r.data[:n:n] 551 r.data = r.data[n:] 552 return p 553 } 554 555 // readSymName reads a symbol name, replacing all "". with pkg. 556 func (r *objReader) readSymName() string { 557 pkg := r.pkg 558 n := r.readInt() 559 if n == 0 { 560 r.readInt64() 561 return "" 562 } 563 if cap(r.rdBuf) < n { 564 r.rdBuf = make([]byte, 2*n) 565 } 566 origName, err := r.rd.Peek(n) 567 if err == bufio.ErrBufferFull { 568 // Long symbol names are rare but exist. One source is type 569 // symbols for types with long string forms. See #15104. 
570 origName = make([]byte, n) 571 r.readFull(origName) 572 } else if err != nil { 573 log.Fatalf("%s: error reading symbol: %v", r.pn, err) 574 } 575 adjName := r.rdBuf[:0] 576 for { 577 i := bytes.Index(origName, emptyPkg) 578 if i == -1 { 579 s := string(append(adjName, origName...)) 580 // Read past the peeked origName, now that we're done with it, 581 // using the rfBuf (also no longer used) as the scratch space. 582 // TODO: use bufio.Reader.Discard if available instead? 583 if err == nil { 584 r.readFull(r.rdBuf[:n]) 585 } 586 r.rdBuf = adjName[:0] // in case 2*n wasn't enough 587 588 if r.ctxt.DynlinkingGo() { 589 // These types are included in the symbol 590 // table when dynamically linking. To keep 591 // binary size down, we replace the names 592 // with SHA-1 prefixes. 593 // 594 // Keep the type.. prefix, which parts of the 595 // linker (like the DWARF generator) know means 596 // the symbol is not decodable. 597 // 598 // Leave type.runtime. symbols alone, because 599 // other parts of the linker manipulates them, 600 // and also symbols whose names would not be 601 // shortened by this process. 602 if len(s) > 14 && strings.HasPrefix(s, "type.") && !strings.HasPrefix(s, "type.runtime.") { 603 hash := sha1.Sum([]byte(s)) 604 prefix := "type." 605 if s[5] == '.' { 606 prefix = "type.." 607 } 608 s = prefix + base64.StdEncoding.EncodeToString(hash[:6]) 609 } 610 } 611 return s 612 } 613 adjName = append(adjName, origName[:i]...) 614 adjName = append(adjName, pkg...) 615 adjName = append(adjName, '.') 616 origName = origName[i+len(emptyPkg):] 617 } 618 } 619 620 // Reads the index of a symbol reference and resolves it to a symbol 621 func (r *objReader) readSymIndex() *Symbol { 622 i := r.readInt() 623 return r.refs[i] 624 }