// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ld

// Reading of Go object files.
//
// Originally, Go object files were Plan 9 object files, but no longer.
// Now they are more like standard object files, in that each symbol is defined
// by an associated memory image (bytes) and a list of relocations to apply
// during linking. We do not (yet?) use a standard file format, however.
// For now, the format is chosen to be as simple as possible to read and write.
// It may change for reasons of efficiency, or we may even switch to a
// standard file format if there are compelling benefits to doing so.
// See golang.org/s/go13linker for more background.
//
// The file format is:
//
//	- magic header: "\x00\x00go17ld"
//	- byte 1 - version number
//	- sequence of strings giving dependencies (imported packages)
//	- empty string (marks end of sequence)
//	- sequence of symbol references used by the defined symbols
//	- byte 0xff (marks end of sequence)
//	- sequence of integer lengths:
//		- total data length
//		- total number of relocations
//		- total number of pcdata
//		- total number of automatics
//		- total number of funcdata
//		- total number of files
//	- data, the content of the defined symbols
//	- sequence of defined symbols
//	- byte 0xff (marks end of sequence)
//	- magic footer: "\xff\xffgo17ld"
//
// All integers are stored in a zigzag varint format.
// See golang.org/s/go12symtab for a definition.
//
// Data blocks and strings are both stored as an integer
// followed by that many bytes.
//
// A symbol reference is a string name followed by a version.
//
// A symbol points to other symbols using an index into the symbol
// reference sequence. Index 0 corresponds to a nil LSym* pointer.
// In the symbol layout described below "symref index" stands for this
// index.
//
// Each symbol is laid out as the following fields (taken from LSym*):
//
//	- byte 0xfe (sanity check for synchronization)
//	- type [int]
//	- name & version [symref index]
//	- flags [int]
//		1 dupok
//		2 local
//	- size [int]
//	- gotype [symref index]
//	- p [data block]
//	- nr [int]
//	- r [nr relocations, sorted by off]
//
// If type == STEXT, there are a few more fields:
//
//	- args [int]
//	- locals [int]
//	- nosplit [int]
//	- flags [int]
//		1<<0 leaf
//		1<<1 C function
//		1<<2 function may call reflect.Type.Method
//	- nlocal [int]
//	- local [nlocal automatics]
//	- pcln [pcln table]
//
// Each relocation has the encoding:
//
//	- off [int]
//	- siz [int]
//	- type [int]
//	- add [int]
//	- sym [symref index]
//
// Each local has the encoding:
//
//	- asym [symref index]
//	- offset [int]
//	- type [int]
//	- gotype [symref index]
//
// The pcln table has the encoding:
//
//	- pcsp [data block]
//	- pcfile [data block]
//	- pcline [data block]
//	- npcdata [int]
//	- pcdata [npcdata data blocks]
//	- nfuncdata [int]
//	- funcdata [nfuncdata symref index]
//	- funcdatasym [nfuncdata ints]
//	- nfile [int]
//	- file [nfile symref index]
//
// The file layout and meaning of type integers are architecture-independent.
//
// TODO(rsc): The file format is good for a first pass but needs work.
//	- There are SymID in the object file that should really just be strings.
109 110 import ( 111 "bufio" 112 "bytes" 113 "cmd/internal/bio" 114 "cmd/internal/obj" 115 "crypto/sha1" 116 "encoding/base64" 117 "io" 118 "log" 119 "strconv" 120 "strings" 121 ) 122 123 const ( 124 startmagic = "\x00\x00go17ld" 125 endmagic = "\xff\xffgo17ld" 126 ) 127 128 var emptyPkg = []byte(`"".`) 129 130 // objReader reads Go object files. 131 type objReader struct { 132 rd *bufio.Reader 133 ctxt *Link 134 pkg string 135 pn string 136 // List of symbol references for the file being read. 137 dupSym *LSym 138 139 // rdBuf is used by readString and readSymName as scratch for reading strings. 140 rdBuf []byte 141 142 refs []*LSym 143 data []byte 144 reloc []Reloc 145 pcdata []Pcdata 146 autom []Auto 147 funcdata []*LSym 148 funcdataoff []int64 149 file []*LSym 150 } 151 152 func LoadObjFile(ctxt *Link, f *bio.Reader, pkg string, length int64, pn string) { 153 start := f.Offset() 154 r := &objReader{ 155 rd: f.Reader, 156 pkg: pkg, 157 ctxt: ctxt, 158 pn: pn, 159 dupSym: &LSym{Name: ".dup"}, 160 } 161 r.loadObjFile() 162 if f.Offset() != start+length { 163 log.Fatalf("%s: unexpected end at %d, want %d", pn, f.Offset(), start+length) 164 } 165 } 166 167 func (r *objReader) loadObjFile() { 168 // Increment context version, versions are used to differentiate static files in different packages 169 r.ctxt.IncVersion() 170 171 // Magic header 172 var buf [8]uint8 173 r.readFull(buf[:]) 174 if string(buf[:]) != startmagic { 175 log.Fatalf("%s: invalid file start %x %x %x %x %x %x %x %x", r.pn, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]) 176 } 177 178 // Version 179 c, err := r.rd.ReadByte() 180 if err != nil || c != 1 { 181 log.Fatalf("%s: invalid file version number %d", r.pn, c) 182 } 183 184 // Autolib 185 for { 186 lib := r.readString() 187 if lib == "" { 188 break 189 } 190 addlib(r.ctxt, r.pkg, r.pn, lib) 191 } 192 193 // Symbol references 194 r.refs = []*LSym{nil} // zeroth ref is nil 195 for { 196 c, err := r.rd.Peek(1) 197 if err != nil { 
198 log.Fatalf("%s: peeking: %v", r.pn, err) 199 } 200 if c[0] == 0xff { 201 r.rd.ReadByte() 202 break 203 } 204 r.readRef() 205 } 206 207 // Lengths 208 r.readSlices() 209 210 // Data section 211 r.readFull(r.data) 212 213 // Defined symbols 214 for { 215 c, err := r.rd.Peek(1) 216 if err != nil { 217 log.Fatalf("%s: peeking: %v", r.pn, err) 218 } 219 if c[0] == 0xff { 220 break 221 } 222 r.readSym() 223 } 224 225 // Magic footer 226 buf = [8]uint8{} 227 r.readFull(buf[:]) 228 if string(buf[:]) != endmagic { 229 log.Fatalf("%s: invalid file end", r.pn) 230 } 231 } 232 233 func (r *objReader) readSlices() { 234 n := r.readInt() 235 r.data = make([]byte, n) 236 n = r.readInt() 237 r.reloc = make([]Reloc, n) 238 n = r.readInt() 239 r.pcdata = make([]Pcdata, n) 240 n = r.readInt() 241 r.autom = make([]Auto, n) 242 n = r.readInt() 243 r.funcdata = make([]*LSym, n) 244 r.funcdataoff = make([]int64, n) 245 n = r.readInt() 246 r.file = make([]*LSym, n) 247 } 248 249 // Symbols are prefixed so their content doesn't get confused with the magic footer. 
250 const symPrefix = 0xfe 251 252 func (r *objReader) readSym() { 253 if c, err := r.rd.ReadByte(); c != symPrefix || err != nil { 254 log.Fatalln("readSym out of sync") 255 } 256 t := r.readInt() 257 s := r.readSymIndex() 258 flags := r.readInt() 259 dupok := flags&1 != 0 260 local := flags&2 != 0 261 size := r.readInt() 262 typ := r.readSymIndex() 263 data := r.readData() 264 nreloc := r.readInt() 265 isdup := false 266 267 var dup *LSym 268 if s.Type != 0 && s.Type != obj.SXREF { 269 if (t == obj.SDATA || t == obj.SBSS || t == obj.SNOPTRBSS) && len(data) == 0 && nreloc == 0 { 270 if s.Size < int64(size) { 271 s.Size = int64(size) 272 } 273 if typ != nil && s.Gotype == nil { 274 s.Gotype = typ 275 } 276 return 277 } 278 279 if (s.Type == obj.SDATA || s.Type == obj.SBSS || s.Type == obj.SNOPTRBSS) && len(s.P) == 0 && len(s.R) == 0 { 280 goto overwrite 281 } 282 if s.Type != obj.SBSS && s.Type != obj.SNOPTRBSS && !dupok && !s.Attr.DuplicateOK() { 283 log.Fatalf("duplicate symbol %s (types %d and %d) in %s and %s", s.Name, s.Type, t, s.File, r.pn) 284 } 285 if len(s.P) > 0 { 286 dup = s 287 s = r.dupSym 288 isdup = true 289 } 290 } 291 292 overwrite: 293 s.File = r.pkg 294 if dupok { 295 s.Attr |= AttrDuplicateOK 296 } 297 if t == obj.SXREF { 298 log.Fatalf("bad sxref") 299 } 300 if t == 0 { 301 log.Fatalf("missing type for %s in %s", s.Name, r.pn) 302 } 303 if t == obj.SBSS && (s.Type == obj.SRODATA || s.Type == obj.SNOPTRBSS) { 304 t = int(s.Type) 305 } 306 s.Type = int16(t) 307 if s.Size < int64(size) { 308 s.Size = int64(size) 309 } 310 s.Attr.Set(AttrLocal, local) 311 if typ != nil { 312 s.Gotype = typ 313 } 314 if isdup && typ != nil { // if bss sym defined multiple times, take type from any one def 315 dup.Gotype = typ 316 } 317 s.P = data 318 if nreloc > 0 { 319 s.R = r.reloc[:nreloc:nreloc] 320 if !isdup { 321 r.reloc = r.reloc[nreloc:] 322 } 323 324 for i := 0; i < nreloc; i++ { 325 s.R[i] = Reloc{ 326 Off: r.readInt32(), 327 Siz: r.readUint8(), 328 Type: 
r.readInt32(), 329 Add: r.readInt64(), 330 Sym: r.readSymIndex(), 331 } 332 } 333 } 334 335 if s.Type == obj.STEXT { 336 s.FuncInfo = new(FuncInfo) 337 pc := s.FuncInfo 338 339 pc.Args = r.readInt32() 340 pc.Locals = r.readInt32() 341 if r.readUint8() != 0 { 342 s.Attr |= AttrNoSplit 343 } 344 flags := r.readInt() 345 if flags&(1<<2) != 0 { 346 s.Attr |= AttrReflectMethod 347 } 348 n := r.readInt() 349 pc.Autom = r.autom[:n:n] 350 if !isdup { 351 r.autom = r.autom[n:] 352 } 353 354 for i := 0; i < n; i++ { 355 pc.Autom[i] = Auto{ 356 Asym: r.readSymIndex(), 357 Aoffset: r.readInt32(), 358 Name: r.readInt16(), 359 Gotype: r.readSymIndex(), 360 } 361 } 362 363 pc.Pcsp.P = r.readData() 364 pc.Pcfile.P = r.readData() 365 pc.Pcline.P = r.readData() 366 n = r.readInt() 367 pc.Pcdata = r.pcdata[:n:n] 368 if !isdup { 369 r.pcdata = r.pcdata[n:] 370 } 371 for i := 0; i < n; i++ { 372 pc.Pcdata[i].P = r.readData() 373 } 374 n = r.readInt() 375 pc.Funcdata = r.funcdata[:n:n] 376 pc.Funcdataoff = r.funcdataoff[:n:n] 377 if !isdup { 378 r.funcdata = r.funcdata[n:] 379 r.funcdataoff = r.funcdataoff[n:] 380 } 381 for i := 0; i < n; i++ { 382 pc.Funcdata[i] = r.readSymIndex() 383 } 384 for i := 0; i < n; i++ { 385 pc.Funcdataoff[i] = r.readInt64() 386 } 387 n = r.readInt() 388 pc.File = r.file[:n:n] 389 if !isdup { 390 r.file = r.file[n:] 391 } 392 for i := 0; i < n; i++ { 393 pc.File[i] = r.readSymIndex() 394 } 395 396 if !isdup { 397 if s.Attr.OnList() { 398 log.Fatalf("symbol %s listed multiple times", s.Name) 399 } 400 s.Attr |= AttrOnList 401 r.ctxt.Textp = append(r.ctxt.Textp, s) 402 } 403 } 404 } 405 406 func (r *objReader) readFull(b []byte) { 407 _, err := io.ReadFull(r.rd, b) 408 if err != nil { 409 log.Fatalf("%s: error reading %s", r.pn, err) 410 } 411 } 412 413 func (r *objReader) readRef() { 414 if c, err := r.rd.ReadByte(); c != symPrefix || err != nil { 415 log.Fatalf("readSym out of sync") 416 } 417 name := r.readSymName() 418 v := r.readInt() 419 if v != 0 && v 
!= 1 { 420 log.Fatalf("invalid symbol version %d", v) 421 } 422 if v == 1 { 423 v = r.ctxt.Version 424 } 425 s := Linklookup(r.ctxt, name, v) 426 r.refs = append(r.refs, s) 427 428 if s == nil || v != 0 { 429 return 430 } 431 if s.Name[0] == '$' && len(s.Name) > 5 && s.Type == 0 && len(s.P) == 0 { 432 x, err := strconv.ParseUint(s.Name[5:], 16, 64) 433 if err != nil { 434 log.Panicf("failed to parse $-symbol %s: %v", s.Name, err) 435 } 436 s.Type = obj.SRODATA 437 s.Attr |= AttrLocal 438 switch s.Name[:5] { 439 case "$f32.": 440 if uint64(uint32(x)) != x { 441 log.Panicf("$-symbol %s too large: %d", s.Name, x) 442 } 443 Adduint32(r.ctxt, s, uint32(x)) 444 case "$f64.", "$i64.": 445 Adduint64(r.ctxt, s, x) 446 default: 447 log.Panicf("unrecognized $-symbol: %s", s.Name) 448 } 449 s.Attr.Set(AttrReachable, false) 450 } 451 if strings.HasPrefix(s.Name, "runtime.gcbits.") { 452 s.Attr |= AttrLocal 453 } 454 } 455 456 func (r *objReader) readInt64() int64 { 457 uv := uint64(0) 458 for shift := uint(0); ; shift += 7 { 459 if shift >= 64 { 460 log.Fatalf("corrupt input") 461 } 462 c, err := r.rd.ReadByte() 463 if err != nil { 464 log.Fatalln("error reading input: ", err) 465 } 466 uv |= uint64(c&0x7F) << shift 467 if c&0x80 == 0 { 468 break 469 } 470 } 471 472 return int64(uv>>1) ^ (int64(uv<<63) >> 63) 473 } 474 475 func (r *objReader) readInt() int { 476 n := r.readInt64() 477 if int64(int(n)) != n { 478 log.Panicf("%v out of range for int", n) 479 } 480 return int(n) 481 } 482 483 func (r *objReader) readInt32() int32 { 484 n := r.readInt64() 485 if int64(int32(n)) != n { 486 log.Panicf("%v out of range for int32", n) 487 } 488 return int32(n) 489 } 490 491 func (r *objReader) readInt16() int16 { 492 n := r.readInt64() 493 if int64(int16(n)) != n { 494 log.Panicf("%v out of range for int16", n) 495 } 496 return int16(n) 497 } 498 499 func (r *objReader) readUint8() uint8 { 500 n := r.readInt64() 501 if int64(uint8(n)) != n { 502 log.Panicf("%v out of range for uint8", 
n) 503 } 504 return uint8(n) 505 } 506 507 func (r *objReader) readString() string { 508 n := r.readInt() 509 if cap(r.rdBuf) < n { 510 r.rdBuf = make([]byte, 2*n) 511 } 512 r.readFull(r.rdBuf[:n]) 513 return string(r.rdBuf[:n]) 514 } 515 516 func (r *objReader) readData() []byte { 517 n := r.readInt() 518 p := r.data[:n:n] 519 r.data = r.data[n:] 520 return p 521 } 522 523 // readSymName reads a symbol name, replacing all "". with pkg. 524 func (r *objReader) readSymName() string { 525 pkg := r.pkg 526 n := r.readInt() 527 if n == 0 { 528 r.readInt64() 529 return "" 530 } 531 if cap(r.rdBuf) < n { 532 r.rdBuf = make([]byte, 2*n) 533 } 534 origName, err := r.rd.Peek(n) 535 if err == bufio.ErrBufferFull { 536 // Long symbol names are rare but exist. One source is type 537 // symbols for types with long string forms. See #15104. 538 origName = make([]byte, n) 539 r.readFull(origName) 540 } else if err != nil { 541 log.Fatalf("%s: error reading symbol: %v", r.pn, err) 542 } 543 adjName := r.rdBuf[:0] 544 for { 545 i := bytes.Index(origName, emptyPkg) 546 if i == -1 { 547 s := string(append(adjName, origName...)) 548 // Read past the peeked origName, now that we're done with it, 549 // using the rfBuf (also no longer used) as the scratch space. 550 // TODO: use bufio.Reader.Discard if available instead? 551 if err == nil { 552 r.readFull(r.rdBuf[:n]) 553 } 554 r.rdBuf = adjName[:0] // in case 2*n wasn't enough 555 556 if DynlinkingGo() { 557 // These types are included in the symbol 558 // table when dynamically linking. To keep 559 // binary size down, we replace the names 560 // with SHA-1 prefixes. 561 // 562 // Keep the type.. prefix, which parts of the 563 // linker (like the DWARF generator) know means 564 // the symbol is not decodable. 565 // 566 // Leave type.runtime. symbols alone, because 567 // other parts of the linker manipulates them. 
568 if strings.HasPrefix(s, "type.") && !strings.HasPrefix(s, "type.runtime.") { 569 hash := sha1.Sum([]byte(s)) 570 prefix := "type." 571 if s[5] == '.' { 572 prefix = "type.." 573 } 574 s = prefix + base64.StdEncoding.EncodeToString(hash[:6]) 575 } 576 } 577 return s 578 } 579 adjName = append(adjName, origName[:i]...) 580 adjName = append(adjName, pkg...) 581 adjName = append(adjName, '.') 582 origName = origName[i+len(emptyPkg):] 583 } 584 } 585 586 // Reads the index of a symbol reference and resolves it to a symbol 587 func (r *objReader) readSymIndex() *LSym { 588 i := r.readInt() 589 return r.refs[i] 590 }