github.com/tidwall/go@v0.0.0-20170415222209-6694a6888b7d/src/cmd/link/internal/ld/objfile.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ld
     6  
     7  // Reading of Go object files.
     8  //
     9  // Originally, Go object files were Plan 9 object files, but no longer.
    10  // Now they are more like standard object files, in that each symbol is defined
    11  // by an associated memory image (bytes) and a list of relocations to apply
    12  // during linking. We do not (yet?) use a standard file format, however.
    13  // For now, the format is chosen to be as simple as possible to read and write.
    14  // It may change for reasons of efficiency, or we may even switch to a
    15  // standard file format if there are compelling benefits to doing so.
    16  // See golang.org/s/go13linker for more background.
    17  //
    18  // The file format is:
    19  //
    20  //	- magic header: "\x00\x00go19ld"
    21  //	- byte 1 - version number
    22  //	- sequence of strings giving dependencies (imported packages)
    23  //	- empty string (marks end of sequence)
    24  //	- sequence of symbol references used by the defined symbols
    25  //	- byte 0xff (marks end of sequence)
    26  //	- sequence of integer lengths:
    27  //		- total data length
    28  //		- total number of relocations
    29  //		- total number of pcdata
    30  //		- total number of automatics
    31  //		- total number of funcdata
    32  //		- total number of files
    33  //	- data, the content of the defined symbols
    34  //	- sequence of defined symbols
    35  //	- byte 0xff (marks end of sequence; the reader does not consume it separately, it is the first byte of the magic footer)
    36  //	- magic footer: "\xff\xffgo19ld"
    37  //
    38  // All integers are stored in a zigzag varint format.
    39  // See golang.org/s/go12symtab for a definition.
    40  //
    41  // Data blocks and strings are both stored as an integer
    42  // followed by that many bytes.
    43  //
    44  // A symbol reference is a string name followed by a version.
    45  //
    46  // A symbol points to other symbols using an index into the symbol
    47  // reference sequence. Index 0 corresponds to a nil Object* pointer.
    48  // In the symbol layout described below "symref index" stands for this
    49  // index.
    50  //
    51  // Each symbol is laid out as the following fields (taken from Object*):
    52  //
    53  //	- byte 0xfe (sanity check for synchronization)
    54  //	- type [int]
    55  //	- name & version [symref index]
    56  //	- flags [int]
    57  //		1<<0 dupok
    58  //		1<<1 local
    59  //		1<<2 add to typelink table
    60  //	- size [int]
    61  //	- gotype [symref index]
    62  //	- p [data block]
    63  //	- nr [int]
    64  //	- r [nr relocations, sorted by off]
    65  //
    66  // If type == STEXT, there are a few more fields:
    67  //
    68  //	- args [int]
    69  //	- locals [int]
    70  //	- nosplit [int]
    71  //	- flags [int]
    72  //		1<<0 leaf
    73  //		1<<1 C function
    74  //		1<<2 function may call reflect.Type.Method
    75  //	- nlocal [int]
    76  //	- local [nlocal automatics]
    77  //	- pcln [pcln table]
    78  //
    79  // Each relocation has the encoding:
    80  //
    81  //	- off [int]
    82  //	- siz [int]
    83  //	- type [int]
    84  //	- add [int]
    85  //	- sym [symref index]
    86  //
    87  // Each local has the encoding:
    88  //
    89  //	- asym [symref index]
    90  //	- offset [int]
    91  //	- type [int]
    92  //	- gotype [symref index]
    93  //
    94  // The pcln table has the encoding:
    95  //
    96  //	- pcsp [data block]
    97  //	- pcfile [data block]
    98  //	- pcline [data block]
    99  //	- pcinline [data block]
   100  //	- npcdata [int]
   101  //	- pcdata [npcdata data blocks]
   102  //	- nfuncdata [int]
   103  //	- funcdata [nfuncdata symref index]
   104  //	- funcdataoff [nfuncdata ints]
   105  //	- nfile [int]
   106  //	- file [nfile symref index]
   107  //	- ninlinedcall [int]
   108  //	- inlinedcall [ninlinedcall int symref int symref]
   109  //
   110  // The file layout and meaning of type integers are architecture-independent.
   111  //
   112  // TODO(rsc): The file format is good for a first pass but needs work.
   113  //	- There are SymID in the object file that should really just be strings.
   114  
   115  import (
   116  	"bufio"
   117  	"bytes"
   118  	"cmd/internal/bio"
   119  	"cmd/internal/dwarf"
   120  	"cmd/internal/obj"
   121  	"crypto/sha1"
   122  	"encoding/base64"
   123  	"io"
   124  	"log"
   125  	"strconv"
   126  	"strings"
   127  )
   128  
// startmagic and endmagic are the 8-byte markers that loadObjFile requires
// at the very beginning and the very end of an object file. Any other
// content at those positions is a fatal error.
const (
	startmagic = "\x00\x00go19ld"
	endmagic   = "\xff\xffgo19ld"
)

// emptyPkg is the placeholder package prefix found in object files.
// readSymName and patchDWARFName replace it with the prefix of the
// package being loaded.
var emptyPkg = []byte(`"".`)
   135  
// objReader reads Go object files.
//
// LoadObjFile creates one objReader per object file. Besides the input
// stream it holds the per-file tables that readSlices allocates up front
// and that readSym hands out piece by piece to the symbols it decodes.
type objReader struct {
	// rd is the input stream the object file is decoded from.
	rd              *bufio.Reader
	// ctxt is the link context; symbol references are looked up in ctxt.Syms.
	ctxt            *Link
	// lib is the library being loaded; it supplies the package path and
	// collects imports and text symbols.
	lib             *Library
	// pn is the name of the input, used as a prefix in diagnostics.
	pn              string
	// dupSym is a scratch symbol that readSym decodes into when the real
	// symbol already has content that must be kept.
	dupSym          *Symbol
	// localSymVersion replaces version 1 when readRef looks up a reference.
	localSymVersion int

	// rdBuf is used by readString and readSymName as scratch for reading strings.
	rdBuf []byte

	// List of symbol references for the file being read.
	// Entry 0 is always nil; readSymIndex indexes into this slice.
	refs        []*Symbol
	// data holds the not-yet-consumed part of the file's data section;
	// readData slices blocks off its front.
	data        []byte
	// The remaining slices are sized by readSlices from the totals in the
	// file header. readSym takes a prefix of each for every symbol it keeps.
	reloc       []Reloc
	pcdata      []Pcdata
	autom       []Auto
	funcdata    []*Symbol
	funcdataoff []int64
	file        []*Symbol
}
   158  
   159  func LoadObjFile(ctxt *Link, f *bio.Reader, lib *Library, length int64, pn string) {
   160  
   161  	start := f.Offset()
   162  	r := &objReader{
   163  		rd:              f.Reader,
   164  		lib:             lib,
   165  		ctxt:            ctxt,
   166  		pn:              pn,
   167  		dupSym:          &Symbol{Name: ".dup"},
   168  		localSymVersion: ctxt.Syms.IncVersion(),
   169  	}
   170  	r.loadObjFile()
   171  	if f.Offset() != start+length {
   172  		log.Fatalf("%s: unexpected end at %d, want %d", pn, f.Offset(), start+length)
   173  	}
   174  }
   175  
   176  func (r *objReader) loadObjFile() {
   177  	pkg := pathtoprefix(r.lib.Pkg)
   178  
   179  	// Magic header
   180  	var buf [8]uint8
   181  	r.readFull(buf[:])
   182  	if string(buf[:]) != startmagic {
   183  		log.Fatalf("%s: invalid file start %x %x %x %x %x %x %x %x", r.pn, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7])
   184  	}
   185  
   186  	// Version
   187  	c, err := r.rd.ReadByte()
   188  	if err != nil || c != 1 {
   189  		log.Fatalf("%s: invalid file version number %d", r.pn, c)
   190  	}
   191  
   192  	// Autolib
   193  	for {
   194  		lib := r.readString()
   195  		if lib == "" {
   196  			break
   197  		}
   198  		l := addlib(r.ctxt, pkg, r.pn, lib)
   199  		if l != nil {
   200  			r.lib.imports = append(r.lib.imports, l)
   201  		}
   202  	}
   203  
   204  	// Symbol references
   205  	r.refs = []*Symbol{nil} // zeroth ref is nil
   206  	for {
   207  		c, err := r.rd.Peek(1)
   208  		if err != nil {
   209  			log.Fatalf("%s: peeking: %v", r.pn, err)
   210  		}
   211  		if c[0] == 0xff {
   212  			r.rd.ReadByte()
   213  			break
   214  		}
   215  		r.readRef()
   216  	}
   217  
   218  	// Lengths
   219  	r.readSlices()
   220  
   221  	// Data section
   222  	r.readFull(r.data)
   223  
   224  	// Defined symbols
   225  	for {
   226  		c, err := r.rd.Peek(1)
   227  		if err != nil {
   228  			log.Fatalf("%s: peeking: %v", r.pn, err)
   229  		}
   230  		if c[0] == 0xff {
   231  			break
   232  		}
   233  		r.readSym()
   234  	}
   235  
   236  	// Magic footer
   237  	buf = [8]uint8{}
   238  	r.readFull(buf[:])
   239  	if string(buf[:]) != endmagic {
   240  		log.Fatalf("%s: invalid file end", r.pn)
   241  	}
   242  }
   243  
   244  func (r *objReader) readSlices() {
   245  	n := r.readInt()
   246  	r.data = make([]byte, n)
   247  	n = r.readInt()
   248  	r.reloc = make([]Reloc, n)
   249  	n = r.readInt()
   250  	r.pcdata = make([]Pcdata, n)
   251  	n = r.readInt()
   252  	r.autom = make([]Auto, n)
   253  	n = r.readInt()
   254  	r.funcdata = make([]*Symbol, n)
   255  	r.funcdataoff = make([]int64, n)
   256  	n = r.readInt()
   257  	r.file = make([]*Symbol, n)
   258  }
   259  
// symPrefix is the byte that readRef and readSym require in front of every
// symbol reference and symbol definition; any other byte is reported as a
// loss of synchronization.
// Symbols are prefixed so their content doesn't get confused with the magic footer.
const symPrefix = 0xfe
   262  
// readSym decodes one symbol definition from the input and merges it into
// the symbol selected by the definition's symref index.
//
// The fields are read in the order given in the file-format comment at the
// top of this file. If the target symbol already has a definition, the new
// one either just widens it, overwrites it, is rejected as a duplicate, or
// is decoded into the scratch symbol r.dupSym so that the stream is still
// consumed while the existing content stays untouched.
func (r *objReader) readSym() {
	// Every definition must start with the synchronization byte.
	if c, err := r.rd.ReadByte(); c != symPrefix || err != nil {
		log.Fatalln("readSym out of sync")
	}
	// Fixed part of the record.
	t := obj.SymKind(r.readInt())
	s := r.readSymIndex()
	flags := r.readInt()
	dupok := flags&1 != 0
	local := flags&2 != 0
	makeTypelink := flags&4 != 0
	size := r.readInt()
	typ := r.readSymIndex()
	data := r.readData()
	nreloc := r.readInt()
	pkg := pathtoprefix(r.lib.Pkg)
	isdup := false

	var dup *Symbol
	// s already carries a definition (its type is neither unset nor SXREF).
	if s.Type != 0 && s.Type != obj.SXREF {
		// The new definition is data/bss with no bytes and no relocations:
		// only grow the size and fill in a missing Go type, then stop.
		if (t == obj.SDATA || t == obj.SBSS || t == obj.SNOPTRBSS) && len(data) == 0 && nreloc == 0 {
			if s.Size < int64(size) {
				s.Size = int64(size)
			}
			if typ != nil && s.Gotype == nil {
				s.Gotype = typ
			}
			return
		}

		// The existing definition is data/bss with no bytes and no
		// relocations: let the new definition replace it.
		if (s.Type == obj.SDATA || s.Type == obj.SBSS || s.Type == obj.SNOPTRBSS) && len(s.P) == 0 && len(s.R) == 0 {
			goto overwrite
		}
		// Otherwise a second definition is only tolerated for bss symbols
		// or when either definition is marked duplicate-OK.
		if s.Type != obj.SBSS && s.Type != obj.SNOPTRBSS && !dupok && !s.Attr.DuplicateOK() {
			log.Fatalf("duplicate symbol %s (types %d and %d) in %s and %s", s.Name, s.Type, t, s.File, r.pn)
		}
		// The existing symbol has content to keep: remember it in dup and
		// decode the rest of this record into the scratch symbol instead.
		if len(s.P) > 0 {
			dup = s
			s = r.dupSym
			isdup = true
		}
	}

overwrite:
	s.File = pkg
	if dupok {
		s.Attr |= AttrDuplicateOK
	}
	if t == obj.SXREF {
		log.Fatalf("bad sxref")
	}
	if t == 0 {
		log.Fatalf("missing type for %s in %s", s.Name, r.pn)
	}
	// A bss definition does not change a symbol already typed as rodata
	// or noptrbss.
	if t == obj.SBSS && (s.Type == obj.SRODATA || s.Type == obj.SNOPTRBSS) {
		t = s.Type
	}
	s.Type = t
	// Size only ever grows.
	if s.Size < int64(size) {
		s.Size = int64(size)
	}
	s.Attr.Set(AttrLocal, local)
	s.Attr.Set(AttrMakeTypelink, makeTypelink)
	if typ != nil {
		s.Gotype = typ
	}
	if isdup && typ != nil { // if bss sym defined multiple times, take type from any one def
		dup.Gotype = typ
	}
	s.P = data
	if nreloc > 0 {
		// Take the relocation slots from the preallocated table. For a
		// duplicate the table is not advanced, so the slots are reused by
		// the next symbol.
		s.R = r.reloc[:nreloc:nreloc]
		if !isdup {
			r.reloc = r.reloc[nreloc:]
		}

		for i := 0; i < nreloc; i++ {
			s.R[i] = Reloc{
				Off:  r.readInt32(),
				Siz:  r.readUint8(),
				Type: obj.RelocType(r.readInt32()),
				Add:  r.readInt64(),
				Sym:  r.readSymIndex(),
			}
		}
	}

	// Text symbols carry additional function metadata.
	if s.Type == obj.STEXT {
		s.FuncInfo = new(FuncInfo)
		pc := s.FuncInfo

		pc.Args = r.readInt32()
		pc.Locals = r.readInt32()
		if r.readUint8() != 0 {
			s.Attr |= AttrNoSplit
		}
		// Function flags; only bit 1<<2 is acted on here.
		flags := r.readInt()
		if flags&(1<<2) != 0 {
			s.Attr |= AttrReflectMethod
		}
		// Automatics. As with relocations, each preallocated table below is
		// only advanced when the symbol is not a duplicate.
		n := r.readInt()
		pc.Autom = r.autom[:n:n]
		if !isdup {
			r.autom = r.autom[n:]
		}

		for i := 0; i < n; i++ {
			pc.Autom[i] = Auto{
				Asym:    r.readSymIndex(),
				Aoffset: r.readInt32(),
				Name:    r.readInt16(),
				Gotype:  r.readSymIndex(),
			}
		}

		// pcln table: four fixed data blocks followed by the pcdata blocks.
		pc.Pcsp.P = r.readData()
		pc.Pcfile.P = r.readData()
		pc.Pcline.P = r.readData()
		pc.Pcinline.P = r.readData()
		n = r.readInt()
		pc.Pcdata = r.pcdata[:n:n]
		if !isdup {
			r.pcdata = r.pcdata[n:]
		}
		for i := 0; i < n; i++ {
			pc.Pcdata[i].P = r.readData()
		}
		// Funcdata: all n symbols come first, then all n offsets.
		n = r.readInt()
		pc.Funcdata = r.funcdata[:n:n]
		pc.Funcdataoff = r.funcdataoff[:n:n]
		if !isdup {
			r.funcdata = r.funcdata[n:]
			r.funcdataoff = r.funcdataoff[n:]
		}
		for i := 0; i < n; i++ {
			pc.Funcdata[i] = r.readSymIndex()
		}
		for i := 0; i < n; i++ {
			pc.Funcdataoff[i] = r.readInt64()
		}
		// File table.
		n = r.readInt()
		pc.File = r.file[:n:n]
		if !isdup {
			r.file = r.file[n:]
		}
		for i := 0; i < n; i++ {
			pc.File[i] = r.readSymIndex()
		}
		// Inlining tree; unlike the tables above it is allocated per symbol.
		n = r.readInt()
		pc.InlTree = make([]InlinedCall, n)
		for i := 0; i < n; i++ {
			pc.InlTree[i].Parent = r.readInt32()
			pc.InlTree[i].File = r.readSymIndex()
			pc.InlTree[i].Line = r.readInt32()
			pc.InlTree[i].Func = r.readSymIndex()
		}

		if !dupok {
			// A non-dupok text symbol goes on the library's text list
			// exactly once.
			if s.Attr.OnList() {
				log.Fatalf("symbol %s listed multiple times", s.Name)
			}
			s.Attr |= AttrOnList
			r.lib.textp = append(r.lib.textp, s)
		} else {
			// there may be a dup in another package
			// put into a temp list and add to text later
			if !isdup {
				r.lib.dupTextSyms = append(r.lib.dupTextSyms, s)
			} else {
				r.lib.dupTextSyms = append(r.lib.dupTextSyms, dup)
			}
		}
	}
	// DWARF info symbols still contain the placeholder package prefix.
	if s.Type == obj.SDWARFINFO {
		r.patchDWARFName(s)
	}
}
   439  
   440  func (r *objReader) patchDWARFName(s *Symbol) {
   441  	// This is kind of ugly. Really the package name should not
   442  	// even be included here.
   443  	if s.Size < 1 || s.P[0] != dwarf.DW_ABRV_FUNCTION {
   444  		return
   445  	}
   446  	e := bytes.IndexByte(s.P, 0)
   447  	if e == -1 {
   448  		return
   449  	}
   450  	p := bytes.Index(s.P[:e], emptyPkg)
   451  	if p == -1 {
   452  		return
   453  	}
   454  	pkgprefix := []byte(pathtoprefix(r.lib.Pkg) + ".")
   455  	patched := bytes.Replace(s.P[:e], emptyPkg, pkgprefix, -1)
   456  
   457  	s.P = append(patched, s.P[e:]...)
   458  	delta := int64(len(s.P)) - s.Size
   459  	s.Size = int64(len(s.P))
   460  	for i := range s.R {
   461  		r := &s.R[i]
   462  		if r.Off > int32(e) {
   463  			r.Off += int32(delta)
   464  		}
   465  	}
   466  }
   467  
   468  func (r *objReader) readFull(b []byte) {
   469  	_, err := io.ReadFull(r.rd, b)
   470  	if err != nil {
   471  		log.Fatalf("%s: error reading %s", r.pn, err)
   472  	}
   473  }
   474  
   475  func (r *objReader) readRef() {
   476  	if c, err := r.rd.ReadByte(); c != symPrefix || err != nil {
   477  		log.Fatalf("readSym out of sync")
   478  	}
   479  	name := r.readSymName()
   480  	v := r.readInt()
   481  	if v != 0 && v != 1 {
   482  		log.Fatalf("invalid symbol version %d", v)
   483  	}
   484  	if v == 1 {
   485  		v = r.localSymVersion
   486  	}
   487  	s := r.ctxt.Syms.Lookup(name, v)
   488  	r.refs = append(r.refs, s)
   489  
   490  	if s == nil || v != 0 {
   491  		return
   492  	}
   493  	if s.Name[0] == '$' && len(s.Name) > 5 && s.Type == 0 && len(s.P) == 0 {
   494  		x, err := strconv.ParseUint(s.Name[5:], 16, 64)
   495  		if err != nil {
   496  			log.Panicf("failed to parse $-symbol %s: %v", s.Name, err)
   497  		}
   498  		s.Type = obj.SRODATA
   499  		s.Attr |= AttrLocal
   500  		switch s.Name[:5] {
   501  		case "$f32.":
   502  			if uint64(uint32(x)) != x {
   503  				log.Panicf("$-symbol %s too large: %d", s.Name, x)
   504  			}
   505  			Adduint32(r.ctxt, s, uint32(x))
   506  		case "$f64.", "$i64.":
   507  			Adduint64(r.ctxt, s, x)
   508  		default:
   509  			log.Panicf("unrecognized $-symbol: %s", s.Name)
   510  		}
   511  		s.Attr.Set(AttrReachable, false)
   512  	}
   513  	if strings.HasPrefix(s.Name, "runtime.gcbits.") {
   514  		s.Attr |= AttrLocal
   515  	}
   516  }
   517  
   518  func (r *objReader) readInt64() int64 {
   519  	uv := uint64(0)
   520  	for shift := uint(0); ; shift += 7 {
   521  		if shift >= 64 {
   522  			log.Fatalf("corrupt input")
   523  		}
   524  		c, err := r.rd.ReadByte()
   525  		if err != nil {
   526  			log.Fatalln("error reading input: ", err)
   527  		}
   528  		uv |= uint64(c&0x7F) << shift
   529  		if c&0x80 == 0 {
   530  			break
   531  		}
   532  	}
   533  
   534  	return int64(uv>>1) ^ (int64(uv<<63) >> 63)
   535  }
   536  
   537  func (r *objReader) readInt() int {
   538  	n := r.readInt64()
   539  	if int64(int(n)) != n {
   540  		log.Panicf("%v out of range for int", n)
   541  	}
   542  	return int(n)
   543  }
   544  
   545  func (r *objReader) readInt32() int32 {
   546  	n := r.readInt64()
   547  	if int64(int32(n)) != n {
   548  		log.Panicf("%v out of range for int32", n)
   549  	}
   550  	return int32(n)
   551  }
   552  
   553  func (r *objReader) readInt16() int16 {
   554  	n := r.readInt64()
   555  	if int64(int16(n)) != n {
   556  		log.Panicf("%v out of range for int16", n)
   557  	}
   558  	return int16(n)
   559  }
   560  
   561  func (r *objReader) readUint8() uint8 {
   562  	n := r.readInt64()
   563  	if int64(uint8(n)) != n {
   564  		log.Panicf("%v out of range for uint8", n)
   565  	}
   566  	return uint8(n)
   567  }
   568  
   569  func (r *objReader) readString() string {
   570  	n := r.readInt()
   571  	if cap(r.rdBuf) < n {
   572  		r.rdBuf = make([]byte, 2*n)
   573  	}
   574  	r.readFull(r.rdBuf[:n])
   575  	return string(r.rdBuf[:n])
   576  }
   577  
   578  func (r *objReader) readData() []byte {
   579  	n := r.readInt()
   580  	p := r.data[:n:n]
   581  	r.data = r.data[n:]
   582  	return p
   583  }
   584  
// readSymName reads a symbol name, replacing all "". with pkg.
//
// The name is length-prefixed. Where possible it is examined in place in
// the bufio buffer via Peek and only consumed once the adjusted name has
// been copied into a string; names too long for the buffer are read into a
// freshly allocated slice instead. When building or linking against shared
// libraries, long "type." names are replaced by a short hash-based name.
func (r *objReader) readSymName() string {
	pkg := pathtoprefix(r.lib.Pkg)
	n := r.readInt()
	if n == 0 {
		// Empty name: one further integer is read and discarded.
		// NOTE(review): what that integer represents is not visible here;
		// confirm against the object file writer.
		r.readInt64()
		return ""
	}
	// Make sure the scratch buffer can hold at least the raw name.
	if cap(r.rdBuf) < n {
		r.rdBuf = make([]byte, 2*n)
	}
	origName, err := r.rd.Peek(n)
	if err == bufio.ErrBufferFull {
		// Long symbol names are rare but exist. One source is type
		// symbols for types with long string forms. See #15104.
		origName = make([]byte, n)
		r.readFull(origName)
	} else if err != nil {
		log.Fatalf("%s: error reading symbol: %v", r.pn, err)
	}
	// adjName accumulates the rewritten name in the scratch buffer.
	adjName := r.rdBuf[:0]
	for {
		i := bytes.Index(origName, emptyPkg)
		if i == -1 {
			// No further placeholder: append the tail and copy the result
			// out of the scratch buffer.
			s := string(append(adjName, origName...))
			// Read past the peeked origName, now that we're done with it,
			// using the rdBuf (also no longer used) as the scratch space.
			// TODO: use bufio.Reader.Discard if available instead?
			if err == nil {
				r.readFull(r.rdBuf[:n])
			}
			r.rdBuf = adjName[:0] // in case 2*n wasn't enough

			if Buildmode == BuildmodeShared || *FlagLinkshared {
				// These types are included in the symbol
				// table when dynamically linking. To keep
				// binary size down, we replace the names
				// with SHA-1 prefixes.
				//
				// Keep the type.. prefix, which parts of the
				// linker (like the DWARF generator) know means
				// the symbol is not decodable.
				//
				// Leave type.runtime. symbols alone, because
				// other parts of the linker manipulate them,
				// and also symbols whose names would not be
				// shortened by this process.
				if len(s) > 14 && strings.HasPrefix(s, "type.") && !strings.HasPrefix(s, "type.runtime.") {
					hash := sha1.Sum([]byte(s))
					prefix := "type."
					if s[5] == '.' {
						prefix = "type.."
					}
					// The new name is the prefix plus the base64 encoding
					// of the first six bytes of the SHA-1 hash.
					s = prefix + base64.StdEncoding.EncodeToString(hash[:6])
				}
			}
			return s
		}
		// Copy everything before the placeholder, then the real package
		// prefix and a dot, and continue after the placeholder.
		adjName = append(adjName, origName[:i]...)
		adjName = append(adjName, pkg...)
		adjName = append(adjName, '.')
		origName = origName[i+len(emptyPkg):]
	}
}
   649  
   650  // Reads the index of a symbol reference and resolves it to a symbol
   651  func (r *objReader) readSymIndex() *Symbol {
   652  	i := r.readInt()
   653  	return r.refs[i]
   654  }