github.com/mh-cbon/go@v0.0.0-20160603070303-9e112a3fe4c0/src/cmd/link/internal/ld/objfile.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ld
     6  
     7  // Reading of Go object files.
     8  //
     9  // Originally, Go object files were Plan 9 object files, but no longer.
    10  // Now they are more like standard object files, in that each symbol is defined
    11  // by an associated memory image (bytes) and a list of relocations to apply
    12  // during linking. We do not (yet?) use a standard file format, however.
    13  // For now, the format is chosen to be as simple as possible to read and write.
    14  // It may change for reasons of efficiency, or we may even switch to a
    15  // standard file format if there are compelling benefits to doing so.
    16  // See golang.org/s/go13linker for more background.
    17  //
    18  // The file format is:
    19  //
    20  //	- magic header: "\x00\x00go17ld"
    21  //	- byte 1 - version number
    22  //	- sequence of strings giving dependencies (imported packages)
    23  //	- empty string (marks end of sequence)
    24  //	- sequence of symbol references used by the defined symbols
    25  //	- byte 0xff (marks end of sequence)
    26  //	- sequence of integer lengths:
    27  //		- total data length
    28  //		- total number of relocations
    29  //		- total number of pcdata
    30  //		- total number of automatics
    31  //		- total number of funcdata
    32  //		- total number of files
    33  //	- data, the content of the defined symbols
    34  //	- sequence of defined symbols
    35  //	- byte 0xff (marks end of sequence; not a separate byte, it is the first byte of the magic footer)
    36  //	- magic footer: "\xff\xffgo17ld"
    37  //
    38  // All integers are stored in a zigzag varint format.
    39  // See golang.org/s/go12symtab for a definition.
    40  //
    41  // Data blocks and strings are both stored as an integer
    42  // followed by that many bytes.
    43  //
    44  // A symbol reference is a string name followed by a version.
    45  //
    46  // A symbol points to other symbols using an index into the symbol
    47  // reference sequence. Index 0 corresponds to a nil LSym* pointer.
    48  // In the symbol layout described below "symref index" stands for this
    49  // index.
    50  //
    51  // Each symbol is laid out as the following fields (taken from LSym*):
    52  //
    53  //	- byte 0xfe (sanity check for synchronization)
    54  //	- type [int]
    55  //	- name & version [symref index]
    56  //	- flags [int]
    57  //		1 dupok; 2 local
    58  //	- size [int]
    59  //	- gotype [symref index]
    60  //	- p [data block]
    61  //	- nr [int]
    62  //	- r [nr relocations, sorted by off]
    63  //
    64  // If type == STEXT, there are a few more fields:
    65  //
    66  //	- args [int]
    67  //	- locals [int]
    68  //	- nosplit [int]
    69  //	- flags [int]
    70  //		1<<0 leaf
    71  //		1<<1 C function
    72  //		1<<2 function may call reflect.Type.Method
    73  //	- nlocal [int]
    74  //	- local [nlocal automatics]
    75  //	- pcln [pcln table]
    76  //
    77  // Each relocation has the encoding:
    78  //
    79  //	- off [int]
    80  //	- siz [int]
    81  //	- type [int]
    82  //	- add [int]
    83  //	- sym [symref index]
    84  //
    85  // Each local has the encoding:
    86  //
    87  //	- asym [symref index]
    88  //	- offset [int]
    89  //	- type [int]
    90  //	- gotype [symref index]
    91  //
    92  // The pcln table has the encoding:
    93  //
    94  //	- pcsp [data block]
    95  //	- pcfile [data block]
    96  //	- pcline [data block]
    97  //	- npcdata [int]
    98  //	- pcdata [npcdata data blocks]
    99  //	- nfuncdata [int]
   100  //	- funcdata [nfuncdata symref index]
   101  //	- funcdatasym [nfuncdata ints]
   102  //	- nfile [int]
   103  //	- file [nfile symref index]
   104  //
   105  // The file layout and meaning of type integers are architecture-independent.
   106  //
   107  // TODO(rsc): The file format is good for a first pass but needs work.
   108  //	- There are SymID in the object file that should really just be strings.
   109  
   110  import (
   111  	"bufio"
   112  	"bytes"
   113  	"cmd/internal/bio"
   114  	"cmd/internal/obj"
   115  	"crypto/sha1"
   116  	"encoding/base64"
   117  	"io"
   118  	"log"
   119  	"strconv"
   120  	"strings"
   121  )
   122  
   123  const (
   124  	startmagic = "\x00\x00go17ld"
   125  	endmagic   = "\xff\xffgo17ld"
   126  )
   127  
   128  var emptyPkg = []byte(`"".`)
   129  
   130  // objReader reads Go object files.
   131  type objReader struct {
   132  	rd   *bufio.Reader
   133  	ctxt *Link
   134  	pkg  string
   135  	pn   string
   136  	// List of symbol references for the file being read.
   137  	dupSym *LSym
   138  
   139  	// rdBuf is used by readString and readSymName as scratch for reading strings.
   140  	rdBuf []byte
   141  
   142  	refs        []*LSym
   143  	data        []byte
   144  	reloc       []Reloc
   145  	pcdata      []Pcdata
   146  	autom       []Auto
   147  	funcdata    []*LSym
   148  	funcdataoff []int64
   149  	file        []*LSym
   150  }
   151  
   152  func LoadObjFile(ctxt *Link, f *bio.Reader, pkg string, length int64, pn string) {
   153  	start := f.Offset()
   154  	r := &objReader{
   155  		rd:     f.Reader,
   156  		pkg:    pkg,
   157  		ctxt:   ctxt,
   158  		pn:     pn,
   159  		dupSym: &LSym{Name: ".dup"},
   160  	}
   161  	r.loadObjFile()
   162  	if f.Offset() != start+length {
   163  		log.Fatalf("%s: unexpected end at %d, want %d", pn, f.Offset(), start+length)
   164  	}
   165  }
   166  
   167  func (r *objReader) loadObjFile() {
   168  	// Increment context version, versions are used to differentiate static files in different packages
   169  	r.ctxt.IncVersion()
   170  
   171  	// Magic header
   172  	var buf [8]uint8
   173  	r.readFull(buf[:])
   174  	if string(buf[:]) != startmagic {
   175  		log.Fatalf("%s: invalid file start %x %x %x %x %x %x %x %x", r.pn, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7])
   176  	}
   177  
   178  	// Version
   179  	c, err := r.rd.ReadByte()
   180  	if err != nil || c != 1 {
   181  		log.Fatalf("%s: invalid file version number %d", r.pn, c)
   182  	}
   183  
   184  	// Autolib
   185  	for {
   186  		lib := r.readString()
   187  		if lib == "" {
   188  			break
   189  		}
   190  		addlib(r.ctxt, r.pkg, r.pn, lib)
   191  	}
   192  
   193  	// Symbol references
   194  	r.refs = []*LSym{nil} // zeroth ref is nil
   195  	for {
   196  		c, err := r.rd.Peek(1)
   197  		if err != nil {
   198  			log.Fatalf("%s: peeking: %v", r.pn, err)
   199  		}
   200  		if c[0] == 0xff {
   201  			r.rd.ReadByte()
   202  			break
   203  		}
   204  		r.readRef()
   205  	}
   206  
   207  	// Lengths
   208  	r.readSlices()
   209  
   210  	// Data section
   211  	r.readFull(r.data)
   212  
   213  	// Defined symbols
   214  	for {
   215  		c, err := r.rd.Peek(1)
   216  		if err != nil {
   217  			log.Fatalf("%s: peeking: %v", r.pn, err)
   218  		}
   219  		if c[0] == 0xff {
   220  			break
   221  		}
   222  		r.readSym()
   223  	}
   224  
   225  	// Magic footer
   226  	buf = [8]uint8{}
   227  	r.readFull(buf[:])
   228  	if string(buf[:]) != endmagic {
   229  		log.Fatalf("%s: invalid file end", r.pn)
   230  	}
   231  }
   232  
   233  func (r *objReader) readSlices() {
   234  	n := r.readInt()
   235  	r.data = make([]byte, n)
   236  	n = r.readInt()
   237  	r.reloc = make([]Reloc, n)
   238  	n = r.readInt()
   239  	r.pcdata = make([]Pcdata, n)
   240  	n = r.readInt()
   241  	r.autom = make([]Auto, n)
   242  	n = r.readInt()
   243  	r.funcdata = make([]*LSym, n)
   244  	r.funcdataoff = make([]int64, n)
   245  	n = r.readInt()
   246  	r.file = make([]*LSym, n)
   247  }
   248  
   249  // Symbols are prefixed so their content doesn't get confused with the magic footer.
   250  const symPrefix = 0xfe
   251  
   252  func (r *objReader) readSym() {
   253  	if c, err := r.rd.ReadByte(); c != symPrefix || err != nil {
   254  		log.Fatalln("readSym out of sync")
   255  	}
   256  	t := r.readInt()
   257  	s := r.readSymIndex()
   258  	flags := r.readInt()
   259  	dupok := flags&1 != 0
   260  	local := flags&2 != 0
   261  	size := r.readInt()
   262  	typ := r.readSymIndex()
   263  	data := r.readData()
   264  	nreloc := r.readInt()
   265  	isdup := false
   266  
   267  	var dup *LSym
   268  	if s.Type != 0 && s.Type != obj.SXREF {
   269  		if (t == obj.SDATA || t == obj.SBSS || t == obj.SNOPTRBSS) && len(data) == 0 && nreloc == 0 {
   270  			if s.Size < int64(size) {
   271  				s.Size = int64(size)
   272  			}
   273  			if typ != nil && s.Gotype == nil {
   274  				s.Gotype = typ
   275  			}
   276  			return
   277  		}
   278  
   279  		if (s.Type == obj.SDATA || s.Type == obj.SBSS || s.Type == obj.SNOPTRBSS) && len(s.P) == 0 && len(s.R) == 0 {
   280  			goto overwrite
   281  		}
   282  		if s.Type != obj.SBSS && s.Type != obj.SNOPTRBSS && !dupok && !s.Attr.DuplicateOK() {
   283  			log.Fatalf("duplicate symbol %s (types %d and %d) in %s and %s", s.Name, s.Type, t, s.File, r.pn)
   284  		}
   285  		if len(s.P) > 0 {
   286  			dup = s
   287  			s = r.dupSym
   288  			isdup = true
   289  		}
   290  	}
   291  
   292  overwrite:
   293  	s.File = r.pkg
   294  	if dupok {
   295  		s.Attr |= AttrDuplicateOK
   296  	}
   297  	if t == obj.SXREF {
   298  		log.Fatalf("bad sxref")
   299  	}
   300  	if t == 0 {
   301  		log.Fatalf("missing type for %s in %s", s.Name, r.pn)
   302  	}
   303  	if t == obj.SBSS && (s.Type == obj.SRODATA || s.Type == obj.SNOPTRBSS) {
   304  		t = int(s.Type)
   305  	}
   306  	s.Type = int16(t)
   307  	if s.Size < int64(size) {
   308  		s.Size = int64(size)
   309  	}
   310  	s.Attr.Set(AttrLocal, local)
   311  	if typ != nil {
   312  		s.Gotype = typ
   313  	}
   314  	if isdup && typ != nil { // if bss sym defined multiple times, take type from any one def
   315  		dup.Gotype = typ
   316  	}
   317  	s.P = data
   318  	if nreloc > 0 {
   319  		s.R = r.reloc[:nreloc:nreloc]
   320  		if !isdup {
   321  			r.reloc = r.reloc[nreloc:]
   322  		}
   323  
   324  		for i := 0; i < nreloc; i++ {
   325  			s.R[i] = Reloc{
   326  				Off:  r.readInt32(),
   327  				Siz:  r.readUint8(),
   328  				Type: r.readInt32(),
   329  				Add:  r.readInt64(),
   330  				Sym:  r.readSymIndex(),
   331  			}
   332  		}
   333  	}
   334  
   335  	if s.Type == obj.STEXT {
   336  		s.FuncInfo = new(FuncInfo)
   337  		pc := s.FuncInfo
   338  
   339  		pc.Args = r.readInt32()
   340  		pc.Locals = r.readInt32()
   341  		if r.readUint8() != 0 {
   342  			s.Attr |= AttrNoSplit
   343  		}
   344  		flags := r.readInt()
   345  		if flags&(1<<2) != 0 {
   346  			s.Attr |= AttrReflectMethod
   347  		}
   348  		n := r.readInt()
   349  		pc.Autom = r.autom[:n:n]
   350  		if !isdup {
   351  			r.autom = r.autom[n:]
   352  		}
   353  
   354  		for i := 0; i < n; i++ {
   355  			pc.Autom[i] = Auto{
   356  				Asym:    r.readSymIndex(),
   357  				Aoffset: r.readInt32(),
   358  				Name:    r.readInt16(),
   359  				Gotype:  r.readSymIndex(),
   360  			}
   361  		}
   362  
   363  		pc.Pcsp.P = r.readData()
   364  		pc.Pcfile.P = r.readData()
   365  		pc.Pcline.P = r.readData()
   366  		n = r.readInt()
   367  		pc.Pcdata = r.pcdata[:n:n]
   368  		if !isdup {
   369  			r.pcdata = r.pcdata[n:]
   370  		}
   371  		for i := 0; i < n; i++ {
   372  			pc.Pcdata[i].P = r.readData()
   373  		}
   374  		n = r.readInt()
   375  		pc.Funcdata = r.funcdata[:n:n]
   376  		pc.Funcdataoff = r.funcdataoff[:n:n]
   377  		if !isdup {
   378  			r.funcdata = r.funcdata[n:]
   379  			r.funcdataoff = r.funcdataoff[n:]
   380  		}
   381  		for i := 0; i < n; i++ {
   382  			pc.Funcdata[i] = r.readSymIndex()
   383  		}
   384  		for i := 0; i < n; i++ {
   385  			pc.Funcdataoff[i] = r.readInt64()
   386  		}
   387  		n = r.readInt()
   388  		pc.File = r.file[:n:n]
   389  		if !isdup {
   390  			r.file = r.file[n:]
   391  		}
   392  		for i := 0; i < n; i++ {
   393  			pc.File[i] = r.readSymIndex()
   394  		}
   395  
   396  		if !isdup {
   397  			if s.Attr.OnList() {
   398  				log.Fatalf("symbol %s listed multiple times", s.Name)
   399  			}
   400  			s.Attr |= AttrOnList
   401  			r.ctxt.Textp = append(r.ctxt.Textp, s)
   402  		}
   403  	}
   404  }
   405  
   406  func (r *objReader) readFull(b []byte) {
   407  	_, err := io.ReadFull(r.rd, b)
   408  	if err != nil {
   409  		log.Fatalf("%s: error reading %s", r.pn, err)
   410  	}
   411  }
   412  
   413  func (r *objReader) readRef() {
   414  	if c, err := r.rd.ReadByte(); c != symPrefix || err != nil {
   415  		log.Fatalf("readSym out of sync")
   416  	}
   417  	name := r.readSymName()
   418  	v := r.readInt()
   419  	if v != 0 && v != 1 {
   420  		log.Fatalf("invalid symbol version %d", v)
   421  	}
   422  	if v == 1 {
   423  		v = r.ctxt.Version
   424  	}
   425  	s := Linklookup(r.ctxt, name, v)
   426  	r.refs = append(r.refs, s)
   427  
   428  	if s == nil || v != 0 {
   429  		return
   430  	}
   431  	if s.Name[0] == '$' && len(s.Name) > 5 && s.Type == 0 && len(s.P) == 0 {
   432  		x, err := strconv.ParseUint(s.Name[5:], 16, 64)
   433  		if err != nil {
   434  			log.Panicf("failed to parse $-symbol %s: %v", s.Name, err)
   435  		}
   436  		s.Type = obj.SRODATA
   437  		s.Attr |= AttrLocal
   438  		switch s.Name[:5] {
   439  		case "$f32.":
   440  			if uint64(uint32(x)) != x {
   441  				log.Panicf("$-symbol %s too large: %d", s.Name, x)
   442  			}
   443  			Adduint32(r.ctxt, s, uint32(x))
   444  		case "$f64.", "$i64.":
   445  			Adduint64(r.ctxt, s, x)
   446  		default:
   447  			log.Panicf("unrecognized $-symbol: %s", s.Name)
   448  		}
   449  		s.Attr.Set(AttrReachable, false)
   450  	}
   451  	if strings.HasPrefix(s.Name, "runtime.gcbits.") {
   452  		s.Attr |= AttrLocal
   453  	}
   454  }
   455  
   456  func (r *objReader) readInt64() int64 {
   457  	uv := uint64(0)
   458  	for shift := uint(0); ; shift += 7 {
   459  		if shift >= 64 {
   460  			log.Fatalf("corrupt input")
   461  		}
   462  		c, err := r.rd.ReadByte()
   463  		if err != nil {
   464  			log.Fatalln("error reading input: ", err)
   465  		}
   466  		uv |= uint64(c&0x7F) << shift
   467  		if c&0x80 == 0 {
   468  			break
   469  		}
   470  	}
   471  
   472  	return int64(uv>>1) ^ (int64(uv<<63) >> 63)
   473  }
   474  
   475  func (r *objReader) readInt() int {
   476  	n := r.readInt64()
   477  	if int64(int(n)) != n {
   478  		log.Panicf("%v out of range for int", n)
   479  	}
   480  	return int(n)
   481  }
   482  
   483  func (r *objReader) readInt32() int32 {
   484  	n := r.readInt64()
   485  	if int64(int32(n)) != n {
   486  		log.Panicf("%v out of range for int32", n)
   487  	}
   488  	return int32(n)
   489  }
   490  
   491  func (r *objReader) readInt16() int16 {
   492  	n := r.readInt64()
   493  	if int64(int16(n)) != n {
   494  		log.Panicf("%v out of range for int16", n)
   495  	}
   496  	return int16(n)
   497  }
   498  
   499  func (r *objReader) readUint8() uint8 {
   500  	n := r.readInt64()
   501  	if int64(uint8(n)) != n {
   502  		log.Panicf("%v out of range for uint8", n)
   503  	}
   504  	return uint8(n)
   505  }
   506  
   507  func (r *objReader) readString() string {
   508  	n := r.readInt()
   509  	if cap(r.rdBuf) < n {
   510  		r.rdBuf = make([]byte, 2*n)
   511  	}
   512  	r.readFull(r.rdBuf[:n])
   513  	return string(r.rdBuf[:n])
   514  }
   515  
   516  func (r *objReader) readData() []byte {
   517  	n := r.readInt()
   518  	p := r.data[:n:n]
   519  	r.data = r.data[n:]
   520  	return p
   521  }
   522  
   523  // readSymName reads a symbol name, replacing all "". with pkg.
   524  func (r *objReader) readSymName() string {
   525  	pkg := r.pkg
   526  	n := r.readInt()
   527  	if n == 0 {
   528  		r.readInt64()
   529  		return ""
   530  	}
   531  	if cap(r.rdBuf) < n {
   532  		r.rdBuf = make([]byte, 2*n)
   533  	}
   534  	origName, err := r.rd.Peek(n)
   535  	if err == bufio.ErrBufferFull {
   536  		// Long symbol names are rare but exist. One source is type
   537  		// symbols for types with long string forms. See #15104.
   538  		origName = make([]byte, n)
   539  		r.readFull(origName)
   540  	} else if err != nil {
   541  		log.Fatalf("%s: error reading symbol: %v", r.pn, err)
   542  	}
   543  	adjName := r.rdBuf[:0]
   544  	for {
   545  		i := bytes.Index(origName, emptyPkg)
   546  		if i == -1 {
   547  			s := string(append(adjName, origName...))
   548  			// Read past the peeked origName, now that we're done with it,
   549  			// using the rfBuf (also no longer used) as the scratch space.
   550  			// TODO: use bufio.Reader.Discard if available instead?
   551  			if err == nil {
   552  				r.readFull(r.rdBuf[:n])
   553  			}
   554  			r.rdBuf = adjName[:0] // in case 2*n wasn't enough
   555  
   556  			if DynlinkingGo() {
   557  				// These types are included in the symbol
   558  				// table when dynamically linking. To keep
   559  				// binary size down, we replace the names
   560  				// with SHA-1 prefixes.
   561  				//
   562  				// Keep the type.. prefix, which parts of the
   563  				// linker (like the DWARF generator) know means
   564  				// the symbol is not decodable.
   565  				//
   566  				// Leave type.runtime. symbols alone, because
   567  				// other parts of the linker manipulates them.
   568  				if strings.HasPrefix(s, "type.") && !strings.HasPrefix(s, "type.runtime.") {
   569  					hash := sha1.Sum([]byte(s))
   570  					prefix := "type."
   571  					if s[5] == '.' {
   572  						prefix = "type.."
   573  					}
   574  					s = prefix + base64.StdEncoding.EncodeToString(hash[:6])
   575  				}
   576  			}
   577  			return s
   578  		}
   579  		adjName = append(adjName, origName[:i]...)
   580  		adjName = append(adjName, pkg...)
   581  		adjName = append(adjName, '.')
   582  		origName = origName[i+len(emptyPkg):]
   583  	}
   584  }
   585  
   586  // Reads the index of a symbol reference and resolves it to a symbol
   587  func (r *objReader) readSymIndex() *LSym {
   588  	i := r.readInt()
   589  	return r.refs[i]
   590  }