github.com/slayercat/go@v0.0.0-20170428012452-c51559813f61/src/cmd/internal/goobj/read.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package goobj implements reading of Go object files and archives.
     6  //
     7  // TODO(rsc): Decide where this package should live. (golang.org/issue/6932)
     8  // TODO(rsc): Decide the appropriate integer types for various fields.
     9  // TODO(rsc): Write tests. (File format still up in the air a little.)
    10  package goobj
    11  
    12  import (
    13  	"bufio"
    14  	"bytes"
    15  	"cmd/internal/objabi"
    16  	"errors"
    17  	"fmt"
    18  	"io"
    19  	"strconv"
    20  	"strings"
    21  )
    22  
// A Sym is a named symbol in an object file.
// Parse appends one Sym to Package.Syms for every symbol definition
// it reads from the input.
type Sym struct {
	SymID                // symbol identifier (name and version)
	Kind  objabi.SymKind // kind of symbol
	DupOK bool           // are duplicate definitions okay? (bit 0 of the symbol's flags)
	Size  int            // size of corresponding data
	Type  SymID          // symbol for Go type information
	Data  Data           // memory image of symbol
	Reloc []Reloc        // relocations to apply to Data
	Func  *Func          // additional data for functions; set only when Kind is objabi.STEXT
}
    34  
// A SymID - the combination of Name and Version - uniquely identifies
// a symbol within a package.
type SymID struct {
	// Name is the name of a symbol.
	Name string

	// Version is zero for symbols with global visibility.
	// Symbols with only file visibility (such as file-level static
	// declarations in C) have a non-zero version distinguishing
	// a symbol in one file from a symbol of the same name
	// in another file.
	//
	// When reading, every non-zero version found in an object file is
	// replaced by the Package.MaxVersion value current for that file.
	Version int
}
    48  
    49  func (s SymID) String() string {
    50  	if s.Version == 0 {
    51  		return s.Name
    52  	}
    53  	return fmt.Sprintf("%s<%d>", s.Name, s.Version)
    54  }
    55  
// A Data is a reference to data stored in an object file.
// It records the offset and size of the data, so that a client can
// read the data only if necessary.
//
// The reader hands out consecutive, non-overlapping ranges: each new
// Data starts where the previous one ended, beginning at the position
// of the object's data section in the input.
type Data struct {
	Offset int64 // position of the first byte, as tracked by the reader
	Size   int64 // length in bytes, as recorded in the object file
}
    63  
// A Reloc describes a relocation applied to a memory image to refer
// to an address within a particular symbol.
type Reloc struct {
	// The bytes at [Offset, Offset+Size) within the containing Sym
	// should be updated to refer to the address Add bytes after the start
	// of the symbol Sym.
	Offset int
	Size   int
	Sym    SymID
	Add    int

	// The Type records the form of address expected in the bytes
	// described by the previous fields: absolute, PC-relative, and so on.
	// It is stored exactly as read from the object file.
	// TODO(rsc): The interpretation of Type is not exposed by this package.
	Type objabi.RelocType
}
    80  
// A Var describes a variable in a function stack frame: a declared
// local variable, an input argument, or an output result.
type Var struct {
	// The combination of Name, Kind, and Offset uniquely
	// identifies a variable in a function stack frame.
	// Using fewer of these - in particular, using only Name - does not.
	Name   string // Name of variable (the name of the symbol reference read for it).
	Kind   int    // TODO(rsc): Define meaning.
	Offset int    // Frame offset. TODO(rsc): Define meaning.

	Type SymID // Go type for variable.
}
    93  
// Func contains additional per-symbol information specific to functions.
// It is attached to a Sym only when the symbol's kind is objabi.STEXT.
type Func struct {
	Args     int           // size in bytes of argument frame: inputs and outputs
	Frame    int           // size in bytes of local variable frame
	Leaf     bool          // function omits save of link register (ARM)
	NoSplit  bool          // function omits stack split prologue
	Var      []Var         // detail about local variables
	PCSP     Data          // PC → SP offset map
	PCFile   Data          // PC → file number map (index into File)
	PCLine   Data          // PC → line number map
	PCInline Data          // PC → inline tree index map
	PCData   []Data        // PC → runtime support data map
	FuncData []FuncData    // non-PC-specific runtime support data
	File     []string      // paths indexed by PCFile
	InlTree  []InlinedCall // inlining tree nodes (presumably indexed by PCInline; confirm against cmd/internal/obj)
}
   110  
   111  // TODO: Add PCData []byte and PCDataIter (similar to liblink).
   112  
// A FuncData is a single function-specific data value.
// In the object file all Sym references of a function are stored
// first, followed by all Offset values in the same order.
type FuncData struct {
	Sym    SymID // symbol holding data
	Offset int64 // offset into symbol for funcdata pointer
}
   118  
// An InlinedCall is a node in an InlTree.
// See cmd/internal/obj.InlTree for details.
type InlinedCall struct {
	Parent int    // value read from the object file; presumably the index of the parent node in InlTree (confirm against cmd/internal/obj.InlTree)
	File   string // name of the symbol reference recorded as the call's file
	Line   int    // line number recorded for the call
	Func   SymID  // symbol reference recorded for the call's function
}
   127  
// A Package is a parsed Go object file or archive defining a Go package.
type Package struct {
	ImportPath string   // import path denoting this package
	Imports    []string // packages imported by this package
	SymRefs    []SymID  // list of symbol names and versions referred to by this package; reset for each object parsed, with an empty SymID at index 0
	Syms       []*Sym   // symbols defined by this package
	MaxVersion int      // maximum Version in any SymID in Syms; incremented once per object parsed
	Arch       string   // architecture (fourth whitespace-separated field of the object header)
}
   137  
// File-format markers and the sentinel errors returned while parsing.
var (
	// archiveHeader is the 8-byte magic that starts a Unix archive.
	archiveHeader = []byte("!<arch>\n")
	// archiveMagic is the 2-byte terminator of every archive member header.
	archiveMagic = []byte("`\n")
	// goobjHeader is the start of a standalone Go object file.
	goobjHeader = []byte("go objec") // truncated to size of archiveHeader

	// errCorruptArchive reports a malformed archive member header.
	errCorruptArchive = errors.New("corrupt archive")
	// errTruncatedArchive reports an archive that ends too early.
	errTruncatedArchive = errors.New("truncated archive")
	// errCorruptObject reports malformed content inside a Go object file.
	errCorruptObject = errors.New("corrupt object file")
	// errNotObject reports input that is neither an archive nor a Go object.
	errNotObject = errors.New("unrecognized object file format")
)
   148  
// An objReader is an object file reader.
// It tracks its own position in the input and remembers the first
// error encountered, so parsing code can issue a series of reads and
// check for failure afterwards.
type objReader struct {
	p          *Package      // package being populated
	b          *bufio.Reader // buffered view of f; all reads go through it
	f          io.ReadSeeker // underlying input; used directly only for seeking
	err        error         // first error recorded by the error method
	offset     int64         // position in f of the next unread byte
	dataOffset int64         // position assigned to the next Data returned by readData
	limit      int64         // reads that would reach past this position fail with io.ErrUnexpectedEOF
	tmp        [256]byte     // scratch buffer for headers, magic strings and skipped bytes
	pkgprefix  string        // replacement text for the `"".` marker in symbol names
}
   161  
   162  // init initializes r to read package p from f.
   163  func (r *objReader) init(f io.ReadSeeker, p *Package) {
   164  	r.f = f
   165  	r.p = p
   166  	r.offset, _ = f.Seek(0, io.SeekCurrent)
   167  	r.limit, _ = f.Seek(0, io.SeekEnd)
   168  	f.Seek(r.offset, io.SeekStart)
   169  	r.b = bufio.NewReader(f)
   170  	r.pkgprefix = objabi.PathToPrefix(p.ImportPath) + "."
   171  }
   172  
   173  // error records that an error occurred.
   174  // It returns only the first error, so that an error
   175  // caused by an earlier error does not discard information
   176  // about the earlier error.
   177  func (r *objReader) error(err error) error {
   178  	if r.err == nil {
   179  		if err == io.EOF {
   180  			err = io.ErrUnexpectedEOF
   181  		}
   182  		r.err = err
   183  	}
   184  	// panic("corrupt") // useful for debugging
   185  	return r.err
   186  }
   187  
   188  // readByte reads and returns a byte from the input file.
   189  // On I/O error or EOF, it records the error but returns byte 0.
   190  // A sequence of 0 bytes will eventually terminate any
   191  // parsing state in the object file. In particular, it ends the
   192  // reading of a varint.
   193  func (r *objReader) readByte() byte {
   194  	if r.err != nil {
   195  		return 0
   196  	}
   197  	if r.offset >= r.limit {
   198  		r.error(io.ErrUnexpectedEOF)
   199  		return 0
   200  	}
   201  	b, err := r.b.ReadByte()
   202  	if err != nil {
   203  		if err == io.EOF {
   204  			err = io.ErrUnexpectedEOF
   205  		}
   206  		r.error(err)
   207  		b = 0
   208  	} else {
   209  		r.offset++
   210  	}
   211  	return b
   212  }
   213  
   214  // read reads exactly len(b) bytes from the input file.
   215  // If an error occurs, read returns the error but also
   216  // records it, so it is safe for callers to ignore the result
   217  // as long as delaying the report is not a problem.
   218  func (r *objReader) readFull(b []byte) error {
   219  	if r.err != nil {
   220  		return r.err
   221  	}
   222  	if r.offset+int64(len(b)) > r.limit {
   223  		return r.error(io.ErrUnexpectedEOF)
   224  	}
   225  	n, err := io.ReadFull(r.b, b)
   226  	r.offset += int64(n)
   227  	if err != nil {
   228  		return r.error(err)
   229  	}
   230  	return nil
   231  }
   232  
   233  // readInt reads a zigzag varint from the input file.
   234  func (r *objReader) readInt() int {
   235  	var u uint64
   236  
   237  	for shift := uint(0); ; shift += 7 {
   238  		if shift >= 64 {
   239  			r.error(errCorruptObject)
   240  			return 0
   241  		}
   242  		c := r.readByte()
   243  		u |= uint64(c&0x7F) << shift
   244  		if c&0x80 == 0 {
   245  			break
   246  		}
   247  	}
   248  
   249  	v := int64(u>>1) ^ (int64(u) << 63 >> 63)
   250  	if int64(int(v)) != v {
   251  		r.error(errCorruptObject) // TODO
   252  		return 0
   253  	}
   254  	return int(v)
   255  }
   256  
   257  // readString reads a length-delimited string from the input file.
   258  func (r *objReader) readString() string {
   259  	n := r.readInt()
   260  	buf := make([]byte, n)
   261  	r.readFull(buf)
   262  	return string(buf)
   263  }
   264  
   265  // readSymID reads a SymID from the input file.
   266  func (r *objReader) readSymID() SymID {
   267  	i := r.readInt()
   268  	return r.p.SymRefs[i]
   269  }
   270  
   271  func (r *objReader) readRef() {
   272  	name, vers := r.readString(), r.readInt()
   273  
   274  	// In a symbol name in an object file, "". denotes the
   275  	// prefix for the package in which the object file has been found.
   276  	// Expand it.
   277  	name = strings.Replace(name, `"".`, r.pkgprefix, -1)
   278  
   279  	// An individual object file only records version 0 (extern) or 1 (static).
   280  	// To make static symbols unique across all files being read, we
   281  	// replace version 1 with the version corresponding to the current
   282  	// file number. The number is incremented on each call to parseObject.
   283  	if vers != 0 {
   284  		vers = r.p.MaxVersion
   285  	}
   286  	r.p.SymRefs = append(r.p.SymRefs, SymID{name, vers})
   287  }
   288  
   289  // readData reads a data reference from the input file.
   290  func (r *objReader) readData() Data {
   291  	n := r.readInt()
   292  	d := Data{Offset: r.dataOffset, Size: int64(n)}
   293  	r.dataOffset += int64(n)
   294  	return d
   295  }
   296  
   297  // skip skips n bytes in the input.
   298  func (r *objReader) skip(n int64) {
   299  	if n < 0 {
   300  		r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip"))
   301  	}
   302  	if n < int64(len(r.tmp)) {
   303  		// Since the data is so small, a just reading from the buffered
   304  		// reader is better than flushing the buffer and seeking.
   305  		r.readFull(r.tmp[:n])
   306  	} else if n <= int64(r.b.Buffered()) {
   307  		// Even though the data is not small, it has already been read.
   308  		// Advance the buffer instead of seeking.
   309  		for n > int64(len(r.tmp)) {
   310  			r.readFull(r.tmp[:])
   311  			n -= int64(len(r.tmp))
   312  		}
   313  		r.readFull(r.tmp[:n])
   314  	} else {
   315  		// Seek, giving up buffered data.
   316  		_, err := r.f.Seek(r.offset+n, io.SeekStart)
   317  		if err != nil {
   318  			r.error(err)
   319  		}
   320  		r.offset += n
   321  		r.b.Reset(r.f)
   322  	}
   323  }
   324  
   325  // Parse parses an object file or archive from r,
   326  // assuming that its import path is pkgpath.
   327  func Parse(r io.ReadSeeker, pkgpath string) (*Package, error) {
   328  	if pkgpath == "" {
   329  		pkgpath = `""`
   330  	}
   331  	p := new(Package)
   332  	p.ImportPath = pkgpath
   333  
   334  	var rd objReader
   335  	rd.init(r, p)
   336  	err := rd.readFull(rd.tmp[:8])
   337  	if err != nil {
   338  		if err == io.EOF {
   339  			err = io.ErrUnexpectedEOF
   340  		}
   341  		return nil, err
   342  	}
   343  
   344  	switch {
   345  	default:
   346  		return nil, errNotObject
   347  
   348  	case bytes.Equal(rd.tmp[:8], archiveHeader):
   349  		if err := rd.parseArchive(); err != nil {
   350  			return nil, err
   351  		}
   352  	case bytes.Equal(rd.tmp[:8], goobjHeader):
   353  		if err := rd.parseObject(goobjHeader); err != nil {
   354  			return nil, err
   355  		}
   356  	}
   357  
   358  	return p, nil
   359  }
   360  
   361  // trimSpace removes trailing spaces from b and returns the corresponding string.
   362  // This effectively parses the form used in archive headers.
   363  func trimSpace(b []byte) string {
   364  	return string(bytes.TrimRight(b, " "))
   365  }
   366  
   367  // parseArchive parses a Unix archive of Go object files.
   368  // TODO(rsc): Need to skip non-Go object files.
   369  // TODO(rsc): Maybe record table of contents in r.p so that
   370  // linker can avoid having code to parse archives too.
   371  func (r *objReader) parseArchive() error {
   372  	for r.offset < r.limit {
   373  		if err := r.readFull(r.tmp[:60]); err != nil {
   374  			return err
   375  		}
   376  		data := r.tmp[:60]
   377  
   378  		// Each file is preceded by this text header (slice indices in first column):
   379  		//	 0:16	name
   380  		//	16:28 date
   381  		//	28:34 uid
   382  		//	34:40 gid
   383  		//	40:48 mode
   384  		//	48:58 size
   385  		//	58:60 magic - `\n
   386  		// We only care about name, size, and magic.
   387  		// The fields are space-padded on the right.
   388  		// The size is in decimal.
   389  		// The file data - size bytes - follows the header.
   390  		// Headers are 2-byte aligned, so if size is odd, an extra padding
   391  		// byte sits between the file data and the next header.
   392  		// The file data that follows is padded to an even number of bytes:
   393  		// if size is odd, an extra padding byte is inserted betw the next header.
   394  		if len(data) < 60 {
   395  			return errTruncatedArchive
   396  		}
   397  		if !bytes.Equal(data[58:60], archiveMagic) {
   398  			return errCorruptArchive
   399  		}
   400  		name := trimSpace(data[0:16])
   401  		size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64)
   402  		if err != nil {
   403  			return errCorruptArchive
   404  		}
   405  		data = data[60:]
   406  		fsize := size + size&1
   407  		if fsize < 0 || fsize < size {
   408  			return errCorruptArchive
   409  		}
   410  		switch name {
   411  		case "__.PKGDEF":
   412  			r.skip(size)
   413  		default:
   414  			oldLimit := r.limit
   415  			r.limit = r.offset + size
   416  			if err := r.parseObject(nil); err != nil {
   417  				return fmt.Errorf("parsing archive member %q: %v", name, err)
   418  			}
   419  			r.skip(r.limit - r.offset)
   420  			r.limit = oldLimit
   421  		}
   422  		if size&1 != 0 {
   423  			r.skip(1)
   424  		}
   425  	}
   426  	return nil
   427  }
   428  
   429  // parseObject parses a single Go object file.
   430  // The prefix is the bytes already read from the file,
   431  // typically in order to detect that this is an object file.
   432  // The object file consists of a textual header ending in "\n!\n"
   433  // and then the part we want to parse begins.
   434  // The format of that part is defined in a comment at the top
   435  // of src/liblink/objfile.c.
   436  func (r *objReader) parseObject(prefix []byte) error {
   437  	r.p.MaxVersion++
   438  	h := make([]byte, 0, 256)
   439  	h = append(h, prefix...)
   440  	var c1, c2, c3 byte
   441  	for {
   442  		c1, c2, c3 = c2, c3, r.readByte()
   443  		h = append(h, c3)
   444  		// The new export format can contain 0 bytes.
   445  		// Don't consider them errors, only look for r.err != nil.
   446  		if r.err != nil {
   447  			return errCorruptObject
   448  		}
   449  		if c1 == '\n' && c2 == '!' && c3 == '\n' {
   450  			break
   451  		}
   452  	}
   453  
   454  	hs := strings.Fields(string(h))
   455  	if len(hs) >= 4 {
   456  		r.p.Arch = hs[3]
   457  	}
   458  	// TODO: extract OS + build ID if/when we need it
   459  
   460  	r.readFull(r.tmp[:8])
   461  	if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go19ld")) {
   462  		return r.error(errCorruptObject)
   463  	}
   464  
   465  	b := r.readByte()
   466  	if b != 1 {
   467  		return r.error(errCorruptObject)
   468  	}
   469  
   470  	// Direct package dependencies.
   471  	for {
   472  		s := r.readString()
   473  		if s == "" {
   474  			break
   475  		}
   476  		r.p.Imports = append(r.p.Imports, s)
   477  	}
   478  
   479  	r.p.SymRefs = []SymID{{"", 0}}
   480  	for {
   481  		if b := r.readByte(); b != 0xfe {
   482  			if b != 0xff {
   483  				return r.error(errCorruptObject)
   484  			}
   485  			break
   486  		}
   487  
   488  		r.readRef()
   489  	}
   490  
   491  	dataLength := r.readInt()
   492  	r.readInt() // n relocations - ignore
   493  	r.readInt() // n pcdata - ignore
   494  	r.readInt() // n autom - ignore
   495  	r.readInt() // n funcdata - ignore
   496  	r.readInt() // n files - ignore
   497  
   498  	r.dataOffset = r.offset
   499  	r.skip(int64(dataLength))
   500  
   501  	// Symbols.
   502  	for {
   503  		if b := r.readByte(); b != 0xfe {
   504  			if b != 0xff {
   505  				return r.error(errCorruptObject)
   506  			}
   507  			break
   508  		}
   509  
   510  		typ := r.readInt()
   511  		s := &Sym{SymID: r.readSymID()}
   512  		r.p.Syms = append(r.p.Syms, s)
   513  		s.Kind = objabi.SymKind(typ)
   514  		flags := r.readInt()
   515  		s.DupOK = flags&1 != 0
   516  		s.Size = r.readInt()
   517  		s.Type = r.readSymID()
   518  		s.Data = r.readData()
   519  		s.Reloc = make([]Reloc, r.readInt())
   520  		for i := range s.Reloc {
   521  			rel := &s.Reloc[i]
   522  			rel.Offset = r.readInt()
   523  			rel.Size = r.readInt()
   524  			rel.Type = objabi.RelocType(r.readInt())
   525  			rel.Add = r.readInt()
   526  			rel.Sym = r.readSymID()
   527  		}
   528  
   529  		if s.Kind == objabi.STEXT {
   530  			f := new(Func)
   531  			s.Func = f
   532  			f.Args = r.readInt()
   533  			f.Frame = r.readInt()
   534  			flags := r.readInt()
   535  			f.Leaf = flags&1 != 0
   536  			f.NoSplit = r.readInt() != 0
   537  			f.Var = make([]Var, r.readInt())
   538  			for i := range f.Var {
   539  				v := &f.Var[i]
   540  				v.Name = r.readSymID().Name
   541  				v.Offset = r.readInt()
   542  				v.Kind = r.readInt()
   543  				v.Type = r.readSymID()
   544  			}
   545  
   546  			f.PCSP = r.readData()
   547  			f.PCFile = r.readData()
   548  			f.PCLine = r.readData()
   549  			f.PCInline = r.readData()
   550  			f.PCData = make([]Data, r.readInt())
   551  			for i := range f.PCData {
   552  				f.PCData[i] = r.readData()
   553  			}
   554  			f.FuncData = make([]FuncData, r.readInt())
   555  			for i := range f.FuncData {
   556  				f.FuncData[i].Sym = r.readSymID()
   557  			}
   558  			for i := range f.FuncData {
   559  				f.FuncData[i].Offset = int64(r.readInt()) // TODO
   560  			}
   561  			f.File = make([]string, r.readInt())
   562  			for i := range f.File {
   563  				f.File[i] = r.readSymID().Name
   564  			}
   565  			f.InlTree = make([]InlinedCall, r.readInt())
   566  			for i := range f.InlTree {
   567  				f.InlTree[i].Parent = r.readInt()
   568  				f.InlTree[i].File = r.readSymID().Name
   569  				f.InlTree[i].Line = r.readInt()
   570  				f.InlTree[i].Func = r.readSymID()
   571  			}
   572  		}
   573  	}
   574  
   575  	r.readFull(r.tmp[:7])
   576  	if !bytes.Equal(r.tmp[:7], []byte("\xffgo19ld")) {
   577  		return r.error(errCorruptObject)
   578  	}
   579  
   580  	return nil
   581  }
   582  
   583  func (r *Reloc) String(insnOffset uint64) string {
   584  	delta := r.Offset - int(insnOffset)
   585  	s := fmt.Sprintf("[%d:%d]%s", delta, delta+r.Size, r.Type)
   586  	if r.Sym.Name != "" {
   587  		if r.Add != 0 {
   588  			return fmt.Sprintf("%s:%s+%d", s, r.Sym.Name, r.Add)
   589  		}
   590  		return fmt.Sprintf("%s:%s", s, r.Sym.Name)
   591  	}
   592  	if r.Add != 0 {
   593  		return fmt.Sprintf("%s:%d", s, r.Add)
   594  	}
   595  	return s
   596  }