github.com/kdevb0x/go@v0.0.0-20180115030120-39687051e9e7/src/cmd/internal/goobj/read.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package goobj implements reading of Go object files and archives.
     6  //
     7  // TODO(rsc): Decide where this package should live. (golang.org/issue/6932)
     8  // TODO(rsc): Decide the appropriate integer types for various fields.
     9  package goobj
    10  
    11  import (
    12  	"bufio"
    13  	"bytes"
    14  	"cmd/internal/objabi"
    15  	"errors"
    16  	"fmt"
    17  	"io"
    18  	"os"
    19  	"strconv"
    20  	"strings"
    21  )
    22  
// A Sym is a named symbol in an object file.
// The object reader creates one Sym per symbol definition it parses
// and appends it to Package.Syms.
type Sym struct {
	SymID                // symbol identifier (name and version)
	Kind  objabi.SymKind // kind of symbol
	DupOK bool           // are duplicate definitions okay? (low bit of the symbol's flags word)
	Size  int64          // size of corresponding data
	Type  SymID          // symbol for Go type information
	Data  Data           // memory image of symbol
	Reloc []Reloc        // relocations to apply to Data
	Func  *Func          // additional data for functions; set only when Kind is objabi.STEXT
}
    34  
    35  // A SymID - the combination of Name and Version - uniquely identifies
    36  // a symbol within a package.
    37  type SymID struct {
    38  	// Name is the name of a symbol.
    39  	Name string
    40  
    41  	// Version is zero for symbols with global visibility.
    42  	// Symbols with only file visibility (such as file-level static
    43  	// declarations in C) have a non-zero version distinguishing
    44  	// a symbol in one file from a symbol of the same name
    45  	// in another file
    46  	Version int64
    47  }
    48  
    49  func (s SymID) String() string {
    50  	if s.Version == 0 {
    51  		return s.Name
    52  	}
    53  	return fmt.Sprintf("%s<%d>", s.Name, s.Version)
    54  }
    55  
// A Data is a reference to data stored in an object file.
// It records the offset and size of the data, so that a client can
// read the data only if necessary.
type Data struct {
	Offset int64 // byte offset of the data in the input file
	Size   int64 // length of the data in bytes
}
    63  
// A Reloc describes a relocation applied to a memory image to refer
// to an address within a particular symbol.
type Reloc struct {
	// The bytes at [Offset, Offset+Size) within the containing Sym
	// should be updated to refer to the address Add bytes after the start
	// of the symbol Sym.
	Offset int64 // start of the bytes to update, relative to the containing Sym
	Size   int64 // number of bytes to update
	Sym    SymID // symbol whose address is referred to
	Add    int64 // addend: byte offset from the start of Sym

	// The Type records the form of address expected in the bytes
	// described by the previous fields: absolute, PC-relative, and so on.
	// TODO(rsc): The interpretation of Type is not exposed by this package.
	Type objabi.RelocType
}
    80  
// A Var describes a variable in a function stack frame: a declared
// local variable, an input argument, or an output result.
// The object reader fills in one Var per entry of a function's
// variable list; see Func.Var.
type Var struct {
	// The combination of Name, Kind, and Offset uniquely
	// identifies a variable in a function stack frame.
	// Using fewer of these - in particular, using only Name - does not.
	Name   string // Name of variable.
	Kind   int64  // Raw kind value as stored in the object file. TODO(rsc): Define meaning.
	Offset int64  // Frame offset. TODO(rsc): Define meaning.

	Type SymID // Go type for variable.
}
    93  
// Func contains additional per-symbol information specific to functions.
// It is attached to a Sym (as Sym.Func) only for symbols of kind objabi.STEXT.
type Func struct {
	Args     int64         // size in bytes of argument frame: inputs and outputs
	Frame    int64         // size in bytes of local variable frame
	Leaf     bool          // function omits save of link register (ARM)
	NoSplit  bool          // function omits stack split prologue
	Var      []Var         // detail about local variables
	PCSP     Data          // PC → SP offset map
	PCFile   Data          // PC → file number map (index into File)
	PCLine   Data          // PC → line number map
	PCInline Data          // PC → inline tree index map
	PCData   []Data        // PC → runtime support data map
	FuncData []FuncData    // non-PC-specific runtime support data
	File     []string      // paths indexed by PCFile
	InlTree  []InlinedCall // inlined-call nodes; presumably indexed by PCInline values
}
   110  
   111  // TODO: Add PCData []byte and PCDataIter (similar to liblink).
   112  
// A FuncData is a single function-specific data value.
// In the object file the Sym fields of all of a function's FuncData
// entries are stored first, followed by all of the Offset fields.
type FuncData struct {
	Sym    SymID // symbol holding data
	Offset int64 // offset into symbol for funcdata pointer
}
   118  
// An InlinedCall is a node in an InlTree.
// See cmd/internal/obj.InlTree for details.
// The fields hold the values exactly as stored in the object file.
type InlinedCall struct {
	// Parent is the parent node's index.
	// NOTE(review): presumably an index into the same Func.InlTree
	// slice - confirm against cmd/internal/obj.InlTree.
	Parent int64
	// File is the name of the file symbol recorded for this node.
	File string
	// Line is the line number recorded for this node.
	Line int64
	// Func is the symbol recorded for this node.
	// NOTE(review): presumably the function that was inlined - confirm.
	Func SymID
}
   127  
// A Package is a parsed Go object file or archive defining a Go package.
// It is produced by Parse.
type Package struct {
	ImportPath string          // import path denoting this package
	Imports    []string        // packages imported by this package
	SymRefs    []SymID         // list of symbol names and versions referred to by this package
	Syms       []*Sym          // symbols defined by this package
	MaxVersion int64           // maximum Version in any SymID in Syms
	Arch       string          // architecture
	Native     []*NativeReader // native object data (e.g. ELF)
}
   138  
// A NativeReader gives random access to the contents of an archive
// member that is not a Go object file (for example an ELF object).
// Name is the member's name as recorded in its archive header.
type NativeReader struct {
	Name string
	io.ReaderAt
}
   143  
// Magic byte sequences recognized by the parser, and the errors it
// reports for input that does not match the expected formats.
var (
	archiveHeader = []byte("!<arch>\n") // magic bytes that begin an archive
	archiveMagic  = []byte("`\n")       // bytes that end each archive member header
	goobjHeader   = []byte("go objec")  // truncated to size of archiveHeader

	errCorruptArchive   = errors.New("corrupt archive")
	errTruncatedArchive = errors.New("truncated archive")
	errCorruptObject    = errors.New("corrupt object file")
	errNotObject        = errors.New("unrecognized object file format")
)
   154  
// An objReader is an object file reader.
// It wraps a file in a buffered reader, tracks the current file offset,
// and remembers the first error encountered so that parsing code can
// defer error checks.
type objReader struct {
	p          *Package      // package being filled in
	b          *bufio.Reader // buffered reader over f
	f          *os.File      // underlying input file
	err        error         // first error recorded by the error method, if any
	offset     int64         // file offset of the next byte to be read through b
	dataOffset int64         // file offset assigned to the next Data returned by readData
	limit      int64         // file offset at which reading must stop (end of file or of the current archive member)
	tmp        [256]byte     // scratch buffer for headers, magic strings, and skipped bytes
	pkgprefix  string        // replacement text for the `"".` prefix in symbol names
}
   167  
   168  // init initializes r to read package p from f.
   169  func (r *objReader) init(f *os.File, p *Package) {
   170  	r.f = f
   171  	r.p = p
   172  	r.offset, _ = f.Seek(0, io.SeekCurrent)
   173  	r.limit, _ = f.Seek(0, io.SeekEnd)
   174  	f.Seek(r.offset, io.SeekStart)
   175  	r.b = bufio.NewReader(f)
   176  	r.pkgprefix = objabi.PathToPrefix(p.ImportPath) + "."
   177  }
   178  
   179  // error records that an error occurred.
   180  // It returns only the first error, so that an error
   181  // caused by an earlier error does not discard information
   182  // about the earlier error.
   183  func (r *objReader) error(err error) error {
   184  	if r.err == nil {
   185  		if err == io.EOF {
   186  			err = io.ErrUnexpectedEOF
   187  		}
   188  		r.err = err
   189  	}
   190  	// panic("corrupt") // useful for debugging
   191  	return r.err
   192  }
   193  
   194  // peek returns the next n bytes without advancing the reader.
   195  func (r *objReader) peek(n int) ([]byte, error) {
   196  	if r.err != nil {
   197  		return nil, r.err
   198  	}
   199  	if r.offset >= r.limit {
   200  		r.error(io.ErrUnexpectedEOF)
   201  		return nil, r.err
   202  	}
   203  	b, err := r.b.Peek(n)
   204  	if err != nil {
   205  		if err != bufio.ErrBufferFull {
   206  			r.error(err)
   207  		}
   208  	}
   209  	return b, err
   210  }
   211  
   212  // readByte reads and returns a byte from the input file.
   213  // On I/O error or EOF, it records the error but returns byte 0.
   214  // A sequence of 0 bytes will eventually terminate any
   215  // parsing state in the object file. In particular, it ends the
   216  // reading of a varint.
   217  func (r *objReader) readByte() byte {
   218  	if r.err != nil {
   219  		return 0
   220  	}
   221  	if r.offset >= r.limit {
   222  		r.error(io.ErrUnexpectedEOF)
   223  		return 0
   224  	}
   225  	b, err := r.b.ReadByte()
   226  	if err != nil {
   227  		if err == io.EOF {
   228  			err = io.ErrUnexpectedEOF
   229  		}
   230  		r.error(err)
   231  		b = 0
   232  	} else {
   233  		r.offset++
   234  	}
   235  	return b
   236  }
   237  
   238  // read reads exactly len(b) bytes from the input file.
   239  // If an error occurs, read returns the error but also
   240  // records it, so it is safe for callers to ignore the result
   241  // as long as delaying the report is not a problem.
   242  func (r *objReader) readFull(b []byte) error {
   243  	if r.err != nil {
   244  		return r.err
   245  	}
   246  	if r.offset+int64(len(b)) > r.limit {
   247  		return r.error(io.ErrUnexpectedEOF)
   248  	}
   249  	n, err := io.ReadFull(r.b, b)
   250  	r.offset += int64(n)
   251  	if err != nil {
   252  		return r.error(err)
   253  	}
   254  	return nil
   255  }
   256  
   257  // readInt reads a zigzag varint from the input file.
   258  func (r *objReader) readInt() int64 {
   259  	var u uint64
   260  
   261  	for shift := uint(0); ; shift += 7 {
   262  		if shift >= 64 {
   263  			r.error(errCorruptObject)
   264  			return 0
   265  		}
   266  		c := r.readByte()
   267  		u |= uint64(c&0x7F) << shift
   268  		if c&0x80 == 0 {
   269  			break
   270  		}
   271  	}
   272  
   273  	return int64(u>>1) ^ (int64(u) << 63 >> 63)
   274  }
   275  
   276  // readString reads a length-delimited string from the input file.
   277  func (r *objReader) readString() string {
   278  	n := r.readInt()
   279  	buf := make([]byte, n)
   280  	r.readFull(buf)
   281  	return string(buf)
   282  }
   283  
   284  // readSymID reads a SymID from the input file.
   285  func (r *objReader) readSymID() SymID {
   286  	i := r.readInt()
   287  	return r.p.SymRefs[i]
   288  }
   289  
   290  func (r *objReader) readRef() {
   291  	name, vers := r.readString(), r.readInt()
   292  
   293  	// In a symbol name in an object file, "". denotes the
   294  	// prefix for the package in which the object file has been found.
   295  	// Expand it.
   296  	name = strings.Replace(name, `"".`, r.pkgprefix, -1)
   297  
   298  	// An individual object file only records version 0 (extern) or 1 (static).
   299  	// To make static symbols unique across all files being read, we
   300  	// replace version 1 with the version corresponding to the current
   301  	// file number. The number is incremented on each call to parseObject.
   302  	if vers != 0 {
   303  		vers = r.p.MaxVersion
   304  	}
   305  	r.p.SymRefs = append(r.p.SymRefs, SymID{name, vers})
   306  }
   307  
   308  // readData reads a data reference from the input file.
   309  func (r *objReader) readData() Data {
   310  	n := r.readInt()
   311  	d := Data{Offset: r.dataOffset, Size: n}
   312  	r.dataOffset += n
   313  	return d
   314  }
   315  
   316  // skip skips n bytes in the input.
   317  func (r *objReader) skip(n int64) {
   318  	if n < 0 {
   319  		r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip"))
   320  	}
   321  	if n < int64(len(r.tmp)) {
   322  		// Since the data is so small, a just reading from the buffered
   323  		// reader is better than flushing the buffer and seeking.
   324  		r.readFull(r.tmp[:n])
   325  	} else if n <= int64(r.b.Buffered()) {
   326  		// Even though the data is not small, it has already been read.
   327  		// Advance the buffer instead of seeking.
   328  		for n > int64(len(r.tmp)) {
   329  			r.readFull(r.tmp[:])
   330  			n -= int64(len(r.tmp))
   331  		}
   332  		r.readFull(r.tmp[:n])
   333  	} else {
   334  		// Seek, giving up buffered data.
   335  		_, err := r.f.Seek(r.offset+n, io.SeekStart)
   336  		if err != nil {
   337  			r.error(err)
   338  		}
   339  		r.offset += n
   340  		r.b.Reset(r.f)
   341  	}
   342  }
   343  
   344  // Parse parses an object file or archive from f,
   345  // assuming that its import path is pkgpath.
   346  func Parse(f *os.File, pkgpath string) (*Package, error) {
   347  	if pkgpath == "" {
   348  		pkgpath = `""`
   349  	}
   350  	p := new(Package)
   351  	p.ImportPath = pkgpath
   352  
   353  	var rd objReader
   354  	rd.init(f, p)
   355  	err := rd.readFull(rd.tmp[:8])
   356  	if err != nil {
   357  		if err == io.EOF {
   358  			err = io.ErrUnexpectedEOF
   359  		}
   360  		return nil, err
   361  	}
   362  
   363  	switch {
   364  	default:
   365  		return nil, errNotObject
   366  
   367  	case bytes.Equal(rd.tmp[:8], archiveHeader):
   368  		if err := rd.parseArchive(); err != nil {
   369  			return nil, err
   370  		}
   371  	case bytes.Equal(rd.tmp[:8], goobjHeader):
   372  		if err := rd.parseObject(goobjHeader); err != nil {
   373  			return nil, err
   374  		}
   375  	}
   376  
   377  	return p, nil
   378  }
   379  
   380  // trimSpace removes trailing spaces from b and returns the corresponding string.
   381  // This effectively parses the form used in archive headers.
   382  func trimSpace(b []byte) string {
   383  	return string(bytes.TrimRight(b, " "))
   384  }
   385  
   386  // parseArchive parses a Unix archive of Go object files.
   387  func (r *objReader) parseArchive() error {
   388  	for r.offset < r.limit {
   389  		if err := r.readFull(r.tmp[:60]); err != nil {
   390  			return err
   391  		}
   392  		data := r.tmp[:60]
   393  
   394  		// Each file is preceded by this text header (slice indices in first column):
   395  		//	 0:16	name
   396  		//	16:28 date
   397  		//	28:34 uid
   398  		//	34:40 gid
   399  		//	40:48 mode
   400  		//	48:58 size
   401  		//	58:60 magic - `\n
   402  		// We only care about name, size, and magic.
   403  		// The fields are space-padded on the right.
   404  		// The size is in decimal.
   405  		// The file data - size bytes - follows the header.
   406  		// Headers are 2-byte aligned, so if size is odd, an extra padding
   407  		// byte sits between the file data and the next header.
   408  		// The file data that follows is padded to an even number of bytes:
   409  		// if size is odd, an extra padding byte is inserted betw the next header.
   410  		if len(data) < 60 {
   411  			return errTruncatedArchive
   412  		}
   413  		if !bytes.Equal(data[58:60], archiveMagic) {
   414  			return errCorruptArchive
   415  		}
   416  		name := trimSpace(data[0:16])
   417  		size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64)
   418  		if err != nil {
   419  			return errCorruptArchive
   420  		}
   421  		data = data[60:]
   422  		fsize := size + size&1
   423  		if fsize < 0 || fsize < size {
   424  			return errCorruptArchive
   425  		}
   426  		switch name {
   427  		case "__.PKGDEF":
   428  			r.skip(size)
   429  		default:
   430  			oldLimit := r.limit
   431  			r.limit = r.offset + size
   432  
   433  			p, err := r.peek(8)
   434  			if err != nil {
   435  				return err
   436  			}
   437  			if bytes.Equal(p, goobjHeader) {
   438  				if err := r.parseObject(nil); err != nil {
   439  					return fmt.Errorf("parsing archive member %q: %v", name, err)
   440  				}
   441  			} else {
   442  				r.p.Native = append(r.p.Native, &NativeReader{
   443  					Name:     name,
   444  					ReaderAt: io.NewSectionReader(r.f, r.offset, size),
   445  				})
   446  			}
   447  
   448  			r.skip(r.limit - r.offset)
   449  			r.limit = oldLimit
   450  		}
   451  		if size&1 != 0 {
   452  			r.skip(1)
   453  		}
   454  	}
   455  	return nil
   456  }
   457  
   458  // parseObject parses a single Go object file.
   459  // The prefix is the bytes already read from the file,
   460  // typically in order to detect that this is an object file.
   461  // The object file consists of a textual header ending in "\n!\n"
   462  // and then the part we want to parse begins.
   463  // The format of that part is defined in a comment at the top
   464  // of src/liblink/objfile.c.
   465  func (r *objReader) parseObject(prefix []byte) error {
   466  	r.p.MaxVersion++
   467  	h := make([]byte, 0, 256)
   468  	h = append(h, prefix...)
   469  	var c1, c2, c3 byte
   470  	for {
   471  		c1, c2, c3 = c2, c3, r.readByte()
   472  		h = append(h, c3)
   473  		// The new export format can contain 0 bytes.
   474  		// Don't consider them errors, only look for r.err != nil.
   475  		if r.err != nil {
   476  			return errCorruptObject
   477  		}
   478  		if c1 == '\n' && c2 == '!' && c3 == '\n' {
   479  			break
   480  		}
   481  	}
   482  
   483  	hs := strings.Fields(string(h))
   484  	if len(hs) >= 4 {
   485  		r.p.Arch = hs[3]
   486  	}
   487  	// TODO: extract OS + build ID if/when we need it
   488  
   489  	r.readFull(r.tmp[:8])
   490  	if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go19ld")) {
   491  		return r.error(errCorruptObject)
   492  	}
   493  
   494  	b := r.readByte()
   495  	if b != 1 {
   496  		return r.error(errCorruptObject)
   497  	}
   498  
   499  	// Direct package dependencies.
   500  	for {
   501  		s := r.readString()
   502  		if s == "" {
   503  			break
   504  		}
   505  		r.p.Imports = append(r.p.Imports, s)
   506  	}
   507  
   508  	r.p.SymRefs = []SymID{{"", 0}}
   509  	for {
   510  		if b := r.readByte(); b != 0xfe {
   511  			if b != 0xff {
   512  				return r.error(errCorruptObject)
   513  			}
   514  			break
   515  		}
   516  
   517  		r.readRef()
   518  	}
   519  
   520  	dataLength := r.readInt()
   521  	r.readInt() // n relocations - ignore
   522  	r.readInt() // n pcdata - ignore
   523  	r.readInt() // n autom - ignore
   524  	r.readInt() // n funcdata - ignore
   525  	r.readInt() // n files - ignore
   526  
   527  	r.dataOffset = r.offset
   528  	r.skip(dataLength)
   529  
   530  	// Symbols.
   531  	for {
   532  		if b := r.readByte(); b != 0xfe {
   533  			if b != 0xff {
   534  				return r.error(errCorruptObject)
   535  			}
   536  			break
   537  		}
   538  
   539  		typ := r.readByte()
   540  		s := &Sym{SymID: r.readSymID()}
   541  		r.p.Syms = append(r.p.Syms, s)
   542  		s.Kind = objabi.SymKind(typ)
   543  		flags := r.readInt()
   544  		s.DupOK = flags&1 != 0
   545  		s.Size = r.readInt()
   546  		s.Type = r.readSymID()
   547  		s.Data = r.readData()
   548  		s.Reloc = make([]Reloc, r.readInt())
   549  		for i := range s.Reloc {
   550  			rel := &s.Reloc[i]
   551  			rel.Offset = r.readInt()
   552  			rel.Size = r.readInt()
   553  			rel.Type = objabi.RelocType(r.readInt())
   554  			rel.Add = r.readInt()
   555  			rel.Sym = r.readSymID()
   556  		}
   557  
   558  		if s.Kind == objabi.STEXT {
   559  			f := new(Func)
   560  			s.Func = f
   561  			f.Args = r.readInt()
   562  			f.Frame = r.readInt()
   563  			flags := r.readInt()
   564  			f.Leaf = flags&(1<<0) != 0
   565  			f.NoSplit = r.readInt() != 0
   566  			f.Var = make([]Var, r.readInt())
   567  			for i := range f.Var {
   568  				v := &f.Var[i]
   569  				v.Name = r.readSymID().Name
   570  				v.Offset = r.readInt()
   571  				v.Kind = r.readInt()
   572  				v.Type = r.readSymID()
   573  			}
   574  
   575  			f.PCSP = r.readData()
   576  			f.PCFile = r.readData()
   577  			f.PCLine = r.readData()
   578  			f.PCInline = r.readData()
   579  			f.PCData = make([]Data, r.readInt())
   580  			for i := range f.PCData {
   581  				f.PCData[i] = r.readData()
   582  			}
   583  			f.FuncData = make([]FuncData, r.readInt())
   584  			for i := range f.FuncData {
   585  				f.FuncData[i].Sym = r.readSymID()
   586  			}
   587  			for i := range f.FuncData {
   588  				f.FuncData[i].Offset = int64(r.readInt()) // TODO
   589  			}
   590  			f.File = make([]string, r.readInt())
   591  			for i := range f.File {
   592  				f.File[i] = r.readSymID().Name
   593  			}
   594  			f.InlTree = make([]InlinedCall, r.readInt())
   595  			for i := range f.InlTree {
   596  				f.InlTree[i].Parent = r.readInt()
   597  				f.InlTree[i].File = r.readSymID().Name
   598  				f.InlTree[i].Line = r.readInt()
   599  				f.InlTree[i].Func = r.readSymID()
   600  			}
   601  		}
   602  	}
   603  
   604  	r.readFull(r.tmp[:7])
   605  	if !bytes.Equal(r.tmp[:7], []byte("\xffgo19ld")) {
   606  		return r.error(errCorruptObject)
   607  	}
   608  
   609  	return nil
   610  }
   611  
   612  func (r *Reloc) String(insnOffset uint64) string {
   613  	delta := r.Offset - int64(insnOffset)
   614  	s := fmt.Sprintf("[%d:%d]%s", delta, delta+r.Size, r.Type)
   615  	if r.Sym.Name != "" {
   616  		if r.Add != 0 {
   617  			return fmt.Sprintf("%s:%s+%d", s, r.Sym.Name, r.Add)
   618  		}
   619  		return fmt.Sprintf("%s:%s", s, r.Sym.Name)
   620  	}
   621  	if r.Add != 0 {
   622  		return fmt.Sprintf("%s:%d", s, r.Add)
   623  	}
   624  	return s
   625  }