github.com/mattn/go@v0.0.0-20171011075504-07f7db3ea99f/src/cmd/internal/goobj/read.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package goobj implements reading of Go object files and archives.
     6  //
     7  // TODO(rsc): Decide where this package should live. (golang.org/issue/6932)
     8  // TODO(rsc): Decide the appropriate integer types for various fields.
     9  package goobj
    10  
    11  import (
    12  	"bufio"
    13  	"bytes"
    14  	"cmd/internal/objabi"
    15  	"errors"
    16  	"fmt"
    17  	"io"
    18  	"os"
    19  	"strconv"
    20  	"strings"
    21  )
    22  
// A Sym is a named symbol in an object file.
// parseObject creates one Sym per symbol record and appends it to
// Package.Syms.
type Sym struct {
	SymID                // symbol identifier (name and version)
	Kind  objabi.SymKind // kind of symbol
	DupOK bool           // are duplicate definitions okay?
	Size  int            // size of corresponding data
	Type  SymID          // symbol for Go type information
	Data  Data           // memory image of symbol
	Reloc []Reloc        // relocations to apply to Data
	Func  *Func          // additional data for functions; set only when Kind is objabi.STEXT
}
    34  
    35  // A SymID - the combination of Name and Version - uniquely identifies
    36  // a symbol within a package.
    37  type SymID struct {
    38  	// Name is the name of a symbol.
    39  	Name string
    40  
    41  	// Version is zero for symbols with global visibility.
    42  	// Symbols with only file visibility (such as file-level static
    43  	// declarations in C) have a non-zero version distinguishing
    44  	// a symbol in one file from a symbol of the same name
    45  	// in another file
    46  	Version int
    47  }
    48  
    49  func (s SymID) String() string {
    50  	if s.Version == 0 {
    51  		return s.Name
    52  	}
    53  	return fmt.Sprintf("%s<%d>", s.Name, s.Version)
    54  }
    55  
// A Data is a reference to data stored in an object file.
// It records the offset and size of the data, so that a client can
// read the data only if necessary.
type Data struct {
	Offset int64 // file offset at which the data begins
	Size   int64 // length of the data, as recorded in the object file
}
    63  
// A Reloc describes a relocation applied to a memory image to refer
// to an address within a particular symbol.
type Reloc struct {
	// The bytes at [Offset, Offset+Size) within the containing Sym
	// should be updated to refer to the address Add bytes after the start
	// of the symbol Sym.
	Offset int   // start of the bytes to update, within the containing Sym
	Size   int   // number of bytes to update
	Sym    SymID // symbol whose address is referred to
	Add    int   // byte offset from the start of Sym

	// The Type records the form of address expected in the bytes
	// described by the previous fields: absolute, PC-relative, and so on.
	// TODO(rsc): The interpretation of Type is not exposed by this package.
	Type objabi.RelocType
}
    80  
// A Var describes a variable in a function stack frame: a declared
// local variable, an input argument, or an output result.
// Vars are stored in Func.Var.
type Var struct {
	// The combination of Name, Kind, and Offset uniquely
	// identifies a variable in a function stack frame.
	// Using fewer of these - in particular, using only Name - does not.
	Name   string // Name of variable (the Name of the symbol reference read from the object file).
	Kind   int    // TODO(rsc): Define meaning.
	Offset int    // Frame offset. TODO(rsc): Define meaning.

	Type SymID // Go type for variable.
}
    93  
// Func contains additional per-symbol information specific to functions.
// parseObject attaches a Func to every Sym whose Kind is objabi.STEXT.
type Func struct {
	Args     int           // size in bytes of argument frame: inputs and outputs
	Frame    int           // size in bytes of local variable frame
	Leaf     bool          // function omits save of link register (ARM)
	NoSplit  bool          // function omits stack split prologue
	Var      []Var         // detail about local variables
	PCSP     Data          // PC → SP offset map
	PCFile   Data          // PC → file number map (index into File)
	PCLine   Data          // PC → line number map
	PCInline Data          // PC → inline tree index map
	PCData   []Data        // PC → runtime support data map
	FuncData []FuncData    // non-PC-specific runtime support data
	File     []string      // paths indexed by PCFile
	InlTree  []InlinedCall // inline tree nodes (presumably the tree PCInline indexes into — TODO confirm)
}
   110  
   111  // TODO: Add PCData []byte and PCDataIter (similar to liblink).
   112  
// A FuncData is a single function-specific data value.
// In the object file the symbols of all of a function's FuncData entries
// are stored first, followed by all of their offsets.
type FuncData struct {
	Sym    SymID // symbol holding data
	Offset int64 // offset into symbol for funcdata pointer
}
   118  
// An InlinedCall is a node in an InlTree.
// See cmd/internal/obj.InlTree for details.
type InlinedCall struct {
	Parent int    // presumably the index of the parent node in the InlTree — TODO confirm against cmd/internal/obj.InlTree
	File   string // name of the file symbol recorded for this node
	Line   int    // presumably a line number within File — TODO confirm
	Func   SymID  // presumably the symbol of the inlined function — TODO confirm
}
   127  
// A Package is a parsed Go object file or archive defining a Go package.
// It is the result of Parse.
type Package struct {
	ImportPath string          // import path denoting this package
	Imports    []string        // packages imported by this package
	SymRefs    []SymID         // list of symbol names and versions referred to by this package
	Syms       []*Sym          // symbols defined by this package
	MaxVersion int             // maximum Version in any SymID in Syms
	Arch       string          // architecture
	Native     []*NativeReader // native object data (e.g. ELF)
}
   138  
// A NativeReader gives access to an archive member that is not a Go
// object file. Name is the member's name from its archive header, and the
// embedded io.ReaderAt reads the member's bytes.
type NativeReader struct {
	Name string
	io.ReaderAt
}
   143  
// Magic byte sequences recognized by the parser, and the sentinel errors
// it reports.
var (
	archiveHeader = []byte("!<arch>\n") // first 8 bytes of an archive file
	archiveMagic  = []byte("`\n")       // last 2 bytes of each 60-byte archive member header
	goobjHeader   = []byte("go objec")  // truncated to size of archiveHeader

	errCorruptArchive   = errors.New("corrupt archive")
	errTruncatedArchive = errors.New("truncated archive")
	errCorruptObject    = errors.New("corrupt object file")
	errNotObject        = errors.New("unrecognized object file format")
)
   154  
// An objReader is an object file reader.
// It tracks its own position in the file (offset) and refuses to read at
// or beyond limit, which lets parseArchive confine reads to one member.
type objReader struct {
	p          *Package      // package being populated
	b          *bufio.Reader // buffered reader over f
	f          *os.File      // underlying file; used directly when seeking
	err        error         // first error recorded by the error method, if any
	offset     int64         // file offset of the next byte to be read
	dataOffset int64         // file offset assigned to the next Data returned by readData
	limit      int64         // file offset at which reading must stop
	tmp        [256]byte     // scratch buffer
	pkgprefix  string        // replacement for `"".` in symbol names
}
   167  
   168  // init initializes r to read package p from f.
   169  func (r *objReader) init(f *os.File, p *Package) {
   170  	r.f = f
   171  	r.p = p
   172  	r.offset, _ = f.Seek(0, io.SeekCurrent)
   173  	r.limit, _ = f.Seek(0, io.SeekEnd)
   174  	f.Seek(r.offset, io.SeekStart)
   175  	r.b = bufio.NewReader(f)
   176  	r.pkgprefix = objabi.PathToPrefix(p.ImportPath) + "."
   177  }
   178  
   179  // error records that an error occurred.
   180  // It returns only the first error, so that an error
   181  // caused by an earlier error does not discard information
   182  // about the earlier error.
   183  func (r *objReader) error(err error) error {
   184  	if r.err == nil {
   185  		if err == io.EOF {
   186  			err = io.ErrUnexpectedEOF
   187  		}
   188  		r.err = err
   189  	}
   190  	// panic("corrupt") // useful for debugging
   191  	return r.err
   192  }
   193  
   194  // peek returns the next n bytes without advancing the reader.
   195  func (r *objReader) peek(n int) ([]byte, error) {
   196  	if r.err != nil {
   197  		return nil, r.err
   198  	}
   199  	if r.offset >= r.limit {
   200  		r.error(io.ErrUnexpectedEOF)
   201  		return nil, r.err
   202  	}
   203  	b, err := r.b.Peek(n)
   204  	if err != nil {
   205  		if err != bufio.ErrBufferFull {
   206  			r.error(err)
   207  		}
   208  	}
   209  	return b, err
   210  }
   211  
   212  // readByte reads and returns a byte from the input file.
   213  // On I/O error or EOF, it records the error but returns byte 0.
   214  // A sequence of 0 bytes will eventually terminate any
   215  // parsing state in the object file. In particular, it ends the
   216  // reading of a varint.
   217  func (r *objReader) readByte() byte {
   218  	if r.err != nil {
   219  		return 0
   220  	}
   221  	if r.offset >= r.limit {
   222  		r.error(io.ErrUnexpectedEOF)
   223  		return 0
   224  	}
   225  	b, err := r.b.ReadByte()
   226  	if err != nil {
   227  		if err == io.EOF {
   228  			err = io.ErrUnexpectedEOF
   229  		}
   230  		r.error(err)
   231  		b = 0
   232  	} else {
   233  		r.offset++
   234  	}
   235  	return b
   236  }
   237  
   238  // read reads exactly len(b) bytes from the input file.
   239  // If an error occurs, read returns the error but also
   240  // records it, so it is safe for callers to ignore the result
   241  // as long as delaying the report is not a problem.
   242  func (r *objReader) readFull(b []byte) error {
   243  	if r.err != nil {
   244  		return r.err
   245  	}
   246  	if r.offset+int64(len(b)) > r.limit {
   247  		return r.error(io.ErrUnexpectedEOF)
   248  	}
   249  	n, err := io.ReadFull(r.b, b)
   250  	r.offset += int64(n)
   251  	if err != nil {
   252  		return r.error(err)
   253  	}
   254  	return nil
   255  }
   256  
   257  // readInt reads a zigzag varint from the input file.
   258  func (r *objReader) readInt() int {
   259  	var u uint64
   260  
   261  	for shift := uint(0); ; shift += 7 {
   262  		if shift >= 64 {
   263  			r.error(errCorruptObject)
   264  			return 0
   265  		}
   266  		c := r.readByte()
   267  		u |= uint64(c&0x7F) << shift
   268  		if c&0x80 == 0 {
   269  			break
   270  		}
   271  	}
   272  
   273  	v := int64(u>>1) ^ (int64(u) << 63 >> 63)
   274  	if int64(int(v)) != v {
   275  		r.error(errCorruptObject) // TODO
   276  		return 0
   277  	}
   278  	return int(v)
   279  }
   280  
   281  // readString reads a length-delimited string from the input file.
   282  func (r *objReader) readString() string {
   283  	n := r.readInt()
   284  	buf := make([]byte, n)
   285  	r.readFull(buf)
   286  	return string(buf)
   287  }
   288  
   289  // readSymID reads a SymID from the input file.
   290  func (r *objReader) readSymID() SymID {
   291  	i := r.readInt()
   292  	return r.p.SymRefs[i]
   293  }
   294  
   295  func (r *objReader) readRef() {
   296  	name, vers := r.readString(), r.readInt()
   297  
   298  	// In a symbol name in an object file, "". denotes the
   299  	// prefix for the package in which the object file has been found.
   300  	// Expand it.
   301  	name = strings.Replace(name, `"".`, r.pkgprefix, -1)
   302  
   303  	// An individual object file only records version 0 (extern) or 1 (static).
   304  	// To make static symbols unique across all files being read, we
   305  	// replace version 1 with the version corresponding to the current
   306  	// file number. The number is incremented on each call to parseObject.
   307  	if vers != 0 {
   308  		vers = r.p.MaxVersion
   309  	}
   310  	r.p.SymRefs = append(r.p.SymRefs, SymID{name, vers})
   311  }
   312  
   313  // readData reads a data reference from the input file.
   314  func (r *objReader) readData() Data {
   315  	n := r.readInt()
   316  	d := Data{Offset: r.dataOffset, Size: int64(n)}
   317  	r.dataOffset += int64(n)
   318  	return d
   319  }
   320  
   321  // skip skips n bytes in the input.
   322  func (r *objReader) skip(n int64) {
   323  	if n < 0 {
   324  		r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip"))
   325  	}
   326  	if n < int64(len(r.tmp)) {
   327  		// Since the data is so small, a just reading from the buffered
   328  		// reader is better than flushing the buffer and seeking.
   329  		r.readFull(r.tmp[:n])
   330  	} else if n <= int64(r.b.Buffered()) {
   331  		// Even though the data is not small, it has already been read.
   332  		// Advance the buffer instead of seeking.
   333  		for n > int64(len(r.tmp)) {
   334  			r.readFull(r.tmp[:])
   335  			n -= int64(len(r.tmp))
   336  		}
   337  		r.readFull(r.tmp[:n])
   338  	} else {
   339  		// Seek, giving up buffered data.
   340  		_, err := r.f.Seek(r.offset+n, io.SeekStart)
   341  		if err != nil {
   342  			r.error(err)
   343  		}
   344  		r.offset += n
   345  		r.b.Reset(r.f)
   346  	}
   347  }
   348  
   349  // Parse parses an object file or archive from f,
   350  // assuming that its import path is pkgpath.
   351  func Parse(f *os.File, pkgpath string) (*Package, error) {
   352  	if pkgpath == "" {
   353  		pkgpath = `""`
   354  	}
   355  	p := new(Package)
   356  	p.ImportPath = pkgpath
   357  
   358  	var rd objReader
   359  	rd.init(f, p)
   360  	err := rd.readFull(rd.tmp[:8])
   361  	if err != nil {
   362  		if err == io.EOF {
   363  			err = io.ErrUnexpectedEOF
   364  		}
   365  		return nil, err
   366  	}
   367  
   368  	switch {
   369  	default:
   370  		return nil, errNotObject
   371  
   372  	case bytes.Equal(rd.tmp[:8], archiveHeader):
   373  		if err := rd.parseArchive(); err != nil {
   374  			return nil, err
   375  		}
   376  	case bytes.Equal(rd.tmp[:8], goobjHeader):
   377  		if err := rd.parseObject(goobjHeader); err != nil {
   378  			return nil, err
   379  		}
   380  	}
   381  
   382  	return p, nil
   383  }
   384  
   385  // trimSpace removes trailing spaces from b and returns the corresponding string.
   386  // This effectively parses the form used in archive headers.
   387  func trimSpace(b []byte) string {
   388  	return string(bytes.TrimRight(b, " "))
   389  }
   390  
   391  // parseArchive parses a Unix archive of Go object files.
   392  func (r *objReader) parseArchive() error {
   393  	for r.offset < r.limit {
   394  		if err := r.readFull(r.tmp[:60]); err != nil {
   395  			return err
   396  		}
   397  		data := r.tmp[:60]
   398  
   399  		// Each file is preceded by this text header (slice indices in first column):
   400  		//	 0:16	name
   401  		//	16:28 date
   402  		//	28:34 uid
   403  		//	34:40 gid
   404  		//	40:48 mode
   405  		//	48:58 size
   406  		//	58:60 magic - `\n
   407  		// We only care about name, size, and magic.
   408  		// The fields are space-padded on the right.
   409  		// The size is in decimal.
   410  		// The file data - size bytes - follows the header.
   411  		// Headers are 2-byte aligned, so if size is odd, an extra padding
   412  		// byte sits between the file data and the next header.
   413  		// The file data that follows is padded to an even number of bytes:
   414  		// if size is odd, an extra padding byte is inserted betw the next header.
   415  		if len(data) < 60 {
   416  			return errTruncatedArchive
   417  		}
   418  		if !bytes.Equal(data[58:60], archiveMagic) {
   419  			return errCorruptArchive
   420  		}
   421  		name := trimSpace(data[0:16])
   422  		size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64)
   423  		if err != nil {
   424  			return errCorruptArchive
   425  		}
   426  		data = data[60:]
   427  		fsize := size + size&1
   428  		if fsize < 0 || fsize < size {
   429  			return errCorruptArchive
   430  		}
   431  		switch name {
   432  		case "__.PKGDEF":
   433  			r.skip(size)
   434  		default:
   435  			oldLimit := r.limit
   436  			r.limit = r.offset + size
   437  
   438  			p, err := r.peek(8)
   439  			if err != nil {
   440  				return err
   441  			}
   442  			if bytes.Equal(p, goobjHeader) {
   443  				if err := r.parseObject(nil); err != nil {
   444  					return fmt.Errorf("parsing archive member %q: %v", name, err)
   445  				}
   446  			} else {
   447  				r.p.Native = append(r.p.Native, &NativeReader{
   448  					Name:     name,
   449  					ReaderAt: io.NewSectionReader(r.f, r.offset, size),
   450  				})
   451  			}
   452  
   453  			r.skip(r.limit - r.offset)
   454  			r.limit = oldLimit
   455  		}
   456  		if size&1 != 0 {
   457  			r.skip(1)
   458  		}
   459  	}
   460  	return nil
   461  }
   462  
   463  // parseObject parses a single Go object file.
   464  // The prefix is the bytes already read from the file,
   465  // typically in order to detect that this is an object file.
   466  // The object file consists of a textual header ending in "\n!\n"
   467  // and then the part we want to parse begins.
   468  // The format of that part is defined in a comment at the top
   469  // of src/liblink/objfile.c.
   470  func (r *objReader) parseObject(prefix []byte) error {
   471  	r.p.MaxVersion++
   472  	h := make([]byte, 0, 256)
   473  	h = append(h, prefix...)
   474  	var c1, c2, c3 byte
   475  	for {
   476  		c1, c2, c3 = c2, c3, r.readByte()
   477  		h = append(h, c3)
   478  		// The new export format can contain 0 bytes.
   479  		// Don't consider them errors, only look for r.err != nil.
   480  		if r.err != nil {
   481  			return errCorruptObject
   482  		}
   483  		if c1 == '\n' && c2 == '!' && c3 == '\n' {
   484  			break
   485  		}
   486  	}
   487  
   488  	hs := strings.Fields(string(h))
   489  	if len(hs) >= 4 {
   490  		r.p.Arch = hs[3]
   491  	}
   492  	// TODO: extract OS + build ID if/when we need it
   493  
   494  	r.readFull(r.tmp[:8])
   495  	if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go19ld")) {
   496  		return r.error(errCorruptObject)
   497  	}
   498  
   499  	b := r.readByte()
   500  	if b != 1 {
   501  		return r.error(errCorruptObject)
   502  	}
   503  
   504  	// Direct package dependencies.
   505  	for {
   506  		s := r.readString()
   507  		if s == "" {
   508  			break
   509  		}
   510  		r.p.Imports = append(r.p.Imports, s)
   511  	}
   512  
   513  	r.p.SymRefs = []SymID{{"", 0}}
   514  	for {
   515  		if b := r.readByte(); b != 0xfe {
   516  			if b != 0xff {
   517  				return r.error(errCorruptObject)
   518  			}
   519  			break
   520  		}
   521  
   522  		r.readRef()
   523  	}
   524  
   525  	dataLength := r.readInt()
   526  	r.readInt() // n relocations - ignore
   527  	r.readInt() // n pcdata - ignore
   528  	r.readInt() // n autom - ignore
   529  	r.readInt() // n funcdata - ignore
   530  	r.readInt() // n files - ignore
   531  
   532  	r.dataOffset = r.offset
   533  	r.skip(int64(dataLength))
   534  
   535  	// Symbols.
   536  	for {
   537  		if b := r.readByte(); b != 0xfe {
   538  			if b != 0xff {
   539  				return r.error(errCorruptObject)
   540  			}
   541  			break
   542  		}
   543  
   544  		typ := r.readByte()
   545  		s := &Sym{SymID: r.readSymID()}
   546  		r.p.Syms = append(r.p.Syms, s)
   547  		s.Kind = objabi.SymKind(typ)
   548  		flags := r.readInt()
   549  		s.DupOK = flags&1 != 0
   550  		s.Size = r.readInt()
   551  		s.Type = r.readSymID()
   552  		s.Data = r.readData()
   553  		s.Reloc = make([]Reloc, r.readInt())
   554  		for i := range s.Reloc {
   555  			rel := &s.Reloc[i]
   556  			rel.Offset = r.readInt()
   557  			rel.Size = r.readInt()
   558  			rel.Type = objabi.RelocType(r.readInt())
   559  			rel.Add = r.readInt()
   560  			rel.Sym = r.readSymID()
   561  		}
   562  
   563  		if s.Kind == objabi.STEXT {
   564  			f := new(Func)
   565  			s.Func = f
   566  			f.Args = r.readInt()
   567  			f.Frame = r.readInt()
   568  			flags := r.readInt()
   569  			f.Leaf = flags&(1<<0) != 0
   570  			f.NoSplit = r.readInt() != 0
   571  			f.Var = make([]Var, r.readInt())
   572  			for i := range f.Var {
   573  				v := &f.Var[i]
   574  				v.Name = r.readSymID().Name
   575  				v.Offset = r.readInt()
   576  				v.Kind = r.readInt()
   577  				v.Type = r.readSymID()
   578  			}
   579  
   580  			f.PCSP = r.readData()
   581  			f.PCFile = r.readData()
   582  			f.PCLine = r.readData()
   583  			f.PCInline = r.readData()
   584  			f.PCData = make([]Data, r.readInt())
   585  			for i := range f.PCData {
   586  				f.PCData[i] = r.readData()
   587  			}
   588  			f.FuncData = make([]FuncData, r.readInt())
   589  			for i := range f.FuncData {
   590  				f.FuncData[i].Sym = r.readSymID()
   591  			}
   592  			for i := range f.FuncData {
   593  				f.FuncData[i].Offset = int64(r.readInt()) // TODO
   594  			}
   595  			f.File = make([]string, r.readInt())
   596  			for i := range f.File {
   597  				f.File[i] = r.readSymID().Name
   598  			}
   599  			f.InlTree = make([]InlinedCall, r.readInt())
   600  			for i := range f.InlTree {
   601  				f.InlTree[i].Parent = r.readInt()
   602  				f.InlTree[i].File = r.readSymID().Name
   603  				f.InlTree[i].Line = r.readInt()
   604  				f.InlTree[i].Func = r.readSymID()
   605  			}
   606  		}
   607  	}
   608  
   609  	r.readFull(r.tmp[:7])
   610  	if !bytes.Equal(r.tmp[:7], []byte("\xffgo19ld")) {
   611  		return r.error(errCorruptObject)
   612  	}
   613  
   614  	return nil
   615  }
   616  
   617  func (r *Reloc) String(insnOffset uint64) string {
   618  	delta := r.Offset - int(insnOffset)
   619  	s := fmt.Sprintf("[%d:%d]%s", delta, delta+r.Size, r.Type)
   620  	if r.Sym.Name != "" {
   621  		if r.Add != 0 {
   622  			return fmt.Sprintf("%s:%s+%d", s, r.Sym.Name, r.Add)
   623  		}
   624  		return fmt.Sprintf("%s:%s", s, r.Sym.Name)
   625  	}
   626  	if r.Add != 0 {
   627  		return fmt.Sprintf("%s:%d", s, r.Add)
   628  	}
   629  	return s
   630  }