github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/internal/goobj/read.go

github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/internal/goobj/read.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package goobj implements reading of Go object files and archives.
     6  //
     7  // TODO(rsc): Decide where this package should live. (golang.org/issue/6932)
     8  // TODO(rsc): Decide the appropriate integer types for various fields.
     9  package goobj
    10  
    11  import (
    12  	"bufio"
    13  	"bytes"
    14  	"github.com/gagliardetto/golang-go/cmd/internal/objabi"
    15  	"errors"
    16  	"fmt"
    17  	"io"
    18  	"os"
    19  	"strconv"
    20  	"strings"
    21  )
    22  
// A Sym is a named symbol in an object file.
//
// parseObject fills in Func only for symbols whose Kind is objabi.STEXT;
// for every other kind it leaves Func nil.
type Sym struct {
	SymID                // symbol identifier (name and version)
	Kind  objabi.SymKind // kind of symbol
	DupOK bool           // are duplicate definitions okay? (bit 0 of the symbol's flags word)
	Size  int64          // size of corresponding data
	Type  SymID          // symbol for Go type information
	Data  Data           // memory image of symbol
	Reloc []Reloc        // relocations to apply to Data
	Func  *Func          // additional data for functions
}
    34  
    35  // A SymID - the combination of Name and Version - uniquely identifies
    36  // a symbol within a package.
    37  type SymID struct {
    38  	// Name is the name of a symbol.
    39  	Name string
    40  
    41  	// Version is zero for symbols with global visibility.
    42  	// Symbols with only file visibility (such as file-level static
    43  	// declarations in C) have a non-zero version distinguishing
    44  	// a symbol in one file from a symbol of the same name
    45  	// in another file
    46  	Version int64
    47  }
    48  
    49  func (s SymID) String() string {
    50  	if s.Version == 0 {
    51  		return s.Name
    52  	}
    53  	return fmt.Sprintf("%s<%d>", s.Name, s.Version)
    54  }
    55  
// A Data is a reference to data stored in an object file.
// It records the offset and size of the data, so that a client can
// read the data only if necessary.
//
// readData hands out consecutive references: each new Data starts at the
// reader's running data offset, which is then advanced by Size.
type Data struct {
	Offset int64 // where the data starts, as assigned by readData
	Size   int64 // length of the data, as read from the object file
}
    63  
// A Reloc describes a relocation applied to a memory image to refer
// to an address within a particular symbol.
type Reloc struct {
	// The bytes at [Offset, Offset+Size) within the containing Sym
	// should be updated to refer to the address Add bytes after the start
	// of the symbol Sym.
	Offset int64 // start of the bytes to update, relative to the containing Sym
	Size   int64 // number of bytes to update
	Sym    SymID // symbol whose address is referred to
	Add    int64 // addend: distance in bytes past the start of Sym

	// The Type records the form of address expected in the bytes
	// described by the previous fields: absolute, PC-relative, and so on.
	// TODO(rsc): The interpretation of Type is not exposed by this package.
	Type objabi.RelocType
}
    80  
// A Var describes a variable in a function stack frame: a declared
// local variable, an input argument, or an output result.
//
// parseObject takes Name from the name of a referenced symbol and reads
// the fields from the object file in the order Name, Offset, Kind, Type.
type Var struct {
	// The combination of Name, Kind, and Offset uniquely
	// identifies a variable in a function stack frame.
	// Using fewer of these - in particular, using only Name - does not.
	Name   string // Name of variable.
	Kind   int64  // TODO(rsc): Define meaning.
	Offset int64  // Frame offset. TODO(rsc): Define meaning.

	Type SymID // Go type for variable.
}
    93  
// Func contains additional per-symbol information specific to functions.
//
// parseObject allocates one Func for each symbol whose Kind is objabi.STEXT.
// Leaf and TopFrame are decoded from the same flags word (bits 0 and 4
// respectively); NoSplit is read as a separate integer that is non-zero
// when set. InlTree holds the function's inlining tree nodes; see InlinedCall.
type Func struct {
	Args     int64      // size in bytes of argument frame: inputs and outputs
	Frame    int64      // size in bytes of local variable frame
	Leaf     bool       // function omits save of link register (ARM)
	NoSplit  bool       // function omits stack split prologue
	TopFrame bool       // function is the top of the call stack
	Var      []Var      // detail about local variables
	PCSP     Data       // PC → SP offset map
	PCFile   Data       // PC → file number map (index into File)
	PCLine   Data       // PC → line number map
	PCInline Data       // PC → inline tree index map
	PCData   []Data     // PC → runtime support data map
	FuncData []FuncData // non-PC-specific runtime support data
	File     []string   // paths indexed by PCFile
	InlTree  []InlinedCall
}
   111  
   112  // TODO: Add PCData []byte and PCDataIter (similar to liblink).
   113  
// A FuncData is a single function-specific data value.
//
// In the object file the Sym values of all of a function's FuncData
// entries come first, followed by all of their Offset values;
// parseObject reads them in that order.
type FuncData struct {
	Sym    SymID // symbol holding data
	Offset int64 // offset into symbol for funcdata pointer
}
   119  
// An InlinedCall is a node in an InlTree.
// See cmd/internal/obj.InlTree for details.
//
// parseObject reads the fields in the order Parent, File, Line, Func,
// ParentPC; File is the name of a referenced symbol.
// NOTE(review): the meaning of Parent and ParentPC is not visible in this
// file (presumably an index into the same InlTree and a PC in the parent);
// confirm against cmd/internal/obj.InlTree.
type InlinedCall struct {
	Parent   int64
	File     string
	Line     int64
	Func     SymID
	ParentPC int64
}
   129  
// A Package is a parsed Go object file or archive defining a Go package.
//
// Parse returns a Package only when parsing succeeds. MaxVersion is
// incremented once for every object parsed by parseObject, and the value
// current at that time is used as the Version of that object's static symbols.
type Package struct {
	ImportPath    string          // import path denoting this package
	Imports       []string        // packages imported by this package
	SymRefs       []SymID         // list of symbol names and versions referred to by this package
	Syms          []*Sym          // symbols defined by this package
	MaxVersion    int64           // maximum Version in any SymID in Syms
	Arch          string          // architecture
	Native        []*NativeReader // native object data (e.g. ELF)
	DWARFFileList []string        // List of files for the DWARF .debug_lines section
}
   141  
// A NativeReader gives random access to an archive member that is not a
// Go object file. parseArchive creates one for every member (other than
// "__.PKGDEF") whose contents do not begin with the Go object header:
// Name is the member name from the archive header, and the embedded
// io.ReaderAt is a section reader covering exactly that member's bytes
// in the underlying file.
type NativeReader struct {
	Name string
	io.ReaderAt
}
   146  
// Format markers and the sentinel errors reported by the parser.
var (
	// Parse compares the first 8 bytes of the input against archiveHeader
	// and goobjHeader to choose between archive and single-object parsing.
	// archiveMagic is the 2-byte terminator that parseArchive requires at
	// the end of every 60-byte archive member header.
	archiveHeader = []byte("!<arch>\n")
	archiveMagic  = []byte("`\n")
	goobjHeader   = []byte("go objec") // truncated to size of archiveHeader

	// Sentinel errors. errNotObject is returned by Parse when the input
	// starts with neither recognized header.
	errCorruptArchive   = errors.New("corrupt archive")
	errTruncatedArchive = errors.New("truncated archive")
	errCorruptObject    = errors.New("corrupt object file")
	errNotObject        = errors.New("unrecognized object file format")
)
   157  
// An objReader is an object file reader.
//
// Errors are sticky: the first error passed to the error method is kept
// in err, and the read methods return zero values once err is set.
type objReader struct {
	p          *Package      // package being filled in
	b          *bufio.Reader // buffered reader over f
	f          *os.File      // underlying file; used directly when skip seeks
	err        error         // first recorded error, or nil
	offset     int64         // position in f of the next byte to be read
	dataOffset int64         // offset that the next readData call will hand out
	limit      int64         // reads at or beyond this offset fail with io.ErrUnexpectedEOF
	tmp        [256]byte     // scratch buffer for headers, magic strings and skipped bytes
	pkgprefix  string        // replacement for `"".` in symbol names (see readRef)
}
   170  
   171  // init initializes r to read package p from f.
   172  func (r *objReader) init(f *os.File, p *Package) {
   173  	r.f = f
   174  	r.p = p
   175  	r.offset, _ = f.Seek(0, io.SeekCurrent)
   176  	r.limit, _ = f.Seek(0, io.SeekEnd)
   177  	f.Seek(r.offset, io.SeekStart)
   178  	r.b = bufio.NewReader(f)
   179  	r.pkgprefix = objabi.PathToPrefix(p.ImportPath) + "."
   180  }
   181  
   182  // error records that an error occurred.
   183  // It returns only the first error, so that an error
   184  // caused by an earlier error does not discard information
   185  // about the earlier error.
   186  func (r *objReader) error(err error) error {
   187  	if r.err == nil {
   188  		if err == io.EOF {
   189  			err = io.ErrUnexpectedEOF
   190  		}
   191  		r.err = err
   192  	}
   193  	// panic("corrupt") // useful for debugging
   194  	return r.err
   195  }
   196  
   197  // peek returns the next n bytes without advancing the reader.
   198  func (r *objReader) peek(n int) ([]byte, error) {
   199  	if r.err != nil {
   200  		return nil, r.err
   201  	}
   202  	if r.offset >= r.limit {
   203  		r.error(io.ErrUnexpectedEOF)
   204  		return nil, r.err
   205  	}
   206  	b, err := r.b.Peek(n)
   207  	if err != nil {
   208  		if err != bufio.ErrBufferFull {
   209  			r.error(err)
   210  		}
   211  	}
   212  	return b, err
   213  }
   214  
   215  // readByte reads and returns a byte from the input file.
   216  // On I/O error or EOF, it records the error but returns byte 0.
   217  // A sequence of 0 bytes will eventually terminate any
   218  // parsing state in the object file. In particular, it ends the
   219  // reading of a varint.
   220  func (r *objReader) readByte() byte {
   221  	if r.err != nil {
   222  		return 0
   223  	}
   224  	if r.offset >= r.limit {
   225  		r.error(io.ErrUnexpectedEOF)
   226  		return 0
   227  	}
   228  	b, err := r.b.ReadByte()
   229  	if err != nil {
   230  		if err == io.EOF {
   231  			err = io.ErrUnexpectedEOF
   232  		}
   233  		r.error(err)
   234  		b = 0
   235  	} else {
   236  		r.offset++
   237  	}
   238  	return b
   239  }
   240  
   241  // read reads exactly len(b) bytes from the input file.
   242  // If an error occurs, read returns the error but also
   243  // records it, so it is safe for callers to ignore the result
   244  // as long as delaying the report is not a problem.
   245  func (r *objReader) readFull(b []byte) error {
   246  	if r.err != nil {
   247  		return r.err
   248  	}
   249  	if r.offset+int64(len(b)) > r.limit {
   250  		return r.error(io.ErrUnexpectedEOF)
   251  	}
   252  	n, err := io.ReadFull(r.b, b)
   253  	r.offset += int64(n)
   254  	if err != nil {
   255  		return r.error(err)
   256  	}
   257  	return nil
   258  }
   259  
   260  // readInt reads a zigzag varint from the input file.
   261  func (r *objReader) readInt() int64 {
   262  	var u uint64
   263  
   264  	for shift := uint(0); ; shift += 7 {
   265  		if shift >= 64 {
   266  			r.error(errCorruptObject)
   267  			return 0
   268  		}
   269  		c := r.readByte()
   270  		u |= uint64(c&0x7F) << shift
   271  		if c&0x80 == 0 {
   272  			break
   273  		}
   274  	}
   275  
   276  	return int64(u>>1) ^ (int64(u) << 63 >> 63)
   277  }
   278  
   279  // readString reads a length-delimited string from the input file.
   280  func (r *objReader) readString() string {
   281  	n := r.readInt()
   282  	buf := make([]byte, n)
   283  	r.readFull(buf)
   284  	return string(buf)
   285  }
   286  
   287  // readSymID reads a SymID from the input file.
   288  func (r *objReader) readSymID() SymID {
   289  	i := r.readInt()
   290  	return r.p.SymRefs[i]
   291  }
   292  
   293  func (r *objReader) readRef() {
   294  	name, abiOrStatic := r.readString(), r.readInt()
   295  
   296  	// In a symbol name in an object file, "". denotes the
   297  	// prefix for the package in which the object file has been found.
   298  	// Expand it.
   299  	name = strings.ReplaceAll(name, `"".`, r.pkgprefix)
   300  
   301  	// The ABI field records either the ABI or -1 for static symbols.
   302  	//
   303  	// To distinguish different static symbols with the same name,
   304  	// we use the symbol "version". Version 0 corresponds to
   305  	// global symbols, and each file has a unique version > 0 for
   306  	// all of its static symbols. The version is incremented on
   307  	// each call to parseObject.
   308  	//
   309  	// For global symbols, we currently ignore the ABI.
   310  	//
   311  	// TODO(austin): Record the ABI in SymID. Since this is a
   312  	// public API, we'll have to keep Version as 0 and record the
   313  	// ABI in a new field (which differs from how the linker does
   314  	// this, but that's okay). Show the ABI in things like
   315  	// objdump.
   316  	var vers int64
   317  	if abiOrStatic == -1 {
   318  		// Static symbol
   319  		vers = r.p.MaxVersion
   320  	}
   321  	r.p.SymRefs = append(r.p.SymRefs, SymID{name, vers})
   322  }
   323  
   324  // readData reads a data reference from the input file.
   325  func (r *objReader) readData() Data {
   326  	n := r.readInt()
   327  	d := Data{Offset: r.dataOffset, Size: n}
   328  	r.dataOffset += n
   329  	return d
   330  }
   331  
   332  // skip skips n bytes in the input.
   333  func (r *objReader) skip(n int64) {
   334  	if n < 0 {
   335  		r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip"))
   336  	}
   337  	if n < int64(len(r.tmp)) {
   338  		// Since the data is so small, a just reading from the buffered
   339  		// reader is better than flushing the buffer and seeking.
   340  		r.readFull(r.tmp[:n])
   341  	} else if n <= int64(r.b.Buffered()) {
   342  		// Even though the data is not small, it has already been read.
   343  		// Advance the buffer instead of seeking.
   344  		for n > int64(len(r.tmp)) {
   345  			r.readFull(r.tmp[:])
   346  			n -= int64(len(r.tmp))
   347  		}
   348  		r.readFull(r.tmp[:n])
   349  	} else {
   350  		// Seek, giving up buffered data.
   351  		_, err := r.f.Seek(r.offset+n, io.SeekStart)
   352  		if err != nil {
   353  			r.error(err)
   354  		}
   355  		r.offset += n
   356  		r.b.Reset(r.f)
   357  	}
   358  }
   359  
   360  // Parse parses an object file or archive from f,
   361  // assuming that its import path is pkgpath.
   362  func Parse(f *os.File, pkgpath string) (*Package, error) {
   363  	if pkgpath == "" {
   364  		pkgpath = `""`
   365  	}
   366  	p := new(Package)
   367  	p.ImportPath = pkgpath
   368  
   369  	var rd objReader
   370  	rd.init(f, p)
   371  	err := rd.readFull(rd.tmp[:8])
   372  	if err != nil {
   373  		if err == io.EOF {
   374  			err = io.ErrUnexpectedEOF
   375  		}
   376  		return nil, err
   377  	}
   378  
   379  	switch {
   380  	default:
   381  		return nil, errNotObject
   382  
   383  	case bytes.Equal(rd.tmp[:8], archiveHeader):
   384  		if err := rd.parseArchive(); err != nil {
   385  			return nil, err
   386  		}
   387  	case bytes.Equal(rd.tmp[:8], goobjHeader):
   388  		if err := rd.parseObject(goobjHeader); err != nil {
   389  			return nil, err
   390  		}
   391  	}
   392  
   393  	return p, nil
   394  }
   395  
   396  // trimSpace removes trailing spaces from b and returns the corresponding string.
   397  // This effectively parses the form used in archive headers.
   398  func trimSpace(b []byte) string {
   399  	return string(bytes.TrimRight(b, " "))
   400  }
   401  
   402  // parseArchive parses a Unix archive of Go object files.
   403  func (r *objReader) parseArchive() error {
   404  	for r.offset < r.limit {
   405  		if err := r.readFull(r.tmp[:60]); err != nil {
   406  			return err
   407  		}
   408  		data := r.tmp[:60]
   409  
   410  		// Each file is preceded by this text header (slice indices in first column):
   411  		//	 0:16	name
   412  		//	16:28 date
   413  		//	28:34 uid
   414  		//	34:40 gid
   415  		//	40:48 mode
   416  		//	48:58 size
   417  		//	58:60 magic - `\n
   418  		// We only care about name, size, and magic.
   419  		// The fields are space-padded on the right.
   420  		// The size is in decimal.
   421  		// The file data - size bytes - follows the header.
   422  		// Headers are 2-byte aligned, so if size is odd, an extra padding
   423  		// byte sits between the file data and the next header.
   424  		// The file data that follows is padded to an even number of bytes:
   425  		// if size is odd, an extra padding byte is inserted betw the next header.
   426  		if len(data) < 60 {
   427  			return errTruncatedArchive
   428  		}
   429  		if !bytes.Equal(data[58:60], archiveMagic) {
   430  			return errCorruptArchive
   431  		}
   432  		name := trimSpace(data[0:16])
   433  		size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64)
   434  		if err != nil {
   435  			return errCorruptArchive
   436  		}
   437  		data = data[60:]
   438  		fsize := size + size&1
   439  		if fsize < 0 || fsize < size {
   440  			return errCorruptArchive
   441  		}
   442  		switch name {
   443  		case "__.PKGDEF":
   444  			r.skip(size)
   445  		default:
   446  			oldLimit := r.limit
   447  			r.limit = r.offset + size
   448  
   449  			p, err := r.peek(8)
   450  			if err != nil {
   451  				return err
   452  			}
   453  			if bytes.Equal(p, goobjHeader) {
   454  				if err := r.parseObject(nil); err != nil {
   455  					return fmt.Errorf("parsing archive member %q: %v", name, err)
   456  				}
   457  			} else {
   458  				r.p.Native = append(r.p.Native, &NativeReader{
   459  					Name:     name,
   460  					ReaderAt: io.NewSectionReader(r.f, r.offset, size),
   461  				})
   462  			}
   463  
   464  			r.skip(r.limit - r.offset)
   465  			r.limit = oldLimit
   466  		}
   467  		if size&1 != 0 {
   468  			r.skip(1)
   469  		}
   470  	}
   471  	return nil
   472  }
   473  
   474  // parseObject parses a single Go object file.
   475  // The prefix is the bytes already read from the file,
   476  // typically in order to detect that this is an object file.
   477  // The object file consists of a textual header ending in "\n!\n"
   478  // and then the part we want to parse begins.
   479  // The format of that part is defined in a comment at the top
   480  // of src/liblink/objfile.c.
   481  func (r *objReader) parseObject(prefix []byte) error {
   482  	r.p.MaxVersion++
   483  	h := make([]byte, 0, 256)
   484  	h = append(h, prefix...)
   485  	var c1, c2, c3 byte
   486  	for {
   487  		c1, c2, c3 = c2, c3, r.readByte()
   488  		h = append(h, c3)
   489  		// The new export format can contain 0 bytes.
   490  		// Don't consider them errors, only look for r.err != nil.
   491  		if r.err != nil {
   492  			return errCorruptObject
   493  		}
   494  		if c1 == '\n' && c2 == '!' && c3 == '\n' {
   495  			break
   496  		}
   497  	}
   498  
   499  	hs := strings.Fields(string(h))
   500  	if len(hs) >= 4 {
   501  		r.p.Arch = hs[3]
   502  	}
   503  	// TODO: extract OS + build ID if/when we need it
   504  
   505  	p, err := r.peek(8)
   506  	if err != nil {
   507  		return err
   508  	}
   509  	if bytes.Equal(p, []byte("\x00go114LD")) {
   510  		r.readNew()
   511  		return nil
   512  	}
   513  	r.readFull(r.tmp[:8])
   514  	if !bytes.Equal(r.tmp[:8], []byte("\x00go114ld")) {
   515  		return r.error(errCorruptObject)
   516  	}
   517  
   518  	b := r.readByte()
   519  	if b != 1 {
   520  		return r.error(errCorruptObject)
   521  	}
   522  
   523  	// Direct package dependencies.
   524  	for {
   525  		s := r.readString()
   526  		if s == "" {
   527  			break
   528  		}
   529  		r.p.Imports = append(r.p.Imports, s)
   530  	}
   531  
   532  	// Read filenames for dwarf info.
   533  	count := r.readInt()
   534  	for i := int64(0); i < count; i++ {
   535  		r.p.DWARFFileList = append(r.p.DWARFFileList, r.readString())
   536  	}
   537  
   538  	r.p.SymRefs = []SymID{{"", 0}}
   539  	for {
   540  		if b := r.readByte(); b != 0xfe {
   541  			if b != 0xff {
   542  				return r.error(errCorruptObject)
   543  			}
   544  			break
   545  		}
   546  
   547  		r.readRef()
   548  	}
   549  
   550  	dataLength := r.readInt()
   551  	r.readInt() // n relocations - ignore
   552  	r.readInt() // n pcdata - ignore
   553  	r.readInt() // n autom - ignore
   554  	r.readInt() // n funcdata - ignore
   555  	r.readInt() // n files - ignore
   556  
   557  	r.dataOffset = r.offset
   558  	r.skip(dataLength)
   559  
   560  	// Symbols.
   561  	for {
   562  		if b := r.readByte(); b != 0xfe {
   563  			if b != 0xff {
   564  				return r.error(errCorruptObject)
   565  			}
   566  			break
   567  		}
   568  
   569  		typ := r.readByte()
   570  		s := &Sym{SymID: r.readSymID()}
   571  		r.p.Syms = append(r.p.Syms, s)
   572  		s.Kind = objabi.SymKind(typ)
   573  		flags := r.readInt()
   574  		s.DupOK = flags&1 != 0
   575  		s.Size = r.readInt()
   576  		s.Type = r.readSymID()
   577  		s.Data = r.readData()
   578  		s.Reloc = make([]Reloc, r.readInt())
   579  		for i := range s.Reloc {
   580  			rel := &s.Reloc[i]
   581  			rel.Offset = r.readInt()
   582  			rel.Size = r.readInt()
   583  			rel.Type = objabi.RelocType(r.readInt())
   584  			rel.Add = r.readInt()
   585  			rel.Sym = r.readSymID()
   586  		}
   587  
   588  		if s.Kind == objabi.STEXT {
   589  			f := new(Func)
   590  			s.Func = f
   591  			f.Args = r.readInt()
   592  			f.Frame = r.readInt()
   593  			flags := r.readInt()
   594  			f.Leaf = flags&(1<<0) != 0
   595  			f.TopFrame = flags&(1<<4) != 0
   596  			f.NoSplit = r.readInt() != 0
   597  			f.Var = make([]Var, r.readInt())
   598  			for i := range f.Var {
   599  				v := &f.Var[i]
   600  				v.Name = r.readSymID().Name
   601  				v.Offset = r.readInt()
   602  				v.Kind = r.readInt()
   603  				v.Type = r.readSymID()
   604  			}
   605  
   606  			f.PCSP = r.readData()
   607  			f.PCFile = r.readData()
   608  			f.PCLine = r.readData()
   609  			f.PCInline = r.readData()
   610  			f.PCData = make([]Data, r.readInt())
   611  			for i := range f.PCData {
   612  				f.PCData[i] = r.readData()
   613  			}
   614  			f.FuncData = make([]FuncData, r.readInt())
   615  			for i := range f.FuncData {
   616  				f.FuncData[i].Sym = r.readSymID()
   617  			}
   618  			for i := range f.FuncData {
   619  				f.FuncData[i].Offset = r.readInt() // TODO
   620  			}
   621  			f.File = make([]string, r.readInt())
   622  			for i := range f.File {
   623  				f.File[i] = r.readSymID().Name
   624  			}
   625  			f.InlTree = make([]InlinedCall, r.readInt())
   626  			for i := range f.InlTree {
   627  				f.InlTree[i].Parent = r.readInt()
   628  				f.InlTree[i].File = r.readSymID().Name
   629  				f.InlTree[i].Line = r.readInt()
   630  				f.InlTree[i].Func = r.readSymID()
   631  				f.InlTree[i].ParentPC = r.readInt()
   632  			}
   633  		}
   634  	}
   635  
   636  	r.readFull(r.tmp[:7])
   637  	if !bytes.Equal(r.tmp[:7], []byte("go114ld")) {
   638  		return r.error(errCorruptObject)
   639  	}
   640  
   641  	return nil
   642  }
   643  
   644  func (r *Reloc) String(insnOffset uint64) string {
   645  	delta := r.Offset - int64(insnOffset)
   646  	s := fmt.Sprintf("[%d:%d]%s", delta, delta+r.Size, r.Type)
   647  	if r.Sym.Name != "" {
   648  		if r.Add != 0 {
   649  			return fmt.Sprintf("%s:%s+%d", s, r.Sym.Name, r.Add)
   650  		}
   651  		return fmt.Sprintf("%s:%s", s, r.Sym.Name)
   652  	}
   653  	if r.Add != 0 {
   654  		return fmt.Sprintf("%s:%d", s, r.Add)
   655  	}
   656  	return s
   657  }