github.com/gernest/nezuko@v0.1.2/internal/goobj/read.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package goobj implements reading of Go object files and archives.
     6  //
     7  // TODO(rsc): Decide where this package should live. (golang.org/issue/6932)
     8  // TODO(rsc): Decide the appropriate integer types for various fields.
     9  package goobj
    10  
    11  import (
    12  	"bufio"
    13  	"bytes"
    14  	"errors"
    15  	"fmt"
    16  	"github.com/gernest/nezuko/internal/objabi"
    17  	"io"
    18  	"os"
    19  	"strconv"
    20  	"strings"
    21  )
    22  
// A Sym is a named symbol in an object file.
// The parser creates one Sym per symbol definition it reads and
// appends it to Package.Syms.
type Sym struct {
	SymID                // symbol identifier (name and version)
	Kind  objabi.SymKind // kind of symbol
	DupOK bool           // are duplicate definitions okay?
	Size  int64          // size of corresponding data
	Type  SymID          // symbol for Go type information
	Data  Data           // memory image of symbol
	Reloc []Reloc        // relocations to apply to Data
	Func  *Func          // additional data for functions (set only when Kind is objabi.STEXT)
}
    34  
    35  // A SymID - the combination of Name and Version - uniquely identifies
    36  // a symbol within a package.
    37  type SymID struct {
    38  	// Name is the name of a symbol.
    39  	Name string
    40  
    41  	// Version is zero for symbols with global visibility.
    42  	// Symbols with only file visibility (such as file-level static
    43  	// declarations in C) have a non-zero version distinguishing
    44  	// a symbol in one file from a symbol of the same name
    45  	// in another file
    46  	Version int64
    47  }
    48  
    49  func (s SymID) String() string {
    50  	if s.Version == 0 {
    51  		return s.Name
    52  	}
    53  	return fmt.Sprintf("%s<%d>", s.Name, s.Version)
    54  }
    55  
// A Data is a reference to data stored in an object file.
// It records the offset and size of the data, so that a client can
// read the data only if necessary.
type Data struct {
	Offset int64 // position of the first byte, as tracked by the reader's file offset
	Size   int64 // number of bytes, as declared in the object file
}
    63  
// A Reloc describes a relocation applied to a memory image to refer
// to an address within a particular symbol.
type Reloc struct {
	// The bytes at [Offset, Offset+Size) within the containing Sym
	// should be updated to refer to the address Add bytes after the start
	// of the symbol Sym.
	Offset int64
	Size   int64
	Sym    SymID
	Add    int64

	// The Type records the form of address expected in the bytes
	// described by the previous fields: absolute, PC-relative, and so on.
	// It is stored exactly as read from the object file.
	// TODO(rsc): The interpretation of Type is not exposed by this package.
	Type objabi.RelocType
}
    80  
// A Var describes a variable in a function stack frame: a declared
// local variable, an input argument, or an output result.
type Var struct {
	// The combination of Name, Kind, and Offset uniquely
	// identifies a variable in a function stack frame.
	// Using fewer of these - in particular, using only Name - does not.
	Name   string // Name of variable (the Name of the symbol reference read from the file).
	Kind   int64  // TODO(rsc): Define meaning.
	Offset int64  // Frame offset. TODO(rsc): Define meaning.

	Type SymID // Go type for variable.
}
    93  
// Func contains additional per-symbol information specific to functions.
// The parser attaches a Func to every Sym whose Kind is objabi.STEXT.
type Func struct {
	Args     int64      // size in bytes of argument frame: inputs and outputs
	Frame    int64      // size in bytes of local variable frame
	Leaf     bool       // function omits save of link register (ARM)
	NoSplit  bool       // function omits stack split prologue
	Var      []Var      // detail about local variables
	PCSP     Data       // PC → SP offset map
	PCFile   Data       // PC → file number map (index into File)
	PCLine   Data       // PC → line number map
	PCInline Data       // PC → inline tree index map
	PCData   []Data     // PC → runtime support data map
	FuncData []FuncData // non-PC-specific runtime support data
	File     []string   // paths indexed by PCFile
	InlTree  []InlinedCall // inlining tree nodes; see InlinedCall
}
   110  
   111  // TODO: Add PCData []byte and PCDataIter (similar to liblink).
   112  
// A FuncData is a single function-specific data value.
// It names a symbol and a position inside that symbol.
type FuncData struct {
	Sym    SymID // symbol holding data
	Offset int64 // offset into symbol for funcdata pointer
}
   118  
// An InlinedCall is a node in an InlTree.
// See github.com/gernest/nezuko/internal/obj.InlTree for details.
// All fields are stored as read from the object file; File is the
// name of the symbol reference that the file entry points at.
type InlinedCall struct {
	Parent   int64
	File     string
	Line     int64
	Func     SymID
	ParentPC int64
}
   128  
// A Package is a parsed Go object file or archive defining a Go package.
// It is the result returned by Parse.
type Package struct {
	ImportPath string          // import path denoting this package
	Imports    []string        // packages imported by this package
	SymRefs    []SymID         // list of symbol names and versions referred to by this package
	Syms       []*Sym          // symbols defined by this package
	MaxVersion int64           // maximum Version in any SymID in Syms
	Arch       string          // architecture
	Native     []*NativeReader // native object data (e.g. ELF)
}
   139  
// A NativeReader gives random access to an archive member that is not
// a Go object file (for example an ELF object). Name is the member's
// name from the archive header; the embedded io.ReaderAt covers
// exactly the member's bytes.
type NativeReader struct {
	Name string
	io.ReaderAt
}
   144  
var (
	// Byte sequences used to recognize the input format.
	archiveHeader = []byte("!<arch>\n")  // first 8 bytes of a Unix archive
	archiveMagic  = []byte("`\n")        // last 2 bytes of every archive member header
	goobjHeader   = []byte("go objec") // truncated to size of archiveHeader

	// Errors reported for malformed input.
	errCorruptArchive   = errors.New("corrupt archive")
	errTruncatedArchive = errors.New("truncated archive")
	errCorruptObject    = errors.New("corrupt object file")
	errNotObject        = errors.New("unrecognized object file format")
)
   155  
// An objReader is an object file reader.
// It wraps a file in a buffered reader, tracks the current file offset
// itself, and remembers the first error encountered so that callers
// may check for failure once after a run of reads.
type objReader struct {
	p          *Package      // package being filled in
	b          *bufio.Reader // buffered reader over f
	f          *os.File      // underlying file, used directly for seeking
	err        error         // first error recorded by the error method
	offset     int64         // file offset of the next byte to be read
	dataOffset int64         // file offset handed to the next Data returned by readData
	limit      int64         // offset at which reads fail: end of file or of the current archive member
	tmp        [256]byte     // scratch buffer for headers, magic strings, and skipped bytes
	pkgprefix  string        // replacement for the `"".` prefix in symbol names
}
   168  
   169  // init initializes r to read package p from f.
   170  func (r *objReader) init(f *os.File, p *Package) {
   171  	r.f = f
   172  	r.p = p
   173  	r.offset, _ = f.Seek(0, io.SeekCurrent)
   174  	r.limit, _ = f.Seek(0, io.SeekEnd)
   175  	f.Seek(r.offset, io.SeekStart)
   176  	r.b = bufio.NewReader(f)
   177  	r.pkgprefix = objabi.PathToPrefix(p.ImportPath) + "."
   178  }
   179  
   180  // error records that an error occurred.
   181  // It returns only the first error, so that an error
   182  // caused by an earlier error does not discard information
   183  // about the earlier error.
   184  func (r *objReader) error(err error) error {
   185  	if r.err == nil {
   186  		if err == io.EOF {
   187  			err = io.ErrUnexpectedEOF
   188  		}
   189  		r.err = err
   190  	}
   191  	// panic("corrupt") // useful for debugging
   192  	return r.err
   193  }
   194  
   195  // peek returns the next n bytes without advancing the reader.
   196  func (r *objReader) peek(n int) ([]byte, error) {
   197  	if r.err != nil {
   198  		return nil, r.err
   199  	}
   200  	if r.offset >= r.limit {
   201  		r.error(io.ErrUnexpectedEOF)
   202  		return nil, r.err
   203  	}
   204  	b, err := r.b.Peek(n)
   205  	if err != nil {
   206  		if err != bufio.ErrBufferFull {
   207  			r.error(err)
   208  		}
   209  	}
   210  	return b, err
   211  }
   212  
   213  // readByte reads and returns a byte from the input file.
   214  // On I/O error or EOF, it records the error but returns byte 0.
   215  // A sequence of 0 bytes will eventually terminate any
   216  // parsing state in the object file. In particular, it ends the
   217  // reading of a varint.
   218  func (r *objReader) readByte() byte {
   219  	if r.err != nil {
   220  		return 0
   221  	}
   222  	if r.offset >= r.limit {
   223  		r.error(io.ErrUnexpectedEOF)
   224  		return 0
   225  	}
   226  	b, err := r.b.ReadByte()
   227  	if err != nil {
   228  		if err == io.EOF {
   229  			err = io.ErrUnexpectedEOF
   230  		}
   231  		r.error(err)
   232  		b = 0
   233  	} else {
   234  		r.offset++
   235  	}
   236  	return b
   237  }
   238  
   239  // read reads exactly len(b) bytes from the input file.
   240  // If an error occurs, read returns the error but also
   241  // records it, so it is safe for callers to ignore the result
   242  // as long as delaying the report is not a problem.
   243  func (r *objReader) readFull(b []byte) error {
   244  	if r.err != nil {
   245  		return r.err
   246  	}
   247  	if r.offset+int64(len(b)) > r.limit {
   248  		return r.error(io.ErrUnexpectedEOF)
   249  	}
   250  	n, err := io.ReadFull(r.b, b)
   251  	r.offset += int64(n)
   252  	if err != nil {
   253  		return r.error(err)
   254  	}
   255  	return nil
   256  }
   257  
   258  // readInt reads a zigzag varint from the input file.
   259  func (r *objReader) readInt() int64 {
   260  	var u uint64
   261  
   262  	for shift := uint(0); ; shift += 7 {
   263  		if shift >= 64 {
   264  			r.error(errCorruptObject)
   265  			return 0
   266  		}
   267  		c := r.readByte()
   268  		u |= uint64(c&0x7F) << shift
   269  		if c&0x80 == 0 {
   270  			break
   271  		}
   272  	}
   273  
   274  	return int64(u>>1) ^ (int64(u) << 63 >> 63)
   275  }
   276  
   277  // readString reads a length-delimited string from the input file.
   278  func (r *objReader) readString() string {
   279  	n := r.readInt()
   280  	buf := make([]byte, n)
   281  	r.readFull(buf)
   282  	return string(buf)
   283  }
   284  
   285  // readSymID reads a SymID from the input file.
   286  func (r *objReader) readSymID() SymID {
   287  	i := r.readInt()
   288  	return r.p.SymRefs[i]
   289  }
   290  
   291  func (r *objReader) readRef() {
   292  	name, abiOrStatic := r.readString(), r.readInt()
   293  
   294  	// In a symbol name in an object file, "". denotes the
   295  	// prefix for the package in which the object file has been found.
   296  	// Expand it.
   297  	name = strings.ReplaceAll(name, `"".`, r.pkgprefix)
   298  
   299  	// The ABI field records either the ABI or -1 for static symbols.
   300  	//
   301  	// To distinguish different static symbols with the same name,
   302  	// we use the symbol "version". Version 0 corresponds to
   303  	// global symbols, and each file has a unique version > 0 for
   304  	// all of its static symbols. The version is incremented on
   305  	// each call to parseObject.
   306  	//
   307  	// For global symbols, we currently ignore the ABI.
   308  	//
   309  	// TODO(austin): Record the ABI in SymID. Since this is a
   310  	// public API, we'll have to keep Version as 0 and record the
   311  	// ABI in a new field (which differs from how the linker does
   312  	// this, but that's okay). Show the ABI in things like
   313  	// objdump.
   314  	var vers int64
   315  	if abiOrStatic == -1 {
   316  		// Static symbol
   317  		vers = r.p.MaxVersion
   318  	}
   319  	r.p.SymRefs = append(r.p.SymRefs, SymID{name, vers})
   320  }
   321  
   322  // readData reads a data reference from the input file.
   323  func (r *objReader) readData() Data {
   324  	n := r.readInt()
   325  	d := Data{Offset: r.dataOffset, Size: n}
   326  	r.dataOffset += n
   327  	return d
   328  }
   329  
   330  // skip skips n bytes in the input.
   331  func (r *objReader) skip(n int64) {
   332  	if n < 0 {
   333  		r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip"))
   334  	}
   335  	if n < int64(len(r.tmp)) {
   336  		// Since the data is so small, a just reading from the buffered
   337  		// reader is better than flushing the buffer and seeking.
   338  		r.readFull(r.tmp[:n])
   339  	} else if n <= int64(r.b.Buffered()) {
   340  		// Even though the data is not small, it has already been read.
   341  		// Advance the buffer instead of seeking.
   342  		for n > int64(len(r.tmp)) {
   343  			r.readFull(r.tmp[:])
   344  			n -= int64(len(r.tmp))
   345  		}
   346  		r.readFull(r.tmp[:n])
   347  	} else {
   348  		// Seek, giving up buffered data.
   349  		_, err := r.f.Seek(r.offset+n, io.SeekStart)
   350  		if err != nil {
   351  			r.error(err)
   352  		}
   353  		r.offset += n
   354  		r.b.Reset(r.f)
   355  	}
   356  }
   357  
   358  // Parse parses an object file or archive from f,
   359  // assuming that its import path is pkgpath.
   360  func Parse(f *os.File, pkgpath string) (*Package, error) {
   361  	if pkgpath == "" {
   362  		pkgpath = `""`
   363  	}
   364  	p := new(Package)
   365  	p.ImportPath = pkgpath
   366  
   367  	var rd objReader
   368  	rd.init(f, p)
   369  	err := rd.readFull(rd.tmp[:8])
   370  	if err != nil {
   371  		if err == io.EOF {
   372  			err = io.ErrUnexpectedEOF
   373  		}
   374  		return nil, err
   375  	}
   376  
   377  	switch {
   378  	default:
   379  		return nil, errNotObject
   380  
   381  	case bytes.Equal(rd.tmp[:8], archiveHeader):
   382  		if err := rd.parseArchive(); err != nil {
   383  			return nil, err
   384  		}
   385  	case bytes.Equal(rd.tmp[:8], goobjHeader):
   386  		if err := rd.parseObject(goobjHeader); err != nil {
   387  			return nil, err
   388  		}
   389  	}
   390  
   391  	return p, nil
   392  }
   393  
   394  // trimSpace removes trailing spaces from b and returns the corresponding string.
   395  // This effectively parses the form used in archive headers.
   396  func trimSpace(b []byte) string {
   397  	return string(bytes.TrimRight(b, " "))
   398  }
   399  
   400  // parseArchive parses a Unix archive of Go object files.
   401  func (r *objReader) parseArchive() error {
   402  	for r.offset < r.limit {
   403  		if err := r.readFull(r.tmp[:60]); err != nil {
   404  			return err
   405  		}
   406  		data := r.tmp[:60]
   407  
   408  		// Each file is preceded by this text header (slice indices in first column):
   409  		//	 0:16	name
   410  		//	16:28 date
   411  		//	28:34 uid
   412  		//	34:40 gid
   413  		//	40:48 mode
   414  		//	48:58 size
   415  		//	58:60 magic - `\n
   416  		// We only care about name, size, and magic.
   417  		// The fields are space-padded on the right.
   418  		// The size is in decimal.
   419  		// The file data - size bytes - follows the header.
   420  		// Headers are 2-byte aligned, so if size is odd, an extra padding
   421  		// byte sits between the file data and the next header.
   422  		// The file data that follows is padded to an even number of bytes:
   423  		// if size is odd, an extra padding byte is inserted betw the next header.
   424  		if len(data) < 60 {
   425  			return errTruncatedArchive
   426  		}
   427  		if !bytes.Equal(data[58:60], archiveMagic) {
   428  			return errCorruptArchive
   429  		}
   430  		name := trimSpace(data[0:16])
   431  		size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64)
   432  		if err != nil {
   433  			return errCorruptArchive
   434  		}
   435  		data = data[60:]
   436  		fsize := size + size&1
   437  		if fsize < 0 || fsize < size {
   438  			return errCorruptArchive
   439  		}
   440  		switch name {
   441  		case "__.PKGDEF":
   442  			r.skip(size)
   443  		default:
   444  			oldLimit := r.limit
   445  			r.limit = r.offset + size
   446  
   447  			p, err := r.peek(8)
   448  			if err != nil {
   449  				return err
   450  			}
   451  			if bytes.Equal(p, goobjHeader) {
   452  				if err := r.parseObject(nil); err != nil {
   453  					return fmt.Errorf("parsing archive member %q: %v", name, err)
   454  				}
   455  			} else {
   456  				r.p.Native = append(r.p.Native, &NativeReader{
   457  					Name:     name,
   458  					ReaderAt: io.NewSectionReader(r.f, r.offset, size),
   459  				})
   460  			}
   461  
   462  			r.skip(r.limit - r.offset)
   463  			r.limit = oldLimit
   464  		}
   465  		if size&1 != 0 {
   466  			r.skip(1)
   467  		}
   468  	}
   469  	return nil
   470  }
   471  
   472  // parseObject parses a single Go object file.
   473  // The prefix is the bytes already read from the file,
   474  // typically in order to detect that this is an object file.
   475  // The object file consists of a textual header ending in "\n!\n"
   476  // and then the part we want to parse begins.
   477  // The format of that part is defined in a comment at the top
   478  // of src/liblink/objfile.c.
   479  func (r *objReader) parseObject(prefix []byte) error {
   480  	r.p.MaxVersion++
   481  	h := make([]byte, 0, 256)
   482  	h = append(h, prefix...)
   483  	var c1, c2, c3 byte
   484  	for {
   485  		c1, c2, c3 = c2, c3, r.readByte()
   486  		h = append(h, c3)
   487  		// The new export format can contain 0 bytes.
   488  		// Don't consider them errors, only look for r.err != nil.
   489  		if r.err != nil {
   490  			return errCorruptObject
   491  		}
   492  		if c1 == '\n' && c2 == '!' && c3 == '\n' {
   493  			break
   494  		}
   495  	}
   496  
   497  	hs := strings.Fields(string(h))
   498  	if len(hs) >= 4 {
   499  		r.p.Arch = hs[3]
   500  	}
   501  	// TODO: extract OS + build ID if/when we need it
   502  
   503  	r.readFull(r.tmp[:8])
   504  	if !bytes.Equal(r.tmp[:8], []byte("\x00go112ld")) {
   505  		return r.error(errCorruptObject)
   506  	}
   507  
   508  	b := r.readByte()
   509  	if b != 1 {
   510  		return r.error(errCorruptObject)
   511  	}
   512  
   513  	// Direct package dependencies.
   514  	for {
   515  		s := r.readString()
   516  		if s == "" {
   517  			break
   518  		}
   519  		r.p.Imports = append(r.p.Imports, s)
   520  	}
   521  
   522  	r.p.SymRefs = []SymID{{"", 0}}
   523  	for {
   524  		if b := r.readByte(); b != 0xfe {
   525  			if b != 0xff {
   526  				return r.error(errCorruptObject)
   527  			}
   528  			break
   529  		}
   530  
   531  		r.readRef()
   532  	}
   533  
   534  	dataLength := r.readInt()
   535  	r.readInt() // n relocations - ignore
   536  	r.readInt() // n pcdata - ignore
   537  	r.readInt() // n autom - ignore
   538  	r.readInt() // n funcdata - ignore
   539  	r.readInt() // n files - ignore
   540  
   541  	r.dataOffset = r.offset
   542  	r.skip(dataLength)
   543  
   544  	// Symbols.
   545  	for {
   546  		if b := r.readByte(); b != 0xfe {
   547  			if b != 0xff {
   548  				return r.error(errCorruptObject)
   549  			}
   550  			break
   551  		}
   552  
   553  		typ := r.readByte()
   554  		s := &Sym{SymID: r.readSymID()}
   555  		r.p.Syms = append(r.p.Syms, s)
   556  		s.Kind = objabi.SymKind(typ)
   557  		flags := r.readInt()
   558  		s.DupOK = flags&1 != 0
   559  		s.Size = r.readInt()
   560  		s.Type = r.readSymID()
   561  		s.Data = r.readData()
   562  		s.Reloc = make([]Reloc, r.readInt())
   563  		for i := range s.Reloc {
   564  			rel := &s.Reloc[i]
   565  			rel.Offset = r.readInt()
   566  			rel.Size = r.readInt()
   567  			rel.Type = objabi.RelocType(r.readInt())
   568  			rel.Add = r.readInt()
   569  			rel.Sym = r.readSymID()
   570  		}
   571  
   572  		if s.Kind == objabi.STEXT {
   573  			f := new(Func)
   574  			s.Func = f
   575  			f.Args = r.readInt()
   576  			f.Frame = r.readInt()
   577  			flags := r.readInt()
   578  			f.Leaf = flags&(1<<0) != 0
   579  			f.NoSplit = r.readInt() != 0
   580  			f.Var = make([]Var, r.readInt())
   581  			for i := range f.Var {
   582  				v := &f.Var[i]
   583  				v.Name = r.readSymID().Name
   584  				v.Offset = r.readInt()
   585  				v.Kind = r.readInt()
   586  				v.Type = r.readSymID()
   587  			}
   588  
   589  			f.PCSP = r.readData()
   590  			f.PCFile = r.readData()
   591  			f.PCLine = r.readData()
   592  			f.PCInline = r.readData()
   593  			f.PCData = make([]Data, r.readInt())
   594  			for i := range f.PCData {
   595  				f.PCData[i] = r.readData()
   596  			}
   597  			f.FuncData = make([]FuncData, r.readInt())
   598  			for i := range f.FuncData {
   599  				f.FuncData[i].Sym = r.readSymID()
   600  			}
   601  			for i := range f.FuncData {
   602  				f.FuncData[i].Offset = r.readInt() // TODO
   603  			}
   604  			f.File = make([]string, r.readInt())
   605  			for i := range f.File {
   606  				f.File[i] = r.readSymID().Name
   607  			}
   608  			f.InlTree = make([]InlinedCall, r.readInt())
   609  			for i := range f.InlTree {
   610  				f.InlTree[i].Parent = r.readInt()
   611  				f.InlTree[i].File = r.readSymID().Name
   612  				f.InlTree[i].Line = r.readInt()
   613  				f.InlTree[i].Func = r.readSymID()
   614  				f.InlTree[i].ParentPC = r.readInt()
   615  			}
   616  		}
   617  	}
   618  
   619  	r.readFull(r.tmp[:7])
   620  	if !bytes.Equal(r.tmp[:7], []byte("go112ld")) {
   621  		return r.error(errCorruptObject)
   622  	}
   623  
   624  	return nil
   625  }
   626  
   627  func (r *Reloc) String(insnOffset uint64) string {
   628  	delta := r.Offset - int64(insnOffset)
   629  	s := fmt.Sprintf("[%d:%d]%s", delta, delta+r.Size, r.Type)
   630  	if r.Sym.Name != "" {
   631  		if r.Add != 0 {
   632  			return fmt.Sprintf("%s:%s+%d", s, r.Sym.Name, r.Add)
   633  		}
   634  		return fmt.Sprintf("%s:%s", s, r.Sym.Name)
   635  	}
   636  	if r.Add != 0 {
   637  		return fmt.Sprintf("%s:%d", s, r.Add)
   638  	}
   639  	return s
   640  }