github.com/hbdrawn/golang@v0.0.0-20141214014649-6b835209aba2/src/cmd/internal/goobj/read.go (about)

     1  // Copyright 2013 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package goobj implements reading of Go object files and archives.
     6  //
     7  // TODO(rsc): Decide where this package should live. (golang.org/issue/6932)
     8  // TODO(rsc): Decide the appropriate integer types for various fields.
     9  // TODO(rsc): Write tests. (File format still up in the air a little.)
    10  package goobj
    11  
    12  import (
    13  	"bufio"
    14  	"bytes"
    15  	"errors"
    16  	"fmt"
    17  	"io"
    18  	"strconv"
    19  	"strings"
    20  )
    21  
    22  // A SymKind describes the kind of memory represented by a symbol.
    23  type SymKind int
    24  
    25  // This list is taken from include/link.h.
    26  
    27  // Defined SymKind values.
    28  // TODO(rsc): Give idiomatic Go names.
    29  // TODO(rsc): Reduce the number of symbol types in the object files.
    30  const (
    31  	_ SymKind = iota
    32  
    33  	// readonly, executable
    34  	STEXT
    35  	SELFRXSECT
    36  
    37  	// readonly, non-executable
    38  	STYPE
    39  	SSTRING
    40  	SGOSTRING
    41  	SGOFUNC
    42  	SRODATA
    43  	SFUNCTAB
    44  	STYPELINK
    45  	SSYMTAB // TODO: move to unmapped section
    46  	SPCLNTAB
    47  	SELFROSECT
    48  
    49  	// writable, non-executable
    50  	SMACHOPLT
    51  	SELFSECT
    52  	SMACHO // Mach-O __nl_symbol_ptr
    53  	SMACHOGOT
    54  	SNOPTRDATA
    55  	SINITARR
    56  	SDATA
    57  	SWINDOWS
    58  	SBSS
    59  	SNOPTRBSS
    60  	STLSBSS
    61  
    62  	// not mapped
    63  	SXREF
    64  	SMACHOSYMSTR
    65  	SMACHOSYMTAB
    66  	SMACHOINDIRECTPLT
    67  	SMACHOINDIRECTGOT
    68  	SFILE
    69  	SFILEPATH
    70  	SCONST
    71  	SDYNIMPORT
    72  	SHOSTOBJ
    73  )
    74  
    75  var symKindStrings = []string{
    76  	SBSS:              "SBSS",
    77  	SCONST:            "SCONST",
    78  	SDATA:             "SDATA",
    79  	SDYNIMPORT:        "SDYNIMPORT",
    80  	SELFROSECT:        "SELFROSECT",
    81  	SELFRXSECT:        "SELFRXSECT",
    82  	SELFSECT:          "SELFSECT",
    83  	SFILE:             "SFILE",
    84  	SFILEPATH:         "SFILEPATH",
    85  	SFUNCTAB:          "SFUNCTAB",
    86  	SGOFUNC:           "SGOFUNC",
    87  	SGOSTRING:         "SGOSTRING",
    88  	SHOSTOBJ:          "SHOSTOBJ",
    89  	SINITARR:          "SINITARR",
    90  	SMACHO:            "SMACHO",
    91  	SMACHOGOT:         "SMACHOGOT",
    92  	SMACHOINDIRECTGOT: "SMACHOINDIRECTGOT",
    93  	SMACHOINDIRECTPLT: "SMACHOINDIRECTPLT",
    94  	SMACHOPLT:         "SMACHOPLT",
    95  	SMACHOSYMSTR:      "SMACHOSYMSTR",
    96  	SMACHOSYMTAB:      "SMACHOSYMTAB",
    97  	SNOPTRBSS:         "SNOPTRBSS",
    98  	SNOPTRDATA:        "SNOPTRDATA",
    99  	SPCLNTAB:          "SPCLNTAB",
   100  	SRODATA:           "SRODATA",
   101  	SSTRING:           "SSTRING",
   102  	SSYMTAB:           "SSYMTAB",
   103  	STEXT:             "STEXT",
   104  	STLSBSS:           "STLSBSS",
   105  	STYPE:             "STYPE",
   106  	STYPELINK:         "STYPELINK",
   107  	SWINDOWS:          "SWINDOWS",
   108  	SXREF:             "SXREF",
   109  }
   110  
   111  func (k SymKind) String() string {
   112  	if k < 0 || int(k) >= len(symKindStrings) {
   113  		return fmt.Sprintf("SymKind(%d)", k)
   114  	}
   115  	return symKindStrings[k]
   116  }
   117  
// A Sym is a named symbol in an object file.
// parseObject appends one Sym to Package.Syms for each symbol record
// it reads.
type Sym struct {
	SymID         // symbol identifier (name and version)
	Kind  SymKind // kind of symbol, converted from the integer stored in the file
	DupOK bool    // are duplicate definitions okay? (bit 0 of the symbol's flags)
	Size  int     // size of corresponding data
	Type  SymID   // symbol for Go type information
	Data  Data    // memory image of symbol
	Reloc []Reloc // relocations to apply to Data
	Func  *Func   // additional data for functions; set by parseObject only when Kind is STEXT
}
   129  
   130  // A SymID - the combination of Name and Version - uniquely identifies
   131  // a symbol within a package.
   132  type SymID struct {
   133  	// Name is the name of a symbol.
   134  	Name string
   135  
   136  	// Version is zero for symbols with global visibility.
   137  	// Symbols with only file visibility (such as file-level static
   138  	// declarations in C) have a non-zero version distinguishing
   139  	// a symbol in one file from a symbol of the same name
   140  	// in another file
   141  	Version int
   142  }
   143  
   144  func (s SymID) String() string {
   145  	if s.Version == 0 {
   146  		return s.Name
   147  	}
   148  	return fmt.Sprintf("%s<%d>", s.Name, s.Version)
   149  }
   150  
// A Data is a reference to data stored in an object file.
// It records the offset and size of the data, so that a client can
// read the data only if necessary.
// readData fills it in and then skips over the bytes without
// retaining them.
type Data struct {
	Offset int64 // reader's input offset at which the data begins
	Size   int64 // number of bytes of data
}
   158  
// A Reloc describes a relocation applied to a memory image to refer
// to an address within a particular symbol.
//
// The object file stores two further values per relocation (Xadd and
// Xsym); parseObject reads and discards them, so they have no field here.
type Reloc struct {
	// The bytes at [Offset, Offset+Size) within the memory image
	// should be updated to refer to the address Add bytes after the start
	// of the symbol Sym.
	Offset int
	Size   int
	Sym    SymID
	Add    int

	// The Type records the form of address expected in the bytes
	// described by the previous fields: absolute, PC-relative, and so on.
	// TODO(rsc): The interpretation of Type is not exposed by this package.
	Type int
}
   175  
// A Var describes a variable in a function stack frame: a declared
// local variable, an input argument, or an output result.
type Var struct {
	// The combination of Name, Kind, and Offset uniquely
	// identifies a variable in a function stack frame.
	// Using fewer of these - in particular, using only Name - does not.
	Name   string // Name of variable (the Name part of the symbol ID stored in the file).
	Kind   int    // TODO(rsc): Define meaning.
	Offset int    // Frame offset. TODO(rsc): Define meaning.

	Type SymID // Go type for variable.
}
   188  
// Func contains additional per-symbol information specific to functions.
// parseObject allocates a Func only for symbols whose Kind is STEXT.
type Func struct {
	Args     int        // size in bytes of argument frame: inputs and outputs
	Frame    int        // size in bytes of local variable frame
	Leaf     bool       // function omits save of link register (ARM); bit 0 of the function's flags
	NoSplit  bool       // function omits stack split prologue; true when the stored integer is non-zero
	Var      []Var      // detail about local variables
	PCSP     Data       // PC → SP offset map
	PCFile   Data       // PC → file number map (index into File)
	PCLine   Data       // PC → line number map
	PCData   []Data     // PC → runtime support data map
	FuncData []FuncData // non-PC-specific runtime support data
	File     []string   // paths indexed by PCFile (the Name part of each stored symbol ID)
}
   203  
   204  // TODO: Add PCData []byte and PCDataIter (similar to liblink).
   205  
// A FuncData is a single function-specific data value.
// In the object file, all of a function's FuncData symbols are stored
// first, followed by all of the corresponding offsets.
type FuncData struct {
	Sym    SymID // symbol holding data
	Offset int64 // offset into symbol for funcdata pointer
}
   211  
// A Package is a parsed Go object file or archive defining a Go package.
// When an archive holds several object files, their imports and symbols
// are all accumulated into the same Package.
type Package struct {
	ImportPath string   // import path denoting this package
	Imports    []string // packages imported by this package (appended per object file; duplicates are not removed)
	Syms       []*Sym   // symbols defined by this package
	MaxVersion int      // maximum Version in any SymID in Syms; incremented once per object file parsed
}
   219  
// Magic byte sequences recognized in the input, and the errors
// reported when the input does not have the expected form.
var (
	archiveHeader = []byte("!<arch>\n") // first 8 bytes of an archive
	archiveMagic  = []byte("`\n")       // final 2 bytes of each 60-byte archive member header
	goobjHeader   = []byte("go objec")  // truncated to size of archiveHeader

	// Errors describing a malformed archive.
	errCorruptArchive   = errors.New("corrupt archive")
	errTruncatedArchive = errors.New("truncated archive")
	errNotArchive       = errors.New("unrecognized archive format")

	// Errors describing a malformed object file.
	errCorruptObject   = errors.New("corrupt object file")
	errTruncatedObject = errors.New("truncated object file")
	errNotObject       = errors.New("unrecognized object file format")
)
   233  
// An objReader is an object file reader.
// It reads through a buffered reader while tracking its own position,
// and it remembers the first error encountered so that callers may
// defer error checks.
type objReader struct {
	p         *Package      // package being filled in
	b         *bufio.Reader // buffered view of f; all reads go through it
	f         io.ReadSeeker // underlying input, used directly only for seeking
	err       error         // first error recorded by the error method, if any
	offset    int64         // position in f of the next byte to be returned from b
	limit     int64         // position in f at which reading must stop
	tmp       [256]byte     // scratch space for headers, magic strings, and skipped data
	pkg       string        // NOTE(review): not assigned anywhere in the code shown here
	pkgprefix string        // escaped import path plus ".", substituted for `"".` in symbol names
}
   246  
   247  // importPathToPrefix returns the prefix that will be used in the
   248  // final symbol table for the given import path.
   249  // We escape '%', '"', all control characters and non-ASCII bytes,
   250  // and any '.' after the final slash.
   251  //
   252  // See ../../../cmd/ld/lib.c:/^pathtoprefix and
   253  // ../../../cmd/gc/subr.c:/^pathtoprefix.
   254  func importPathToPrefix(s string) string {
   255  	// find index of last slash, if any, or else -1.
   256  	// used for determining whether an index is after the last slash.
   257  	slash := strings.LastIndex(s, "/")
   258  
   259  	// check for chars that need escaping
   260  	n := 0
   261  	for r := 0; r < len(s); r++ {
   262  		if c := s[r]; c <= ' ' || (c == '.' && r > slash) || c == '%' || c == '"' || c >= 0x7F {
   263  			n++
   264  		}
   265  	}
   266  
   267  	// quick exit
   268  	if n == 0 {
   269  		return s
   270  	}
   271  
   272  	// escape
   273  	const hex = "0123456789abcdef"
   274  	p := make([]byte, 0, len(s)+2*n)
   275  	for r := 0; r < len(s); r++ {
   276  		if c := s[r]; c <= ' ' || (c == '.' && r > slash) || c == '%' || c == '"' || c >= 0x7F {
   277  			p = append(p, '%', hex[c>>4], hex[c&0xF])
   278  		} else {
   279  			p = append(p, c)
   280  		}
   281  	}
   282  
   283  	return string(p)
   284  }
   285  
   286  // init initializes r to read package p from f.
   287  func (r *objReader) init(f io.ReadSeeker, p *Package) {
   288  	r.f = f
   289  	r.p = p
   290  	r.offset, _ = f.Seek(0, 1)
   291  	r.limit, _ = f.Seek(0, 2)
   292  	f.Seek(r.offset, 0)
   293  	r.b = bufio.NewReader(f)
   294  	r.pkgprefix = importPathToPrefix(p.ImportPath) + "."
   295  }
   296  
   297  // error records that an error occurred.
   298  // It returns only the first error, so that an error
   299  // caused by an earlier error does not discard information
   300  // about the earlier error.
   301  func (r *objReader) error(err error) error {
   302  	if r.err == nil {
   303  		if err == io.EOF {
   304  			err = io.ErrUnexpectedEOF
   305  		}
   306  		r.err = err
   307  	}
   308  	// panic("corrupt") // useful for debugging
   309  	return r.err
   310  }
   311  
   312  // readByte reads and returns a byte from the input file.
   313  // On I/O error or EOF, it records the error but returns byte 0.
   314  // A sequence of 0 bytes will eventually terminate any
   315  // parsing state in the object file. In particular, it ends the
   316  // reading of a varint.
   317  func (r *objReader) readByte() byte {
   318  	if r.err != nil {
   319  		return 0
   320  	}
   321  	if r.offset >= r.limit {
   322  		r.error(io.ErrUnexpectedEOF)
   323  		return 0
   324  	}
   325  	b, err := r.b.ReadByte()
   326  	if err != nil {
   327  		if err == io.EOF {
   328  			err = io.ErrUnexpectedEOF
   329  		}
   330  		r.error(err)
   331  		b = 0
   332  	} else {
   333  		r.offset++
   334  	}
   335  	return b
   336  }
   337  
   338  // read reads exactly len(b) bytes from the input file.
   339  // If an error occurs, read returns the error but also
   340  // records it, so it is safe for callers to ignore the result
   341  // as long as delaying the report is not a problem.
   342  func (r *objReader) readFull(b []byte) error {
   343  	if r.err != nil {
   344  		return r.err
   345  	}
   346  	if r.offset+int64(len(b)) > r.limit {
   347  		return r.error(io.ErrUnexpectedEOF)
   348  	}
   349  	n, err := io.ReadFull(r.b, b)
   350  	r.offset += int64(n)
   351  	if err != nil {
   352  		return r.error(err)
   353  	}
   354  	return nil
   355  }
   356  
   357  // readInt reads a zigzag varint from the input file.
   358  func (r *objReader) readInt() int {
   359  	var u uint64
   360  
   361  	for shift := uint(0); ; shift += 7 {
   362  		if shift >= 64 {
   363  			r.error(errCorruptObject)
   364  			return 0
   365  		}
   366  		c := r.readByte()
   367  		u |= uint64(c&0x7F) << shift
   368  		if c&0x80 == 0 {
   369  			break
   370  		}
   371  	}
   372  
   373  	v := int64(u>>1) ^ (int64(u) << 63 >> 63)
   374  	if int64(int(v)) != v {
   375  		r.error(errCorruptObject) // TODO
   376  		return 0
   377  	}
   378  	return int(v)
   379  }
   380  
   381  // readString reads a length-delimited string from the input file.
   382  func (r *objReader) readString() string {
   383  	n := r.readInt()
   384  	buf := make([]byte, n)
   385  	r.readFull(buf)
   386  	return string(buf)
   387  }
   388  
   389  // readSymID reads a SymID from the input file.
   390  func (r *objReader) readSymID() SymID {
   391  	name, vers := r.readString(), r.readInt()
   392  
   393  	// In a symbol name in an object file, "". denotes the
   394  	// prefix for the package in which the object file has been found.
   395  	// Expand it.
   396  	name = strings.Replace(name, `"".`, r.pkgprefix, -1)
   397  
   398  	// An individual object file only records version 0 (extern) or 1 (static).
   399  	// To make static symbols unique across all files being read, we
   400  	// replace version 1 with the version corresponding to the current
   401  	// file number. The number is incremented on each call to parseObject.
   402  	if vers != 0 {
   403  		vers = r.p.MaxVersion
   404  	}
   405  
   406  	return SymID{name, vers}
   407  }
   408  
   409  // readData reads a data reference from the input file.
   410  func (r *objReader) readData() Data {
   411  	n := r.readInt()
   412  	d := Data{Offset: r.offset, Size: int64(n)}
   413  	r.skip(int64(n))
   414  	return d
   415  }
   416  
   417  // skip skips n bytes in the input.
   418  func (r *objReader) skip(n int64) {
   419  	if n < 0 {
   420  		r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip"))
   421  	}
   422  	if n < int64(len(r.tmp)) {
   423  		// Since the data is so small, a just reading from the buffered
   424  		// reader is better than flushing the buffer and seeking.
   425  		r.readFull(r.tmp[:n])
   426  	} else if n <= int64(r.b.Buffered()) {
   427  		// Even though the data is not small, it has already been read.
   428  		// Advance the buffer instead of seeking.
   429  		for n > int64(len(r.tmp)) {
   430  			r.readFull(r.tmp[:])
   431  			n -= int64(len(r.tmp))
   432  		}
   433  		r.readFull(r.tmp[:n])
   434  	} else {
   435  		// Seek, giving up buffered data.
   436  		_, err := r.f.Seek(r.offset+n, 0)
   437  		if err != nil {
   438  			r.error(err)
   439  		}
   440  		r.offset += n
   441  		r.b.Reset(r.f)
   442  	}
   443  }
   444  
   445  // Parse parses an object file or archive from r,
   446  // assuming that its import path is pkgpath.
   447  func Parse(r io.ReadSeeker, pkgpath string) (*Package, error) {
   448  	if pkgpath == "" {
   449  		pkgpath = `""`
   450  	}
   451  	p := new(Package)
   452  	p.ImportPath = pkgpath
   453  
   454  	var rd objReader
   455  	rd.init(r, p)
   456  	err := rd.readFull(rd.tmp[:8])
   457  	if err != nil {
   458  		if err == io.EOF {
   459  			err = io.ErrUnexpectedEOF
   460  		}
   461  		return nil, err
   462  	}
   463  
   464  	switch {
   465  	default:
   466  		return nil, errNotObject
   467  
   468  	case bytes.Equal(rd.tmp[:8], archiveHeader):
   469  		if err := rd.parseArchive(); err != nil {
   470  			return nil, err
   471  		}
   472  	case bytes.Equal(rd.tmp[:8], goobjHeader):
   473  		if err := rd.parseObject(goobjHeader); err != nil {
   474  			return nil, err
   475  		}
   476  	}
   477  
   478  	return p, nil
   479  }
   480  
   481  // trimSpace removes trailing spaces from b and returns the corresponding string.
   482  // This effectively parses the form used in archive headers.
   483  func trimSpace(b []byte) string {
   484  	return string(bytes.TrimRight(b, " "))
   485  }
   486  
   487  // parseArchive parses a Unix archive of Go object files.
   488  // TODO(rsc): Need to skip non-Go object files.
   489  // TODO(rsc): Maybe record table of contents in r.p so that
   490  // linker can avoid having code to parse archives too.
   491  func (r *objReader) parseArchive() error {
   492  	for r.offset < r.limit {
   493  		if err := r.readFull(r.tmp[:60]); err != nil {
   494  			return err
   495  		}
   496  		data := r.tmp[:60]
   497  
   498  		// Each file is preceded by this text header (slice indices in first column):
   499  		//	 0:16	name
   500  		//	16:28 date
   501  		//	28:34 uid
   502  		//	34:40 gid
   503  		//	40:48 mode
   504  		//	48:58 size
   505  		//	58:60 magic - `\n
   506  		// We only care about name, size, and magic.
   507  		// The fields are space-padded on the right.
   508  		// The size is in decimal.
   509  		// The file data - size bytes - follows the header.
   510  		// Headers are 2-byte aligned, so if size is odd, an extra padding
   511  		// byte sits between the file data and the next header.
   512  		// The file data that follows is padded to an even number of bytes:
   513  		// if size is odd, an extra padding byte is inserted betw the next header.
   514  		if len(data) < 60 {
   515  			return errTruncatedArchive
   516  		}
   517  		if !bytes.Equal(data[58:60], archiveMagic) {
   518  			return errCorruptArchive
   519  		}
   520  		name := trimSpace(data[0:16])
   521  		size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64)
   522  		if err != nil {
   523  			return errCorruptArchive
   524  		}
   525  		data = data[60:]
   526  		fsize := size + size&1
   527  		if fsize < 0 || fsize < size {
   528  			return errCorruptArchive
   529  		}
   530  		switch name {
   531  		case "__.SYMDEF", "__.GOSYMDEF", "__.PKGDEF":
   532  			r.skip(size)
   533  		default:
   534  			oldLimit := r.limit
   535  			r.limit = r.offset + size
   536  			if err := r.parseObject(nil); err != nil {
   537  				return fmt.Errorf("parsing archive member %q: %v", name, err)
   538  			}
   539  			r.skip(r.limit - r.offset)
   540  			r.limit = oldLimit
   541  		}
   542  		if size&1 != 0 {
   543  			r.skip(1)
   544  		}
   545  	}
   546  	return nil
   547  }
   548  
   549  // parseObject parses a single Go object file.
   550  // The prefix is the bytes already read from the file,
   551  // typically in order to detect that this is an object file.
   552  // The object file consists of a textual header ending in "\n!\n"
   553  // and then the part we want to parse begins.
   554  // The format of that part is defined in a comment at the top
   555  // of src/liblink/objfile.c.
   556  func (r *objReader) parseObject(prefix []byte) error {
   557  	// TODO(rsc): Maybe use prefix and the initial input to
   558  	// record the header line from the file, which would
   559  	// give the architecture and other version information.
   560  
   561  	r.p.MaxVersion++
   562  	var c1, c2, c3 byte
   563  	for {
   564  		c1, c2, c3 = c2, c3, r.readByte()
   565  		if c3 == 0 { // NUL or EOF, either is bad
   566  			return errCorruptObject
   567  		}
   568  		if c1 == '\n' && c2 == '!' && c3 == '\n' {
   569  			break
   570  		}
   571  	}
   572  
   573  	r.readFull(r.tmp[:8])
   574  	if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go13ld")) {
   575  		return r.error(errCorruptObject)
   576  	}
   577  
   578  	b := r.readByte()
   579  	if b != 1 {
   580  		return r.error(errCorruptObject)
   581  	}
   582  
   583  	// Direct package dependencies.
   584  	for {
   585  		s := r.readString()
   586  		if s == "" {
   587  			break
   588  		}
   589  		r.p.Imports = append(r.p.Imports, s)
   590  	}
   591  
   592  	// Symbols.
   593  	for {
   594  		if b := r.readByte(); b != 0xfe {
   595  			if b != 0xff {
   596  				return r.error(errCorruptObject)
   597  			}
   598  			break
   599  		}
   600  
   601  		typ := r.readInt()
   602  		s := &Sym{SymID: r.readSymID()}
   603  		r.p.Syms = append(r.p.Syms, s)
   604  		s.Kind = SymKind(typ)
   605  		flags := r.readInt()
   606  		s.DupOK = flags&1 != 0
   607  		s.Size = r.readInt()
   608  		s.Type = r.readSymID()
   609  		s.Data = r.readData()
   610  		s.Reloc = make([]Reloc, r.readInt())
   611  		for i := range s.Reloc {
   612  			rel := &s.Reloc[i]
   613  			rel.Offset = r.readInt()
   614  			rel.Size = r.readInt()
   615  			rel.Type = r.readInt()
   616  			rel.Add = r.readInt()
   617  			r.readInt() // Xadd - ignored
   618  			rel.Sym = r.readSymID()
   619  			r.readSymID() // Xsym - ignored
   620  		}
   621  
   622  		if s.Kind == STEXT {
   623  			f := new(Func)
   624  			s.Func = f
   625  			f.Args = r.readInt()
   626  			f.Frame = r.readInt()
   627  			flags := r.readInt()
   628  			f.Leaf = flags&1 != 0
   629  			f.NoSplit = r.readInt() != 0
   630  			f.Var = make([]Var, r.readInt())
   631  			for i := range f.Var {
   632  				v := &f.Var[i]
   633  				v.Name = r.readSymID().Name
   634  				v.Offset = r.readInt()
   635  				v.Kind = r.readInt()
   636  				v.Type = r.readSymID()
   637  			}
   638  
   639  			f.PCSP = r.readData()
   640  			f.PCFile = r.readData()
   641  			f.PCLine = r.readData()
   642  			f.PCData = make([]Data, r.readInt())
   643  			for i := range f.PCData {
   644  				f.PCData[i] = r.readData()
   645  			}
   646  			f.FuncData = make([]FuncData, r.readInt())
   647  			for i := range f.FuncData {
   648  				f.FuncData[i].Sym = r.readSymID()
   649  			}
   650  			for i := range f.FuncData {
   651  				f.FuncData[i].Offset = int64(r.readInt()) // TODO
   652  			}
   653  			f.File = make([]string, r.readInt())
   654  			for i := range f.File {
   655  				f.File[i] = r.readSymID().Name
   656  			}
   657  		}
   658  	}
   659  
   660  	r.readFull(r.tmp[:7])
   661  	if !bytes.Equal(r.tmp[:7], []byte("\xffgo13ld")) {
   662  		return r.error(errCorruptObject)
   663  	}
   664  
   665  	return nil
   666  }