github.com/tcnksm/go@v0.0.0-20141208075154-439b32936367/src/debug/gosym/symtab.go (about)

     1  // Copyright 2009 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package gosym implements access to the Go symbol
     6  // and line number tables embedded in Go binaries generated
     7  // by the gc compilers.
     8  package gosym
     9  
    10  // The table format is a variant of the format used in Plan 9's a.out
    11  // format, documented at http://plan9.bell-labs.com/magic/man2html/6/a.out.
    12  // The best reference for the differences between the Plan 9 format
    13  // and the Go format is the runtime source, specifically ../../runtime/symtab.c.
    14  
    15  import (
    16  	"bytes"
    17  	"encoding/binary"
    18  	"fmt"
    19  	"strconv"
    20  	"strings"
    21  )
    22  
    23  /*
    24   * Symbols
    25   */
    26  
// A Sym represents a single symbol table entry.
type Sym struct {
	// Value is the symbol's value as decoded from the table.
	// For text symbols NewTable uses it as the function entry address.
	Value  uint64
	// Type is the one-letter symbol type code; lower-case letters mark
	// static symbols (see Static).
	Type   byte
	// Name is the symbol name. NewTable rewrites the middle dot (U+00B7)
	// to '.', and for 'z'/'Z' path symbols joins the path elements with '/'.
	Name   string
	// GoType is the Go type value recorded for the symbol in the table.
	GoType uint64
	// If this symbol is a function symbol, the corresponding Func
	Func *Func
}
    36  
    37  // Static reports whether this symbol is static (not visible outside its file).
    38  func (s *Sym) Static() bool { return s.Type >= 'a' }
    39  
    40  // PackageName returns the package part of the symbol name,
    41  // or the empty string if there is none.
    42  func (s *Sym) PackageName() string {
    43  	if i := strings.Index(s.Name, "."); i != -1 {
    44  		return s.Name[0:i]
    45  	}
    46  	return ""
    47  }
    48  
    49  // ReceiverName returns the receiver type name of this symbol,
    50  // or the empty string if there is none.
    51  func (s *Sym) ReceiverName() string {
    52  	l := strings.Index(s.Name, ".")
    53  	r := strings.LastIndex(s.Name, ".")
    54  	if l == -1 || r == -1 || l == r {
    55  		return ""
    56  	}
    57  	return s.Name[l+1 : r]
    58  }
    59  
    60  // BaseName returns the symbol name without the package or receiver name.
    61  func (s *Sym) BaseName() string {
    62  	if i := strings.LastIndex(s.Name, "."); i != -1 {
    63  		return s.Name[i+1:]
    64  	}
    65  	return s.Name
    66  }
    67  
// A Func collects information about a single function.
//
// The field descriptions below cover Funcs that NewTable builds from
// symbol table entries.
type Func struct {
	// Entry is the function's entry address, taken from the Value of
	// its text symbol.
	Entry uint64
	// Sym is the text symbol that describes the function.
	*Sym
	// End is set to the Value of the next text symbol in the table.
	End       uint64
	// Params holds the 'p' symbols that follow the function's text symbol.
	Params    []*Sym
	// Locals holds the 'a' symbols that follow the function's text symbol.
	Locals    []*Sym
	// FrameSize is the Value of the function's 'm' symbol, if any.
	FrameSize int
	// LineTable is the line table used for this function. Go 1.2 tables
	// share a single LineTable across all functions.
	LineTable *LineTable
	// Obj is the Obj this function was assigned to.
	Obj       *Obj
}
    79  
// An Obj represents a collection of functions in a symbol table.
//
// The exact method of division of a binary into separate Objs is an internal detail
// of the symbol table format.
//
// In early versions of Go each source file became a different Obj.
//
// In Go 1 and Go 1.1, each package produced one Obj for all Go sources
// and one Obj per C source file.
//
// In Go 1.2, there is a single Obj for the entire program.
type Obj struct {
	// Funcs is a list of functions in the Obj.
	// It is a sub-slice of the owning Table's Funcs.
	Funcs []Func

	// In Go 1.1 and earlier, Paths is a list of symbols corresponding
	// to the source file names that produced the Obj: the run of
	// consecutive 'Z'/'z' path symbols that opened the Obj, kept as a
	// sub-slice of the owning Table's Syms.
	// In Go 1.2, Paths is nil.
	// Use the keys of Table.Files to obtain a list of source files.
	Paths []Sym // meta
}
   101  
   102  /*
   103   * Symbol tables
   104   */
   105  
// Table represents a Go symbol table.  It stores all of the
// symbols decoded from the program and provides methods to translate
// between symbols, names, and addresses.
type Table struct {
	Syms  []Sym  // every symbol decoded from the table, in table order
	Funcs []Func // functions, in the order their text symbols appear
	Files map[string]*Obj // source file name -> Obj; always allocated by NewTable, filled from the line table for Go 1.2 and later
	Objs  []Obj           // for Go 1.2 and later NewTable allocates exactly one Obj

	go12line *LineTable // Go 1.2 line number table; nil for older binaries
}
   117  
// sym is a raw symbol table entry as decoded by walksymtab, before
// NewTable converts it into an exported Sym.
type sym struct {
	value  uint64 // symbol value
	gotype uint64 // Go type value stored with the symbol
	typ    byte   // one-letter symbol type code
	name   []byte // raw name bytes; a sub-slice of the table data, not a copy
}
   124  
// Magic prefixes that walksymtab uses to recognize the table encoding.
// Data that starts with none of them is decoded as the headerless
// big-endian format.
var (
	// New-format table, little endian. The byte after the 7-byte
	// magic holds the pointer size (4 or 8).
	littleEndianSymtab    = []byte{0xFD, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00}
	// New-format table, big endian. Same header layout as above.
	bigEndianSymtab       = []byte{0xFF, 0xFF, 0xFF, 0xFD, 0x00, 0x00, 0x00}
	// Go 1.0 layout stored little endian; the 6-byte magic is skipped
	// and no pointer-size byte follows.
	oldLittleEndianSymtab = []byte{0xFE, 0xFF, 0xFF, 0xFF, 0x00, 0x00}
)
   130  
   131  func walksymtab(data []byte, fn func(sym) error) error {
   132  	if len(data) == 0 { // missing symtab is okay
   133  		return nil
   134  	}
   135  	var order binary.ByteOrder = binary.BigEndian
   136  	newTable := false
   137  	switch {
   138  	case bytes.HasPrefix(data, oldLittleEndianSymtab):
   139  		// Same as Go 1.0, but little endian.
   140  		// Format was used during interim development between Go 1.0 and Go 1.1.
   141  		// Should not be widespread, but easy to support.
   142  		data = data[6:]
   143  		order = binary.LittleEndian
   144  	case bytes.HasPrefix(data, bigEndianSymtab):
   145  		newTable = true
   146  	case bytes.HasPrefix(data, littleEndianSymtab):
   147  		newTable = true
   148  		order = binary.LittleEndian
   149  	}
   150  	var ptrsz int
   151  	if newTable {
   152  		if len(data) < 8 {
   153  			return &DecodingError{len(data), "unexpected EOF", nil}
   154  		}
   155  		ptrsz = int(data[7])
   156  		if ptrsz != 4 && ptrsz != 8 {
   157  			return &DecodingError{7, "invalid pointer size", ptrsz}
   158  		}
   159  		data = data[8:]
   160  	}
   161  	var s sym
   162  	p := data
   163  	for len(p) >= 4 {
   164  		var typ byte
   165  		if newTable {
   166  			// Symbol type, value, Go type.
   167  			typ = p[0] & 0x3F
   168  			wideValue := p[0]&0x40 != 0
   169  			goType := p[0]&0x80 != 0
   170  			if typ < 26 {
   171  				typ += 'A'
   172  			} else {
   173  				typ += 'a' - 26
   174  			}
   175  			s.typ = typ
   176  			p = p[1:]
   177  			if wideValue {
   178  				if len(p) < ptrsz {
   179  					return &DecodingError{len(data), "unexpected EOF", nil}
   180  				}
   181  				// fixed-width value
   182  				if ptrsz == 8 {
   183  					s.value = order.Uint64(p[0:8])
   184  					p = p[8:]
   185  				} else {
   186  					s.value = uint64(order.Uint32(p[0:4]))
   187  					p = p[4:]
   188  				}
   189  			} else {
   190  				// varint value
   191  				s.value = 0
   192  				shift := uint(0)
   193  				for len(p) > 0 && p[0]&0x80 != 0 {
   194  					s.value |= uint64(p[0]&0x7F) << shift
   195  					shift += 7
   196  					p = p[1:]
   197  				}
   198  				if len(p) == 0 {
   199  					return &DecodingError{len(data), "unexpected EOF", nil}
   200  				}
   201  				s.value |= uint64(p[0]) << shift
   202  				p = p[1:]
   203  			}
   204  			if goType {
   205  				if len(p) < ptrsz {
   206  					return &DecodingError{len(data), "unexpected EOF", nil}
   207  				}
   208  				// fixed-width go type
   209  				if ptrsz == 8 {
   210  					s.gotype = order.Uint64(p[0:8])
   211  					p = p[8:]
   212  				} else {
   213  					s.gotype = uint64(order.Uint32(p[0:4]))
   214  					p = p[4:]
   215  				}
   216  			}
   217  		} else {
   218  			// Value, symbol type.
   219  			s.value = uint64(order.Uint32(p[0:4]))
   220  			if len(p) < 5 {
   221  				return &DecodingError{len(data), "unexpected EOF", nil}
   222  			}
   223  			typ = p[4]
   224  			if typ&0x80 == 0 {
   225  				return &DecodingError{len(data) - len(p) + 4, "bad symbol type", typ}
   226  			}
   227  			typ &^= 0x80
   228  			s.typ = typ
   229  			p = p[5:]
   230  		}
   231  
   232  		// Name.
   233  		var i int
   234  		var nnul int
   235  		for i = 0; i < len(p); i++ {
   236  			if p[i] == 0 {
   237  				nnul = 1
   238  				break
   239  			}
   240  		}
   241  		switch typ {
   242  		case 'z', 'Z':
   243  			p = p[i+nnul:]
   244  			for i = 0; i+2 <= len(p); i += 2 {
   245  				if p[i] == 0 && p[i+1] == 0 {
   246  					nnul = 2
   247  					break
   248  				}
   249  			}
   250  		}
   251  		if len(p) < i+nnul {
   252  			return &DecodingError{len(data), "unexpected EOF", nil}
   253  		}
   254  		s.name = p[0:i]
   255  		i += nnul
   256  		p = p[i:]
   257  
   258  		if !newTable {
   259  			if len(p) < 4 {
   260  				return &DecodingError{len(data), "unexpected EOF", nil}
   261  			}
   262  			// Go type.
   263  			s.gotype = uint64(order.Uint32(p[:4]))
   264  			p = p[4:]
   265  		}
   266  		fn(s)
   267  	}
   268  	return nil
   269  }
   270  
   271  // NewTable decodes the Go symbol table in data,
   272  // returning an in-memory representation.
   273  func NewTable(symtab []byte, pcln *LineTable) (*Table, error) {
   274  	var n int
   275  	err := walksymtab(symtab, func(s sym) error {
   276  		n++
   277  		return nil
   278  	})
   279  	if err != nil {
   280  		return nil, err
   281  	}
   282  
   283  	var t Table
   284  	if pcln.isGo12() {
   285  		t.go12line = pcln
   286  	}
   287  	fname := make(map[uint16]string)
   288  	t.Syms = make([]Sym, 0, n)
   289  	nf := 0
   290  	nz := 0
   291  	lasttyp := uint8(0)
   292  	err = walksymtab(symtab, func(s sym) error {
   293  		n := len(t.Syms)
   294  		t.Syms = t.Syms[0 : n+1]
   295  		ts := &t.Syms[n]
   296  		ts.Type = s.typ
   297  		ts.Value = uint64(s.value)
   298  		ts.GoType = uint64(s.gotype)
   299  		switch s.typ {
   300  		default:
   301  			// rewrite name to use . instead of ยท (c2 b7)
   302  			w := 0
   303  			b := s.name
   304  			for i := 0; i < len(b); i++ {
   305  				if b[i] == 0xc2 && i+1 < len(b) && b[i+1] == 0xb7 {
   306  					i++
   307  					b[i] = '.'
   308  				}
   309  				b[w] = b[i]
   310  				w++
   311  			}
   312  			ts.Name = string(s.name[0:w])
   313  		case 'z', 'Z':
   314  			if lasttyp != 'z' && lasttyp != 'Z' {
   315  				nz++
   316  			}
   317  			for i := 0; i < len(s.name); i += 2 {
   318  				eltIdx := binary.BigEndian.Uint16(s.name[i : i+2])
   319  				elt, ok := fname[eltIdx]
   320  				if !ok {
   321  					return &DecodingError{-1, "bad filename code", eltIdx}
   322  				}
   323  				if n := len(ts.Name); n > 0 && ts.Name[n-1] != '/' {
   324  					ts.Name += "/"
   325  				}
   326  				ts.Name += elt
   327  			}
   328  		}
   329  		switch s.typ {
   330  		case 'T', 't', 'L', 'l':
   331  			nf++
   332  		case 'f':
   333  			fname[uint16(s.value)] = ts.Name
   334  		}
   335  		lasttyp = s.typ
   336  		return nil
   337  	})
   338  	if err != nil {
   339  		return nil, err
   340  	}
   341  
   342  	t.Funcs = make([]Func, 0, nf)
   343  	t.Files = make(map[string]*Obj)
   344  
   345  	var obj *Obj
   346  	if t.go12line != nil {
   347  		// Put all functions into one Obj.
   348  		t.Objs = make([]Obj, 1)
   349  		obj = &t.Objs[0]
   350  		t.go12line.go12MapFiles(t.Files, obj)
   351  	} else {
   352  		t.Objs = make([]Obj, 0, nz)
   353  	}
   354  
   355  	// Count text symbols and attach frame sizes, parameters, and
   356  	// locals to them.  Also, find object file boundaries.
   357  	lastf := 0
   358  	for i := 0; i < len(t.Syms); i++ {
   359  		sym := &t.Syms[i]
   360  		switch sym.Type {
   361  		case 'Z', 'z': // path symbol
   362  			if t.go12line != nil {
   363  				// Go 1.2 binaries have the file information elsewhere. Ignore.
   364  				break
   365  			}
   366  			// Finish the current object
   367  			if obj != nil {
   368  				obj.Funcs = t.Funcs[lastf:]
   369  			}
   370  			lastf = len(t.Funcs)
   371  
   372  			// Start new object
   373  			n := len(t.Objs)
   374  			t.Objs = t.Objs[0 : n+1]
   375  			obj = &t.Objs[n]
   376  
   377  			// Count & copy path symbols
   378  			var end int
   379  			for end = i + 1; end < len(t.Syms); end++ {
   380  				if c := t.Syms[end].Type; c != 'Z' && c != 'z' {
   381  					break
   382  				}
   383  			}
   384  			obj.Paths = t.Syms[i:end]
   385  			i = end - 1 // loop will i++
   386  
   387  			// Record file names
   388  			depth := 0
   389  			for j := range obj.Paths {
   390  				s := &obj.Paths[j]
   391  				if s.Name == "" {
   392  					depth--
   393  				} else {
   394  					if depth == 0 {
   395  						t.Files[s.Name] = obj
   396  					}
   397  					depth++
   398  				}
   399  			}
   400  
   401  		case 'T', 't', 'L', 'l': // text symbol
   402  			if n := len(t.Funcs); n > 0 {
   403  				t.Funcs[n-1].End = sym.Value
   404  			}
   405  			if sym.Name == "runtime.etext" || sym.Name == "etext" {
   406  				continue
   407  			}
   408  
   409  			// Count parameter and local (auto) syms
   410  			var np, na int
   411  			var end int
   412  		countloop:
   413  			for end = i + 1; end < len(t.Syms); end++ {
   414  				switch t.Syms[end].Type {
   415  				case 'T', 't', 'L', 'l', 'Z', 'z':
   416  					break countloop
   417  				case 'p':
   418  					np++
   419  				case 'a':
   420  					na++
   421  				}
   422  			}
   423  
   424  			// Fill in the function symbol
   425  			n := len(t.Funcs)
   426  			t.Funcs = t.Funcs[0 : n+1]
   427  			fn := &t.Funcs[n]
   428  			sym.Func = fn
   429  			fn.Params = make([]*Sym, 0, np)
   430  			fn.Locals = make([]*Sym, 0, na)
   431  			fn.Sym = sym
   432  			fn.Entry = sym.Value
   433  			fn.Obj = obj
   434  			if t.go12line != nil {
   435  				// All functions share the same line table.
   436  				// It knows how to narrow down to a specific
   437  				// function quickly.
   438  				fn.LineTable = t.go12line
   439  			} else if pcln != nil {
   440  				fn.LineTable = pcln.slice(fn.Entry)
   441  				pcln = fn.LineTable
   442  			}
   443  			for j := i; j < end; j++ {
   444  				s := &t.Syms[j]
   445  				switch s.Type {
   446  				case 'm':
   447  					fn.FrameSize = int(s.Value)
   448  				case 'p':
   449  					n := len(fn.Params)
   450  					fn.Params = fn.Params[0 : n+1]
   451  					fn.Params[n] = s
   452  				case 'a':
   453  					n := len(fn.Locals)
   454  					fn.Locals = fn.Locals[0 : n+1]
   455  					fn.Locals[n] = s
   456  				}
   457  			}
   458  			i = end - 1 // loop will i++
   459  		}
   460  	}
   461  
   462  	if t.go12line != nil && nf == 0 {
   463  		t.Funcs = t.go12line.go12Funcs()
   464  	}
   465  	if obj != nil {
   466  		obj.Funcs = t.Funcs[lastf:]
   467  	}
   468  	return &t, nil
   469  }
   470  
   471  // PCToFunc returns the function containing the program counter pc,
   472  // or nil if there is no such function.
   473  func (t *Table) PCToFunc(pc uint64) *Func {
   474  	funcs := t.Funcs
   475  	for len(funcs) > 0 {
   476  		m := len(funcs) / 2
   477  		fn := &funcs[m]
   478  		switch {
   479  		case pc < fn.Entry:
   480  			funcs = funcs[0:m]
   481  		case fn.Entry <= pc && pc < fn.End:
   482  			return fn
   483  		default:
   484  			funcs = funcs[m+1:]
   485  		}
   486  	}
   487  	return nil
   488  }
   489  
   490  // PCToLine looks up line number information for a program counter.
   491  // If there is no information, it returns fn == nil.
   492  func (t *Table) PCToLine(pc uint64) (file string, line int, fn *Func) {
   493  	if fn = t.PCToFunc(pc); fn == nil {
   494  		return
   495  	}
   496  	if t.go12line != nil {
   497  		file = t.go12line.go12PCToFile(pc)
   498  		line = t.go12line.go12PCToLine(pc)
   499  	} else {
   500  		file, line = fn.Obj.lineFromAline(fn.LineTable.PCToLine(pc))
   501  	}
   502  	return
   503  }
   504  
   505  // LineToPC looks up the first program counter on the given line in
   506  // the named file.  It returns UnknownPathError or UnknownLineError if
   507  // there is an error looking up this line.
   508  func (t *Table) LineToPC(file string, line int) (pc uint64, fn *Func, err error) {
   509  	obj, ok := t.Files[file]
   510  	if !ok {
   511  		return 0, nil, UnknownFileError(file)
   512  	}
   513  
   514  	if t.go12line != nil {
   515  		pc := t.go12line.go12LineToPC(file, line)
   516  		if pc == 0 {
   517  			return 0, nil, &UnknownLineError{file, line}
   518  		}
   519  		return pc, t.PCToFunc(pc), nil
   520  	}
   521  
   522  	abs, err := obj.alineFromLine(file, line)
   523  	if err != nil {
   524  		return
   525  	}
   526  	for i := range obj.Funcs {
   527  		f := &obj.Funcs[i]
   528  		pc := f.LineTable.LineToPC(abs, f.End)
   529  		if pc != 0 {
   530  			return pc, f, nil
   531  		}
   532  	}
   533  	return 0, nil, &UnknownLineError{file, line}
   534  }
   535  
   536  // LookupSym returns the text, data, or bss symbol with the given name,
   537  // or nil if no such symbol is found.
   538  func (t *Table) LookupSym(name string) *Sym {
   539  	// TODO(austin) Maybe make a map
   540  	for i := range t.Syms {
   541  		s := &t.Syms[i]
   542  		switch s.Type {
   543  		case 'T', 't', 'L', 'l', 'D', 'd', 'B', 'b':
   544  			if s.Name == name {
   545  				return s
   546  			}
   547  		}
   548  	}
   549  	return nil
   550  }
   551  
   552  // LookupFunc returns the text, data, or bss symbol with the given name,
   553  // or nil if no such symbol is found.
   554  func (t *Table) LookupFunc(name string) *Func {
   555  	for i := range t.Funcs {
   556  		f := &t.Funcs[i]
   557  		if f.Sym.Name == name {
   558  			return f
   559  		}
   560  	}
   561  	return nil
   562  }
   563  
   564  // SymByAddr returns the text, data, or bss symbol starting at the given address.
   565  func (t *Table) SymByAddr(addr uint64) *Sym {
   566  	for i := range t.Syms {
   567  		s := &t.Syms[i]
   568  		switch s.Type {
   569  		case 'T', 't', 'L', 'l', 'D', 'd', 'B', 'b':
   570  			if s.Value == addr {
   571  				return s
   572  			}
   573  		}
   574  	}
   575  	return nil
   576  }
   577  
   578  /*
   579   * Object files
   580   */
   581  
   582  // This is legacy code for Go 1.1 and earlier, which used the
   583  // Plan 9 format for pc-line tables. This code was never quite
   584  // correct. It's probably very close, and it's usually correct, but
   585  // we never quite found all the corner cases.
   586  //
   587  // Go 1.2 and later use a simpler format, documented at golang.org/s/go12symtab.
   588  
   589  func (o *Obj) lineFromAline(aline int) (string, int) {
   590  	type stackEnt struct {
   591  		path   string
   592  		start  int
   593  		offset int
   594  		prev   *stackEnt
   595  	}
   596  
   597  	noPath := &stackEnt{"", 0, 0, nil}
   598  	tos := noPath
   599  
   600  pathloop:
   601  	for _, s := range o.Paths {
   602  		val := int(s.Value)
   603  		switch {
   604  		case val > aline:
   605  			break pathloop
   606  
   607  		case val == 1:
   608  			// Start a new stack
   609  			tos = &stackEnt{s.Name, val, 0, noPath}
   610  
   611  		case s.Name == "":
   612  			// Pop
   613  			if tos == noPath {
   614  				return "<malformed symbol table>", 0
   615  			}
   616  			tos.prev.offset += val - tos.start
   617  			tos = tos.prev
   618  
   619  		default:
   620  			// Push
   621  			tos = &stackEnt{s.Name, val, 0, tos}
   622  		}
   623  	}
   624  
   625  	if tos == noPath {
   626  		return "", 0
   627  	}
   628  	return tos.path, aline - tos.start - tos.offset + 1
   629  }
   630  
   631  func (o *Obj) alineFromLine(path string, line int) (int, error) {
   632  	if line < 1 {
   633  		return 0, &UnknownLineError{path, line}
   634  	}
   635  
   636  	for i, s := range o.Paths {
   637  		// Find this path
   638  		if s.Name != path {
   639  			continue
   640  		}
   641  
   642  		// Find this line at this stack level
   643  		depth := 0
   644  		var incstart int
   645  		line += int(s.Value)
   646  	pathloop:
   647  		for _, s := range o.Paths[i:] {
   648  			val := int(s.Value)
   649  			switch {
   650  			case depth == 1 && val >= line:
   651  				return line - 1, nil
   652  
   653  			case s.Name == "":
   654  				depth--
   655  				if depth == 0 {
   656  					break pathloop
   657  				} else if depth == 1 {
   658  					line += val - incstart
   659  				}
   660  
   661  			default:
   662  				if depth == 1 {
   663  					incstart = val
   664  				}
   665  				depth++
   666  			}
   667  		}
   668  		return 0, &UnknownLineError{path, line}
   669  	}
   670  	return 0, UnknownFileError(path)
   671  }
   672  
   673  /*
   674   * Errors
   675   */
   676  
   677  // UnknownFileError represents a failure to find the specific file in
   678  // the symbol table.
   679  type UnknownFileError string
   680  
   681  func (e UnknownFileError) Error() string { return "unknown file: " + string(e) }
   682  
   683  // UnknownLineError represents a failure to map a line to a program
   684  // counter, either because the line is beyond the bounds of the file
   685  // or because there is no code on the given line.
   686  type UnknownLineError struct {
   687  	File string
   688  	Line int
   689  }
   690  
   691  func (e *UnknownLineError) Error() string {
   692  	return "no code at " + e.File + ":" + strconv.Itoa(e.Line)
   693  }
   694  
   695  // DecodingError represents an error during the decoding of
   696  // the symbol table.
   697  type DecodingError struct {
   698  	off int
   699  	msg string
   700  	val interface{}
   701  }
   702  
   703  func (e *DecodingError) Error() string {
   704  	msg := e.msg
   705  	if e.val != nil {
   706  		msg += fmt.Sprintf(" '%v'", e.val)
   707  	}
   708  	msg += fmt.Sprintf(" at byte %#x", e.off)
   709  	return msg
   710  }