github.com/karrick/go@v0.0.0-20170817181416-d5b0ec858b37/src/cmd/internal/obj/link.go (about)

     1  // Derived from Inferno utils/6l/l.h and related files.
     2  // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/l.h
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package obj
    32  
    33  import (
    34  	"bufio"
    35  	"cmd/internal/dwarf"
    36  	"cmd/internal/objabi"
    37  	"cmd/internal/src"
    38  	"cmd/internal/sys"
    39  	"fmt"
    40  	"sync"
    41  )
    42  
    43  // An Addr is an argument to an instruction.
    44  // The general forms and their encodings are:
    45  //
    46  //	sym±offset(symkind)(reg)(index*scale)
    47  //		Memory reference at address &sym(symkind) + offset + reg + index*scale.
    48  //		Any of sym(symkind), ±offset, (reg), (index*scale), and *scale can be omitted.
    49  //		If (reg) and *scale are both omitted, the resulting expression (index) is parsed as (reg).
    50  //		To force a parsing as index*scale, write (index*1).
    51  //		Encoding:
    52  //			type = TYPE_MEM
    53  //			name = symkind (NAME_AUTO, ...) or 0 (NAME_NONE)
    54  //			sym = sym
    55  //			offset = ±offset
    56  //			reg = reg (REG_*)
    57  //			index = index (REG_*)
    58  //			scale = scale (1, 2, 4, 8)
    59  //
    60  //	$<mem>
    61  //		Effective address of memory reference <mem>, defined above.
    62  //		Encoding: same as memory reference, but type = TYPE_ADDR.
    63  //
    64  //	$<±integer value>
    65  //		This is a special case of $<mem>, in which only ±offset is present.
    66  //		It has a separate type for easy recognition.
    67  //		Encoding:
    68  //			type = TYPE_CONST
    69  //			offset = ±integer value
    70  //
    71  //	*<mem>
    72  //		Indirect reference through memory reference <mem>, defined above.
    73  //		Only used on x86 for CALL/JMP *sym(SB), which calls/jumps to a function
    74  //		pointer stored in the data word sym(SB), not a function named sym(SB).
    75  //		Encoding: same as above, but type = TYPE_INDIR.
    76  //
    77  //	$*$<mem>
    78  //		No longer used.
    79  //		On machines with actual SB registers, $*$<mem> forced the
    80  //		instruction encoding to use a full 32-bit constant, never a
    81  //		reference relative to SB.
    82  //
    83  //	$<floating point literal>
    84  //		Floating point constant value.
    85  //		Encoding:
    86  //			type = TYPE_FCONST
    87  //			val = floating point value
    88  //
    89  //	$<string literal, up to 8 chars>
    90  //		String literal value (raw bytes used for DATA instruction).
    91  //		Encoding:
    92  //			type = TYPE_SCONST
    93  //			val = string
    94  //
    95  //	<register name>
    96  //		Any register: integer, floating point, control, segment, and so on.
    97  //		If looking for specific register kind, must check type and reg value range.
    98  //		Encoding:
    99  //			type = TYPE_REG
   100  //			reg = reg (REG_*)
   101  //
   102  //	x(PC)
   103  //		Encoding:
   104  //			type = TYPE_BRANCH
   105  //			val = Prog* reference OR ELSE offset = target pc (branch takes priority)
   106  //
   107  //	$±x-±y
   108  //		Final argument to TEXT, specifying local frame size x and argument size y.
   109  //		In this form, x and y are integer literals only, not arbitrary expressions.
   110  //		This avoids parsing ambiguities due to the use of - as a separator.
   111  //		The ± are optional.
//		If the final argument to TEXT omits the -±y, the encoding should still
//		use TYPE_TEXTSIZE (not TYPE_CONST), with val = ArgsSizeUnknown.
   114  //		Encoding:
   115  //			type = TYPE_TEXTSIZE
   116  //			offset = x
   117  //			val = int32(y)
   118  //
   119  //	reg<<shift, reg>>shift, reg->shift, reg@>shift
   120  //		Shifted register value, for ARM and ARM64.
   121  //		In this form, reg must be a register and shift can be a register or an integer constant.
   122  //		Encoding:
   123  //			type = TYPE_SHIFT
   124  //		On ARM:
   125  //			offset = (reg&15) | shifttype<<5 | count
   126  //			shifttype = 0, 1, 2, 3 for <<, >>, ->, @>
   127  //			count = (reg&15)<<8 | 1<<4 for a register shift count, (n&31)<<7 for an integer constant.
   128  //		On ARM64:
   129  //			offset = (reg&31)<<16 | shifttype<<22 | (count&63)<<10
   130  //			shifttype = 0, 1, 2 for <<, >>, ->
   131  //
   132  //	(reg, reg)
   133  //		A destination register pair. When used as the last argument of an instruction,
   134  //		this form makes clear that both registers are destinations.
   135  //		Encoding:
   136  //			type = TYPE_REGREG
   137  //			reg = first register
   138  //			offset = second register
   139  //
   140  //	[reg, reg, reg-reg]
   141  //		Register list for ARM.
   142  //		Encoding:
   143  //			type = TYPE_REGLIST
   144  //			offset = bit mask of registers in list; R0 is low bit.
   145  //
   146  //	reg, reg
   147  //		Register pair for ARM.
   148  //		TYPE_REGREG2
   149  //
   150  //	(reg+reg)
   151  //		Register pair for PPC64.
   152  //		Encoding:
   153  //			type = TYPE_MEM
   154  //			reg = first register
   155  //			index = second register
   156  //			scale = 1
   157  //
type Addr struct {
	Reg    int16    // register (REG_*): the (reg) of a memory reference, or the register itself for TYPE_REG
	Index  int16    // index register (REG_*) of a memory reference
	Scale  int16    // scale applied to Index (1, 2, 4, 8). Sometimes holds a register.
	Type   AddrType // operand form (TYPE_*); determines which of the other fields are meaningful
	Name   AddrName // symbol kind (NAME_*), or NAME_NONE
	Class  int8     // NOTE(review): not described in this file; presumably a back-end operand class — confirm
	Offset int64    // ±offset or integer value; for some types an encoded shift, second register, or register mask
	Sym    *LSym    // symbol referred to by the operand, if any

	// argument value:
	//	for TYPE_SCONST, a string
	//	for TYPE_FCONST, a float64
	//	for TYPE_BRANCH, a *Prog (optional)
	//	for TYPE_TEXTSIZE, an int32 (optional)
	Val interface{}
}
   175  
// An AddrName is the symbol kind of an Addr, stored in Addr.Name.
// It is the "symkind" of the Addr encoding description.
type AddrName int8

const (
	NAME_NONE   AddrName = iota // zero value: no symbol kind
	NAME_EXTERN                 // NOTE(review): presumably an externally visible symbol — confirm
	NAME_STATIC                 // NOTE(review): presumably a file-local (static) symbol — confirm
	NAME_AUTO                   // NOTE(review): presumably a stack-allocated local — confirm
	NAME_PARAM                  // NOTE(review): presumably a function argument or result — confirm
	// A reference to name@GOT(SB) is a reference to the entry in the global offset
	// table for 'name'.
	NAME_GOTREF
)
   188  
// An AddrType is the operand form of an Addr, stored in Addr.Type.
// The per-type encodings are given in the documentation of Addr.
type AddrType uint8

const (
	TYPE_NONE     AddrType = iota // zero value: no operand
	TYPE_BRANCH                   // x(PC): branch target
	TYPE_TEXTSIZE                 // $±x-±y: frame size and argument size, final argument to TEXT
	TYPE_MEM                      // sym±offset(symkind)(reg)(index*scale): memory reference
	TYPE_CONST                    // $<±integer value>
	TYPE_FCONST                   // $<floating point literal>
	TYPE_SCONST                   // $<string literal, up to 8 chars>
	TYPE_REG                      // <register name>
	TYPE_ADDR                     // $<mem>: effective address of a memory reference
	TYPE_SHIFT                    // shifted register value, for ARM and ARM64
	TYPE_REGREG                   // (reg, reg): destination register pair
	TYPE_REGREG2                  // reg, reg: register pair for ARM
	TYPE_INDIR                    // *<mem>: indirect reference through a memory reference
	TYPE_REGLIST                  // [reg, reg, reg-reg]: register list for ARM
)
   207  
// Prog describes a single machine instruction.
//
// The general instruction form is:
//
//	As.Scond From, Reg, From3, To, RegTo2
//
// where As is an opcode and the others are arguments:
// From, Reg, From3 are sources, and To, RegTo2 are destinations.
// Usually, not all arguments are present.
// For example, MOVL R1, R2 encodes using only As=MOVL, From=R1, To=R2.
// The Scond field holds additional condition bits for systems (like arm)
// that have generalized conditional execution.
//
// Jump instructions use the Pcond field to point to the target instruction,
// which must be in the same linked list as the jump instruction.
//
// The Progs for a given function are arranged in a list linked through the Link field.
//
// Each Prog is charged to a specific source line in the debug information,
// specified by Pos.Line().
// Every Prog has a Ctxt field that defines its context.
// For performance reasons, Progs are usually bulk allocated, cached, and reused;
// those bulk allocators should always be used, rather than new(Prog).
//
// The other fields not yet mentioned are for use by the back ends and should
// be left zeroed by creators of Prog lists.
type Prog struct {
	Ctxt   *Link    // linker context
	Link   *Prog    // next Prog in linked list
	From   Addr     // first source operand
	From3  *Addr    // third source operand (second is Reg below); nil when absent, see From3Type
	To     Addr     // destination operand (second is RegTo2 below)
	Pcond  *Prog    // target of conditional jump
	Forwd  *Prog    // for x86 back end
	Rel    *Prog    // for x86, arm back ends
	Pc     int64    // for back ends or assembler: virtual or actual program counter, depending on phase
	Pos    src.XPos // source position of this instruction
	Spadj  int32    // effect of instruction on stack pointer (increment or decrement amount)
	As     As       // assembler opcode
	Reg    int16    // 2nd source operand
	RegTo2 int16    // 2nd destination operand
	Mark   uint16   // bitmask of arch-specific items
	Optab  uint16   // arch-specific opcode index
	Scond  uint8    // condition bits for conditional instruction (e.g., on ARM)
	Back   uint8    // for x86 back end: backwards branch state
	Ft     uint8    // for x86 back end: type index of Prog.From
	Tt     uint8    // for x86 back end: type index of Prog.To
	Isize  uint8    // for x86 back end: size of the instruction in bytes
}
   257  
   258  // From3Type returns From3.Type, or TYPE_NONE when From3 is nil.
   259  func (p *Prog) From3Type() AddrType {
   260  	if p.From3 == nil {
   261  		return TYPE_NONE
   262  	}
   263  	return p.From3.Type
   264  }
   265  
// An As denotes an assembler opcode.
// There are some portable opcodes, declared here in package obj,
// that are common to all architectures.
// However, the majority of opcodes are arch-specific
// and are declared in their respective architecture's subpackage.
type As int16

// These are the portable opcodes.
// They are numbered consecutively from zero; A_ARCHSPECIFIC is one past
// the last portable opcode.
const (
	AXXX As = iota // zero value of As
	ACALL
	ADUFFCOPY
	ADUFFZERO
	AEND
	AFUNCDATA
	AJMP
	ANOP
	APCDATA
	ARET
	ATEXT
	AUNDEF
	A_ARCHSPECIFIC // offset, within an architecture's subspace, of its first arch-specific opcode
)
   289  
// Each architecture is allotted a distinct subspace of opcode values
// for declaring its arch-specific opcodes.
// Within this subspace, the first arch-specific opcode should be
// at offset A_ARCHSPECIFIC.
//
// Subspaces are aligned to a power of two so opcodes can be masked
// with AMask and used as compact array indices.
//
// The bases start at 1<<10 rather than 0, so no architecture's subspace
// begins at opcode value zero.
const (
	ABase386   = (1 + iota) << 10 // 1 << 10
	ABaseARM                      // 2 << 10
	ABaseAMD64                    // 3 << 10
	ABasePPC64                    // 4 << 10
	ABaseARM64                    // 5 << 10
	ABaseMIPS                     // 6 << 10
	ABaseS390X                    // 7 << 10

	AllowedOpCodes = 1 << 10            // The number of opcodes available for any given architecture.
	AMask          = AllowedOpCodes - 1 // AND with this to use the opcode as an array index.
)
   309  
// An LSym is the sort of symbol that is written to an object file.
type LSym struct {
	Name      string         // symbol name; also the result of String
	Type      objabi.SymKind // kind of symbol
	Attribute                // embedded attribute bit set (DuplicateOK, NoSplit, ...)

	RefIdx int     // Index of this symbol in the symbol reference list.
	Size   int64   // NOTE(review): presumably the symbol's size in bytes — confirm
	Gotype *LSym   // NOTE(review): presumably the symbol describing this symbol's Go type — confirm
	P      []byte  // NOTE(review): presumably the symbol's data/code bytes — confirm
	R      []Reloc // relocations attached to this symbol

	Func *FuncInfo // extra fields for STEXT symbols
}
   324  
// A FuncInfo contains extra fields for STEXT symbols.
type FuncInfo struct {
	Args   int32   // NOTE(review): presumably the argument size from the TEXT directive — confirm
	Locals int32   // NOTE(review): presumably the local frame size from the TEXT directive — confirm
	Text   *Prog   // NOTE(review): presumably the first Prog of the function's instruction list — confirm
	Autom  []*Auto // Auto records attached to the function
	Pcln   Pcln    // pc-indexed tables and file list for the function

	// Symbols for the function's DWARF output, named after what they hold
	// (info, location lists, ranges).
	dwarfInfoSym   *LSym
	dwarfLocSym    *LSym
	dwarfRangesSym *LSym

	// NOTE(review): presumably the garbage-collector pointer maps for the
	// function's arguments and locals — confirm against the compiler.
	GCArgs   LSym
	GCLocals LSym
}
   340  
   341  // Attribute is a set of symbol attributes.
   342  type Attribute int16
   343  
   344  const (
   345  	AttrDuplicateOK Attribute = 1 << iota
   346  	AttrCFunc
   347  	AttrNoSplit
   348  	AttrLeaf
   349  	AttrWrapper
   350  	AttrNeedCtxt
   351  	AttrNoFrame
   352  	AttrSeenGlobl
   353  	AttrOnList
   354  	AttrStatic
   355  
   356  	// MakeTypelink means that the type should have an entry in the typelink table.
   357  	AttrMakeTypelink
   358  
   359  	// ReflectMethod means the function may call reflect.Type.Method or
   360  	// reflect.Type.MethodByName. Matching is imprecise (as reflect.Type
   361  	// can be used through a custom interface), so ReflectMethod may be
   362  	// set in some cases when the reflect package is not called.
   363  	//
   364  	// Used by the linker to determine what methods can be pruned.
   365  	AttrReflectMethod
   366  
   367  	// Local means make the symbol local even when compiling Go code to reference Go
   368  	// symbols in other shared libraries, as in this mode symbols are global by
   369  	// default. "local" here means in the sense of the dynamic linker, i.e. not
   370  	// visible outside of the module (shared library or executable) that contains its
   371  	// definition. (When not compiling to support Go shared libraries, all symbols are
   372  	// local in this sense unless there is a cgo_export_* directive).
   373  	AttrLocal
   374  )
   375  
   376  func (a Attribute) DuplicateOK() bool   { return a&AttrDuplicateOK != 0 }
   377  func (a Attribute) MakeTypelink() bool  { return a&AttrMakeTypelink != 0 }
   378  func (a Attribute) CFunc() bool         { return a&AttrCFunc != 0 }
   379  func (a Attribute) NoSplit() bool       { return a&AttrNoSplit != 0 }
   380  func (a Attribute) Leaf() bool          { return a&AttrLeaf != 0 }
   381  func (a Attribute) SeenGlobl() bool     { return a&AttrSeenGlobl != 0 }
   382  func (a Attribute) OnList() bool        { return a&AttrOnList != 0 }
   383  func (a Attribute) ReflectMethod() bool { return a&AttrReflectMethod != 0 }
   384  func (a Attribute) Local() bool         { return a&AttrLocal != 0 }
   385  func (a Attribute) Wrapper() bool       { return a&AttrWrapper != 0 }
   386  func (a Attribute) NeedCtxt() bool      { return a&AttrNeedCtxt != 0 }
   387  func (a Attribute) NoFrame() bool       { return a&AttrNoFrame != 0 }
   388  func (a Attribute) Static() bool        { return a&AttrStatic != 0 }
   389  
   390  func (a *Attribute) Set(flag Attribute, value bool) {
   391  	if value {
   392  		*a |= flag
   393  	} else {
   394  		*a &^= flag
   395  	}
   396  }
   397  
   398  var textAttrStrings = [...]struct {
   399  	bit Attribute
   400  	s   string
   401  }{
   402  	{bit: AttrDuplicateOK, s: "DUPOK"},
   403  	{bit: AttrMakeTypelink, s: ""},
   404  	{bit: AttrCFunc, s: "CFUNC"},
   405  	{bit: AttrNoSplit, s: "NOSPLIT"},
   406  	{bit: AttrLeaf, s: "LEAF"},
   407  	{bit: AttrSeenGlobl, s: ""},
   408  	{bit: AttrOnList, s: ""},
   409  	{bit: AttrReflectMethod, s: "REFLECTMETHOD"},
   410  	{bit: AttrLocal, s: "LOCAL"},
   411  	{bit: AttrWrapper, s: "WRAPPER"},
   412  	{bit: AttrNeedCtxt, s: "NEEDCTXT"},
   413  	{bit: AttrNoFrame, s: "NOFRAME"},
   414  	{bit: AttrStatic, s: "STATIC"},
   415  }
   416  
   417  // TextAttrString formats a for printing in as part of a TEXT prog.
   418  func (a Attribute) TextAttrString() string {
   419  	var s string
   420  	for _, x := range textAttrStrings {
   421  		if a&x.bit != 0 {
   422  			if x.s != "" {
   423  				s += x.s + "|"
   424  			}
   425  			a &^= x.bit
   426  		}
   427  	}
   428  	if a != 0 {
   429  		s += fmt.Sprintf("UnknownAttribute(%d)|", a)
   430  	}
   431  	// Chop off trailing |, if present.
   432  	if len(s) > 0 {
   433  		s = s[:len(s)-1]
   434  	}
   435  	return s
   436  }
   437  
// String returns the symbol's name.
//
// The compiler needs LSym to satisfy fmt.Stringer, because it stores
// an LSym in ssa.ExternSymbol.
func (s *LSym) String() string {
	return s.Name
}
   443  
// Pcln holds the per-function tables and file list stored in FuncInfo.Pcln.
//
// NOTE(review): the individual tables are not described in this file; the
// field notes below are inferred from the field names — confirm against the
// code that fills them in.
type Pcln struct {
	Pcsp        Pcdata   // presumably pc -> stack pointer adjustment
	Pcfile      Pcdata   // presumably pc -> file index
	Pcline      Pcdata   // presumably pc -> line number
	Pcinline    Pcdata   // presumably pc -> index into InlTree
	Pcdata      []Pcdata // presumably one table per PCDATA index
	Funcdata    []*LSym  // presumably one symbol per FUNCDATA index
	Funcdataoff []int64  // presumably offsets paired with the entries of Funcdata
	File        []string
	Lastfile    string
	Lastindex   int
	InlTree     InlTree // per-function inlining tree extracted from the global tree
}
   457  
// Reloc is a relocation record; an LSym carries its relocations in LSym.R.
//
// NOTE(review): the field meanings below are inferred from names and types,
// not stated in this file — confirm against the object writer.
type Reloc struct {
	Off  int32            // presumably the offset within the symbol's data being relocated
	Siz  uint8            // presumably the width in bytes of the relocated field
	Type objabi.RelocType // kind of relocation
	Add  int64            // presumably the addend
	Sym  *LSym            // presumably the symbol the relocation refers to
}
   465  
// Auto is a per-function record; a function's Autos are kept in FuncInfo.Autom.
//
// NOTE(review): presumably each Auto describes one stack variable (local or
// parameter) of the function — confirm against the code that builds Autom.
type Auto struct {
	Asym    *LSym    // presumably the symbol naming the variable
	Aoffset int32    // presumably the variable's frame offset
	Name    AddrName // symbol kind (NAME_*)
	Gotype  *LSym    // presumably the symbol describing the variable's Go type
}
   472  
// Pcdata is one table of a function's Pcln, held as raw bytes.
type Pcdata struct {
	P []byte // table contents; NOTE(review): encoding is not described in this file
}
   476  
// Link holds the context for writing object code from a compiler
// to be linker input or for reading that input into the linker.
type Link struct {
	Headtype           objabi.HeadType
	Arch               *LinkArch // target architecture; consulted by FixedFrameSize
	Debugasm           bool
	Debugvlog          bool
	Debugpcln          string
	Flag_shared        bool
	Flag_dynlink       bool
	Flag_optimize      bool
	Flag_locationlists bool
	Bso                *bufio.Writer // buffered output that Logf writes to and flushes
	Pathname           string
	hashmu             sync.Mutex       // protects hash
	hash               map[string]*LSym // name -> sym mapping
	statichash         map[string]*LSym // name -> sym mapping for static syms
	PosTable           src.PosTable
	InlTree            InlTree // global inlining tree used by gc/inl.go
	Imports            []string
	DiagFunc           func(string, ...interface{})                    // called by Diag with the format and arguments
	DebugInfo          func(fn *LSym, curfn interface{}) []dwarf.Scope // if non-nil, curfn is a *gc.Node
	Errors             int                                             // incremented on every call to Diag

	Framepointer_enabled bool

	// state for writing objects
	Text []*LSym
	Data []*LSym
}
   507  
   508  func (ctxt *Link) Diag(format string, args ...interface{}) {
   509  	ctxt.Errors++
   510  	ctxt.DiagFunc(format, args...)
   511  }
   512  
   513  func (ctxt *Link) Logf(format string, args ...interface{}) {
   514  	fmt.Fprintf(ctxt.Bso, format, args...)
   515  	ctxt.Bso.Flush()
   516  }
   517  
   518  // The smallest possible offset from the hardware stack pointer to a local
   519  // variable on the stack. Architectures that use a link register save its value
   520  // on the stack in the function prologue and so always have a pointer between
   521  // the hardware stack pointer and the local variable area.
   522  func (ctxt *Link) FixedFrameSize() int64 {
   523  	switch ctxt.Arch.Family {
   524  	case sys.AMD64, sys.I386:
   525  		return 0
   526  	case sys.PPC64:
   527  		// PIC code on ppc64le requires 32 bytes of stack, and it's easier to
   528  		// just use that much stack always on ppc64x.
   529  		return int64(4 * ctxt.Arch.PtrSize)
   530  	default:
   531  		return int64(ctxt.Arch.PtrSize)
   532  	}
   533  }
   534  
// LinkArch is the definition of a single architecture.
// It embeds *sys.Arch, so fields such as Family and PtrSize are available
// directly on a LinkArch.
type LinkArch struct {
	*sys.Arch
	// Per-architecture hooks.
	// NOTE(review): when each hook is invoked is not shown in this file;
	// see the callers in package obj and the architecture subpackages.
	Init           func(*Link)
	Preprocess     func(*Link, *LSym, ProgAlloc)
	Assemble       func(*Link, *LSym, ProgAlloc)
	Progedit       func(*Link, *Prog, ProgAlloc)
	UnaryDst       map[As]bool     // Instruction takes one operand, a destination.
	DWARFRegisters map[int16]int16 // NOTE(review): presumably maps obj register numbers to DWARF register numbers — confirm
}