github.com/mattn/go@v0.0.0-20171011075504-07f7db3ea99f/src/cmd/compile/internal/x86/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
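// (The assembler normally rewrites MOVL $0, reg into XORL reg, reg, which
// clobbers the flags; a marked constant keeps its literal MOV encoding so
// that live flags survive.)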
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && v.Op == ssa.Op386MOVLconst {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
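	// A plain MOVB/MOVW load would leave the upper bits of the destination
	// register unchanged, creating a false dependency on its old value;
	// the zero-extending forms write all 32 bits.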
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		default:
			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
		}
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//	dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.Op386ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			p := s.Prog(x86.ALEAL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	// 2-address opcode arithmetic
	case ssa.Op386SUBL,
		ssa.Op386MULL,
		ssa.Op386ANDL,
		ssa.Op386ORL,
		ssa.Op386XORL,
		ssa.Op386SHLL,
		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
		ssa.Op386PXOR,
		ssa.Op386ADCL,
		ssa.Op386SBBL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386DIVL, ssa.Op386DIVW,
		ssa.Op386DIVLU, ssa.Op386DIVWU,
		ssa.Op386MODL, ssa.Op386MODW,
		ssa.Op386MODLU, ssa.Op386MODWU:

		// Arg[0] (the dividend) is already in AX, the only register we allow;
		// the quotient is returned in AX and the remainder in DX.
		x := v.Args[1].Reg()

		// The CPU faults on signed overflow, which occurs when the most
		// negative int is divided by -1.
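		// Roughly, the sequence emitted below for the signed case is:
		//	CMPL  x, $-1
		//	JEQ   fixup
		//	CDQ            (CWD for the 16-bit ops)
		//	IDIVL x        (IDIVW for the 16-bit ops)
		//	JMP   done
		// fixup:
		//	NEGL  AX       (DIV: n / -1 = -n, or XORL DX, DX for MOD: n % -1 = 0)
		// done: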
		var j *obj.Prog
		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {

			var c *obj.Prog
			switch v.Op {
			case ssa.Op386DIVL, ssa.Op386MODL:
				c = s.Prog(x86.ACMPL)
				j = s.Prog(x86.AJEQ)
				s.Prog(x86.ACDQ) //TODO: fix

			case ssa.Op386DIVW, ssa.Op386MODW:
				c = s.Prog(x86.ACMPW)
				j = s.Prog(x86.AJEQ)
				s.Prog(x86.ACWD)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1

			j.To.Type = obj.TYPE_BRANCH
		}

		// For unsigned ints we zero extend by setting DX = 0;
		// signed ints were sign extended above.
		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
			c := s.Prog(x86.AXORL)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// signed division, rest of the check for -1 case
		if j != nil {
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
				// n / -1 = -n
				n = s.Prog(x86.ANEGL)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = s.Prog(x86.AXORL)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.Val = n
			j2.To.Val = s.Pc()
		}

	case ssa.Op386HMULL, ssa.Op386HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.Op386MULLQU:
		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386AVGLU:
		// compute (x+y)/2 unsigned.
		// Do a 32-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 31st bit.
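		// Roughly:
		//	ADDL y, r    (CF = carry out of bit 31)
		//	RCRL $1, r   (rotate right through carry: CF becomes the new bit 31)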
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDL)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				p := s.Prog(x86.AINCL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				p := s.Prog(x86.ADECL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386MULLconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = v.Args[0].Reg()

	case ssa.Op386SUBLconst,
		ssa.Op386ADCLconst,
		ssa.Op386SBBLconst,
		ssa.Op386ANDLconst,
		ssa.Op386ORLconst,
		ssa.Op386XORLconst,
		ssa.Op386SHLLconst,
		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAL)
		switch v.Op {
		case ssa.Op386LEAL1:
			p.From.Scale = 1
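			// SP is not encodable as an index register in a SIB byte,
			// so if it ended up in the index slot, swap it with the base.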
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386LEAL2:
			p.From.Scale = 2
		case ssa.Op386LEAL4:
			p.From.Scale = 4
		case ssa.Op386LEAL8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386LEAL:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.Op386MOVLconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}
	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
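		// const1 materializes the address of a constant-pool symbol holding
		// the value; the paired const2 op then loads through that address.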
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		f := math.Float64frombits(uint64(v.AuxInt))
		if v.Op == ssa.Op386MOVSDconst1 {
			p.From.Sym = gc.Ctxt.Float64Sym(f)
		} else {
			p.From.Sym = gc.Ctxt.Float32Sym(float32(f))
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSDloadidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVWloadidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSDstoreidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 8
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 4
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVWstoreidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 2
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.Op386MOVLstoreconstidx4:
			p.To.Scale = 4
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.Op386DUFFZERO:
		p := s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = v.AuxInt
	case ssa.Op386DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.Op386MOVLconvert:
		if v.Args[0].Reg() != v.Reg() {
			v.Fatalf("MOVLconvert should be a no-op")
		}
	case ssa.OpCopy: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.Op386LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.Op386LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVL (TLS), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVL TLS, r
			// MOVL (r)(TLS*1), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVL)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}

	case ssa.Op386LoweredGetCallerPC:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -4 // PC is stored 4 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -gc.Ctxt.FixedFrameSize() // 0 on 386, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
		s.Call(v)
	case ssa.Op386NEGL,
		ssa.Op386BSWAPL,
		ssa.Op386NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386BSFL, ssa.Op386BSFW,
		ssa.Op386BSRL, ssa.Op386BSRW,
		ssa.Op386SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SETEQ, ssa.Op386SETNE,
		ssa.Op386SETL, ssa.Op386SETLE,
		ssa.Op386SETG, ssa.Op386SETGE,
		ssa.Op386SETGF, ssa.Op386SETGEF,
		ssa.Op386SETB, ssa.Op386SETBE,
		ssa.Op386SETORD, ssa.Op386SETNAN,
		ssa.Op386SETA, ssa.Op386SETAE:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386SETNEF:
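		// SETNE alone is not enough for floats: an unordered comparison
		// (a NaN operand) sets PF, and x != y must then be true, so the
		// parity result is ORed in below.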
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.Op386SETEQF:
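		// Conversely, x == y must be false for unordered operands, so the
		// SETEQ result is ANDed with SETPC (parity clear) below.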
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.Op386InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.Op386REPSTOSL:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSL)
	case ssa.Op386REPMOVSL:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSL)
	case ssa.Op386LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.Op386FCHS:
		v.Fatalf("FCHS in non-387 mode")
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}

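// eqfJumps and nefJumps describe the two-jump sequences used for
// floating-point == and != branches: equality holds only when ZF is set
// and PF is clear, since an unordered comparison (NaN) sets PF.
// The Index field selects which block successor each jump targets.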
var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// The defer call returns in AX:
		// 0 if we should continue executing,
		// 1 if we should jump to the deferreturn call.
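		// Roughly:
		//	TESTL AX, AX
		//	JNE   deferreturn block (b.Succs[1])
		//	JMP   b.Succs[0]  (omitted when that block is next)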
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.Block386EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.Block386NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.Block386EQ, ssa.Block386NE,
		ssa.Block386LT, ssa.Block386GE,
		ssa.Block386LE, ssa.Block386GT,
		ssa.Block386ULT, ssa.Block386UGT,
		ssa.Block386ULE, ssa.Block386UGE:
		jmp := blockJump[b.Kind]
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			p = s.Prog(jmp.invasm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := s.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}