github.com/Filosottile/go@v0.0.0-20170906193555-dbed9972d994/src/cmd/compile/internal/x86/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// ssaMarkMoves marks any MOVLconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && v.Op == ssa.Op386MOVLconst {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}
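
// For illustration: the assembler likes to rewrite MOVL $0, AX into the
// shorter XORL AX, AX, but XORL clobbers the condition flags. In a
// sequence such as
//
//	CMPL AX, BX
//	MOVL $0, CX    // must stay a MOV while the CMPL flags are live
//	JEQ  target
//
// the mark set here makes ssaGenValue add PRESERVEFLAGS to the MOV (see
// the Op386MOVLconst case below), which suppresses that rewrite.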

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		}
		return x86.AMOVWLZX
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}
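
// For example, a one-byte load becomes
//
//	MOVBLZX b+0(FP), AX
//
// which zero-extends the byte into all 32 bits of AX. A plain MOVB would
// write only the low 8 bits, leaving the rest of AX stale and creating a
// partial-register dependency on its previous value.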

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		default:
			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
		}
	}
	switch t.Size() {
	case 1, 2, 4:
		// Use MOVL even for the narrower widths to avoid a partial
		// register write.
		return x86.AMOVL
	default:
		panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
	}
}

// opregreg emits instructions for
//	dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = src
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	return p
}
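
// For example,
//
//	opregreg(s, x86.AADDL, x86.REG_AX, x86.REG_BX)
//
// emits ADDL BX, AX: in Go assembler syntax the source (From) operand
// comes first and the destination (To) operand second.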

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.Op386ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			// The output doesn't share a register with either input:
			// LEAL (r1)(r2*1), r performs the three-operand add
			// without clobbering r1 or r2.
			p := s.Prog(x86.ALEAL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	// 2-address opcode arithmetic
	case ssa.Op386SUBL,
		ssa.Op386MULL,
		ssa.Op386ANDL,
		ssa.Op386ORL,
		ssa.Op386XORL,
		ssa.Op386SHLL,
		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
		ssa.Op386PXOR,
		ssa.Op386ADCL,
		ssa.Op386SBBL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386DIVL, ssa.Op386DIVW,
		ssa.Op386DIVLU, ssa.Op386DIVWU,
		ssa.Op386MODL, ssa.Op386MODW,
		ssa.Op386MODLU, ssa.Op386MODWU:

		// Arg[0] is already in AX as it's the only register we allow.
		// The quotient comes back in AX and the remainder in DX.
		x := v.Args[1].Reg()

		// The CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Compare the divisor against -1
		// and skip the divide in that case.
		var j *obj.Prog
		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {

			var c *obj.Prog
			switch v.Op {
			case ssa.Op386DIVL, ssa.Op386MODL:
				c = s.Prog(x86.ACMPL)
				j = s.Prog(x86.AJEQ)
				s.Prog(x86.ACDQ) //TODO: fix

			case ssa.Op386DIVW, ssa.Op386MODW:
				c = s.Prog(x86.ACMPW)
				j = s.Prog(x86.AJEQ)
				s.Prog(x86.ACWD)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1

			j.To.Type = obj.TYPE_BRANCH
		}

		// For unsigned ints, we zero-extend by setting DX = 0;
		// signed ints were sign extended above (CDQ/CWD).
		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
			c := s.Prog(x86.AXORL)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// Signed division: the rest of the check for the -1 case.
		if j != nil {
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
				// n / -1 == -n
				n = s.Prog(x86.ANEGL)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = s.Prog(x86.AXORL)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.Val = n
			j2.To.Val = s.Pc()
		}
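
		// For signed 32-bit division with the divisor in, say, BX
		// (illustrative; the register is whatever the allocator picked),
		// the sequence built above is roughly:
		//
		//	CMPL  BX, $-1
		//	JEQ   fixup
		//	CDQ              // sign-extend AX into DX:AX
		//	IDIVL BX         // quotient in AX, remainder in DX
		//	JMP   done
		// fixup:
		//	NEGL  AX         // n / -1 == -n; skips the faulting IDIVL
		// done:
		//
		// (For MOD the fixup zeroes DX instead, since n % -1 == 0.)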

	case ssa.Op386HMULL, ssa.Op386HMULLU:
		// The frontend rewrites constant division by 8/16/32 bit integers
		// into HMUL by a constant; the SSA rewrite rules generate the
		// 32 bit versions.

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency.
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.Op386MULLQU:
		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386AVGLU:
		// Compute (x+y)/2 unsigned.
		// Do a 32-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 31st bit.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p = s.Prog(x86.ARCRL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
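
		// With the result in AX and the second operand in BX
		// (illustrative), the pair emitted above is:
		//
		//	ADDL BX, AX    // 33-bit sum; bit 32 lands in the carry flag
		//	RCRL $1, AX    // rotate right through carry: AX = (CF:AX) >> 1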

	case ssa.Op386ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			// Prefer INCL/DECL for ±1: they have shorter encodings.
			if v.AuxInt == 1 {
				p := s.Prog(x86.AINCL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				p := s.Prog(x86.ADECL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		// Output in a different register: LEAL AuxInt(a), r adds
		// without disturbing a.
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386MULLconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = v.Args[0].Reg()

	case ssa.Op386SUBLconst,
		ssa.Op386ADCLconst,
		ssa.Op386SBBLconst,
		ssa.Op386ANDLconst,
		ssa.Op386ORLconst,
		ssa.Op386XORLconst,
		ssa.Op386SHLLconst,
		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAL)
		switch v.Op {
		case ssa.Op386LEAL1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				// SP cannot be encoded as an index register; swap it
				// into the base slot (only possible at scale 1).
				r, i = i, r
			}
		case ssa.Op386LEAL2:
			p.From.Scale = 2
		case ssa.Op386LEAL4:
			p.From.Scale = 4
		case ssa.Op386LEAL8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386LEAL:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
		// The Go assembler has swapped operands for UCOMISx relative to
		// CMP; we must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.Op386MOVLconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}
	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		f := math.Float64frombits(uint64(v.AuxInt))
		if v.Op == ssa.Op386MOVSDconst1 {
			p.From.Sym = gc.Ctxt.Float64Sym(f)
		} else {
			p.From.Sym = gc.Ctxt.Float32Sym(float32(f))
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSDloadidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVWloadidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			// SP cannot be an index register; swap it into the base.
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSDstoreidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 8
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 4
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVWstoreidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 2
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			// SP cannot be an index register; swap it into the base.
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.Op386MOVLstoreconstidx4:
			p.To.Scale = 4
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.Op386DUFFZERO:
		p := s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = v.AuxInt
	case ssa.Op386DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.Op386MOVLconvert:
		if v.Args[0].Reg() != v.Reg() {
			v.Fatalf("MOVLconvert should be a no-op")
		}
	case ssa.OpCopy: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.Op386LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.Op386LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVL (TLS), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVL TLS, r
			// MOVL (r)(TLS*1), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVL)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
		s.Call(v)
	case ssa.Op386NEGL,
		ssa.Op386BSWAPL,
		ssa.Op386NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386BSFL, ssa.Op386BSFW,
		ssa.Op386BSRL, ssa.Op386BSRW,
		ssa.Op386SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SETEQ, ssa.Op386SETNE,
		ssa.Op386SETL, ssa.Op386SETLE,
		ssa.Op386SETG, ssa.Op386SETGE,
		ssa.Op386SETGF, ssa.Op386SETGEF,
		ssa.Op386SETB, ssa.Op386SETBE,
		ssa.Op386SETORD, ssa.Op386SETNAN,
		ssa.Op386SETA, ssa.Op386SETAE:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386SETNEF:
		// x != y iff the comparison was unequal (ZF == 0) or unordered
		// (PF == 1, at least one operand was NaN): OR the two conditions.
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.Op386SETEQF:
		// x == y iff the comparison was equal (ZF == 1) and ordered
		// (PF == 0): AND the two conditions.
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)
	case ssa.Op386InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.Op386REPSTOSL:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSL)
	case ssa.Op386REPMOVSL:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSL)
	case ssa.Op386LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have a false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.Op386FCHS:
		v.Fatalf("FCHS in non-387 mode")
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}
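
// For each conditional block kind, asm is the jump taken when the
// condition holds, and invasm is its inverse. ssaGenBlock emits invasm
// when the true successor is the fallthrough block, so only the false
// edge needs an explicit branch.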

var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}
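
// UCOMISS/UCOMISD report an unordered comparison (some operand was NaN)
// by setting ZF, PF, and CF all to 1, so float equality is ZF == 1 &&
// PF == 0 and cannot be tested with a single conditional jump. The tables
// above therefore describe two jumps each. For example, for EQF with
// b.Succs[0] next, both JNE and JPS branch to Succs[1]; execution falls
// through to Succs[0] only if the operands compared equal and ordered.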

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in AX:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.Block386EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.Block386NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.Block386EQ, ssa.Block386NE,
		ssa.Block386LT, ssa.Block386GE,
		ssa.Block386LE, ssa.Block386GT,
		ssa.Block386ULT, ssa.Block386UGT,
		ssa.Block386ULE, ssa.Block386UGE:
		jmp := blockJump[b.Kind]
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			// The true successor falls through: branch to the false
			// successor on the inverted condition.
			p = s.Prog(jmp.invasm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			// The false successor falls through: branch to the true
			// successor on the condition itself.
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			// Neither successor falls through: conditional jump to the
			// true successor, then an unconditional jump to the false one.
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := s.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}