github.com/tidwall/go@v0.0.0-20170415222209-6694a6888b7d/src/cmd/compile/internal/x86/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// ssaMarkMoves marks any MOVLconst ops that need to avoid clobbering flags.
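// The assembler turns MOV $0, reg into XOR reg, reg, which clobbers the flags
// register; marking a MOVLconst (with a non-nil Aux) tells it to keep the
// plain MOV when flags are still live. Flag liveness is computed by scanning
// each block backwards from its end.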
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && v.Op == ssa.Op386MOVLconst {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t ssa.Type) obj.As {
	// Avoid partial register write
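	// (writing only the low byte or word of a register leaves the upper
	// bits live and can cause a partial-register stall; a zero-extending
	// load writes the whole register instead).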
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t ssa.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t ssa.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		default:
			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
		}
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//     dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.Op386ADDL:
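		// ADDL is a 2-address instruction, so the output must share a
		// register with one of the inputs. When it shares neither, use
		// LEAL, which can do a 3-address add (and leaves flags alone).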
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			p := s.Prog(x86.ALEAL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	// 2-address opcode arithmetic
	case ssa.Op386SUBL,
		ssa.Op386MULL,
		ssa.Op386ANDL,
		ssa.Op386ORL,
		ssa.Op386XORL,
		ssa.Op386SHLL,
		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
		ssa.Op386PXOR,
		ssa.Op386ADCL,
		ssa.Op386SBBL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
		// output 0 is the low 32 bits, output 1 is the carry/borrow (flags).
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
		// output 0 is the low 32 bits, output 1 is the carry/borrow (flags).
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386DIVL, ssa.Op386DIVW,
		ssa.Op386DIVLU, ssa.Op386DIVWU,
		ssa.Op386MODL, ssa.Op386MODW,
		ssa.Op386MODLU, ssa.Op386MODWU:

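		// For the signed ops the emitted sequence is roughly:
		//	CMPL	x, $-1
		//	JEQ	fix
		//	CDQ		(CWD for the 16-bit ops)
		//	IDIVL	x	(or IDIVW)
		//	JMP	done
		// fix:
		//	NEGL	AX	(quotient of n / -1 is -n), or
		//	XORL	DX, DX	(remainder of n % -1 is 0)
		// done: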
		// Arg[0] is already in AX as it's the only register we allow;
		// the quotient ends up in AX and the remainder in DX.
		x := v.Args[1].Reg()

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1.
		var j *obj.Prog
		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {

			var c *obj.Prog
			switch v.Op {
			case ssa.Op386DIVL, ssa.Op386MODL:
				c = s.Prog(x86.ACMPL)
				j = s.Prog(x86.AJEQ)
				s.Prog(x86.ACDQ) //TODO: fix

			case ssa.Op386DIVW, ssa.Op386MODW:
				c = s.Prog(x86.ACMPW)
				j = s.Prog(x86.AJEQ)
				s.Prog(x86.ACWD)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1

			j.To.Type = obj.TYPE_BRANCH
		}

		// For unsigned ints, extend the dividend by zeroing DX;
		// signed ints were sign-extended above (CDQ/CWD).
		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
			c := s.Prog(x86.AXORL)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// Signed division: emit the rest of the check for the -1 case.
		if j != nil {
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
				// n * -1 = -n
				n = s.Prog(x86.ANEGL)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = s.Prog(x86.AXORL)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.Val = n
			j2.To.Val = s.Pc()
		}

	case ssa.Op386HMULL, ssa.Op386HMULLU:
		// The frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant; the SSA rewrite rules then select these ops.

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.Op386MULLQU:
		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386AVGLU:
		// compute (x+y)/2 unsigned.
		// Do a 32-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 31st bit.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDL)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386ADDLconst:
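		// Add in place when the output register matches the input
		// (INCL/DECL for ±1); otherwise use LEAL, which writes the sum
		// to a different register without an extra move.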
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				p := s.Prog(x86.AINCL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				p := s.Prog(x86.ADECL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386MULLconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = v.Args[0].Reg()

	case ssa.Op386SUBLconst,
		ssa.Op386ADCLconst,
		ssa.Op386SBBLconst,
		ssa.Op386ANDLconst,
		ssa.Op386ORLconst,
		ssa.Op386XORLconst,
		ssa.Op386SHLLconst,
		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
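		// LEALx computes Args[0] + scale*Args[1] + aux offset. SP cannot
		// be encoded as an index register, so for scale 1 swap base and
		// index when the index would be SP.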
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAL)
		switch v.Op {
		case ssa.Op386LEAL1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386LEAL2:
			p.From.Scale = 2
		case ssa.Op386LEAL4:
			p.From.Scale = 4
		case ssa.Op386LEAL8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386LEAL:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
		// The Go assembler's operand order for UCOMISx is swapped
		// relative to CMP; account for that here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.Op386MOVLconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}
	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		f := math.Float64frombits(uint64(v.AuxInt))
		if v.Op == ssa.Op386MOVSDconst1 {
			p.From.Sym = gc.Ctxt.Float64Sym(f)
		} else {
			p.From.Sym = gc.Ctxt.Float32Sym(float32(f))
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
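	// Indexed loads: the source address is base + scale*index (+ aux offset).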
	case ssa.Op386MOVSDloadidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVWloadidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSDstoreidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 8
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 4
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVWstoreidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 2
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
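		// Store-constant ops pack both the immediate and the address
		// offset into AuxInt as a ValAndOff pair.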
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.Op386MOVLstoreconstidx4:
			p.To.Scale = 4
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.Op386DUFFZERO:
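		// DUFFZERO/DUFFCOPY jump into the middle of the corresponding
		// Duff's-device routine in the runtime; AuxInt selects the entry
		// point and thus how many bytes are zeroed/copied.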
		p := s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = v.AuxInt
	case ssa.Op386DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.OpCopy, ssa.Op386MOVLconvert: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.Op386LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.Op386LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVL (TLS), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVL TLS, r
			// MOVL (r)(TLS*1), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVL)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
		s.Call(v)
	case ssa.Op386NEGL,
		ssa.Op386BSWAPL,
		ssa.Op386NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386BSFL, ssa.Op386BSFW,
		ssa.Op386BSRL, ssa.Op386BSRW,
		ssa.Op386SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SETEQ, ssa.Op386SETNE,
		ssa.Op386SETL, ssa.Op386SETLE,
		ssa.Op386SETG, ssa.Op386SETGE,
		ssa.Op386SETGF, ssa.Op386SETGEF,
		ssa.Op386SETB, ssa.Op386SETBE,
		ssa.Op386SETORD, ssa.Op386SETNAN,
		ssa.Op386SETA, ssa.Op386SETAE:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386SETNEF:
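		// Floating-point != must also be true for unordered operands
		// (PF set), so OR the SETNE result with SETPS.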
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.Op386SETEQF:
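		// Floating-point == must also be ordered (PF clear), so AND the
		// SETEQ result with SETPC.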
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.Op386InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.Op386REPSTOSL:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSL)
	case ssa.Op386REPMOVSL:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSL)
	case ssa.Op386LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have a false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.Op386FCHS:
		v.Fatalf("FCHS in non-387 mode")
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

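// blockJump maps a conditional block kind to its branch instruction (asm)
// and the inverted branch (invasm) used when the first successor is the
// fallthrough.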
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}

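// eqfJumps and nefJumps handle floating-point equality, which needs two
// branches because unordered operands set PF. The outer index says which
// successor is the fallthrough; Index selects the jump's target successor.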
var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in AX:
		// 0 if we should continue executing
		// 1 if we should jump to the deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.Block386EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.Block386NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.Block386EQ, ssa.Block386NE,
		ssa.Block386LT, ssa.Block386GE,
		ssa.Block386LE, ssa.Block386GT,
		ssa.Block386ULT, ssa.Block386UGT,
		ssa.Block386ULE, ssa.Block386UGE:
		jmp := blockJump[b.Kind]
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			p = s.Prog(jmp.invasm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := s.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}