github.com/goproxy0/go@v0.0.0-20171111080102-49cc0c489d2c/src/cmd/compile/internal/x86/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)
    17  
    18  // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
    19  func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
    20  	flive := b.FlagsLiveAtEnd
    21  	if b.Control != nil && b.Control.Type.IsFlags() {
    22  		flive = true
    23  	}
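	// Scan backwards, tracking in flive whether flags are live at each
	// point. A MOVLconst executed while flags are live is marked (non-nil
	// Aux) so that ssaGenValue won't lower it to a flag-clobbering XORL.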
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && v.Op == ssa.Op386MOVLconst {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		default:
			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
		}
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//     dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

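// ssaGenValue emits machine code for a single SSA value.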
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.Op386ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
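		// ADDL is two-address: when the output register matches an input,
		// add in place; otherwise synthesize a three-address add as
		// LEAL (r1)(r2*1), r, which leaves both inputs intact.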
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			p := s.Prog(x86.ALEAL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	// 2-address opcode arithmetic
	case ssa.Op386SUBL,
		ssa.Op386MULL,
		ssa.Op386ANDL,
		ssa.Op386ORL,
		ssa.Op386XORL,
		ssa.Op386SHLL,
		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
		ssa.Op386PXOR,
		ssa.Op386ADCL,
		ssa.Op386SBBL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386DIVL, ssa.Op386DIVW,
		ssa.Op386DIVLU, ssa.Op386DIVWU,
		ssa.Op386MODL, ssa.Op386MODW,
		ssa.Op386MODLU, ssa.Op386MODWU:

		// Arg[0] is already in AX as it's the only register we allow
		// and AX is the only output
		x := v.Args[1].Reg()

		// The CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1.
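		// For the signed cases the emitted sequence is, roughly:
		//	CMPL x, $-1
		//	JEQ  fixup
		//	CDQ          (or CWD for 16-bit)
		//	IDIVL x
		//	JMP  done
		// fixup:
		//	NEGL AX      (for DIV; XORL DX, DX for MOD)
		// done: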
		var j *obj.Prog
		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {

			var c *obj.Prog
			switch v.Op {
			case ssa.Op386DIVL, ssa.Op386MODL:
				c = s.Prog(x86.ACMPL)
				j = s.Prog(x86.AJEQ)
				s.Prog(x86.ACDQ) //TODO: fix

			case ssa.Op386DIVW, ssa.Op386MODW:
				c = s.Prog(x86.ACMPW)
				j = s.Prog(x86.AJEQ)
				s.Prog(x86.ACWD)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1

			j.To.Type = obj.TYPE_BRANCH
		}

		// for unsigned ints, we zero-extend by setting DX = 0;
		// signed ints were sign-extended above (CDQ/CWD)
		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
			c := s.Prog(x86.AXORL)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// signed division: finish the check for the -1 case
		if j != nil {
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
				// n * -1 = -n
				n = s.Prog(x86.ANEGL)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = s.Prog(x86.AXORL)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.Val = n
			j2.To.Val = s.Pc()
		}

	case ssa.Op386HMULL, ssa.Op386HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit integers
		// into HMUL by a constant; the SSA rewrite rules generate the
		// 32-bit versions used here

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.Op386MULLQU:
		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386AVGLU:
		// compute (x+y)/2 unsigned.
		// Do a 32-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 31st bit.
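		// That is: ADDL y, x followed by RCRL $1, x.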
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDL)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
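		// Use INCL/DECL for ±1 when adding in place, plain ADDL for other
		// constants, and LEAL off(a), r when the output register differs
		// (LEAL neither clobbers the input nor touches the flags).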
		if r == a {
			if v.AuxInt == 1 {
				p := s.Prog(x86.AINCL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				p := s.Prog(x86.ADECL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386MULLconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = v.Args[0].Reg()

	case ssa.Op386SUBLconst,
		ssa.Op386ADCLconst,
		ssa.Op386SBBLconst,
		ssa.Op386ANDLconst,
		ssa.Op386ORLconst,
		ssa.Op386XORLconst,
		ssa.Op386SHLLconst,
		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAL)
		switch v.Op {
		case ssa.Op386LEAL1:
			p.From.Scale = 1
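			// SP can't be used as an index register, but with scale 1
			// base and index are interchangeable, so swap them.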
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386LEAL2:
			p.From.Scale = 2
		case ssa.Op386LEAL4:
			p.From.Scale = 4
		case ssa.Op386LEAL8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386LEAL:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
		// The Go assembler has swapped operands for UCOMISx relative to CMP,
		// so we must account for that here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.Op386MOVLconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			p := s.Prog(x86.AXORL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x
			break
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
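	// Loading a float constant from its constant-pool symbol is split in
	// two ops: MOVSxconst1 materializes the symbol's address with LEAL,
	// and MOVSxconst2 does the load through that address.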
	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		f := math.Float64frombits(uint64(v.AuxInt))
		if v.Op == ssa.Op386MOVSDconst1 {
			p.From.Sym = gc.Ctxt.Float64Sym(f)
		} else {
			p.From.Sym = gc.Ctxt.Float32Sym(float32(f))
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSDloadidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVWloadidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSDstoreidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 8
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 4
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVWstoreidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 2
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.Op386MOVLstoreconstidx4:
			p.To.Scale = 4
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.Op386DUFFZERO:
		p := s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = v.AuxInt
	case ssa.Op386DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt

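	// MOVLconvert is a pseudo-op marking a pointer/integer conversion for
	// liveness; regalloc keeps input and output in the same register, so
	// it emits no code.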
	case ssa.Op386MOVLconvert:
		if v.Args[0].Reg() != v.Reg() {
			v.Fatalf("MOVLconvert should be a no-op")
		}
	case ssa.OpCopy: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.Op386LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.Op386LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVL (TLS), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVL TLS, r
			// MOVL (r)(TLS*1), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVL)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}

	case ssa.Op386LoweredGetCallerPC:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -4 // PC is stored 4 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -gc.Ctxt.FixedFrameSize() // 0 on 386, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
		s.Call(v)
	case ssa.Op386NEGL,
		ssa.Op386BSWAPL,
		ssa.Op386NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386BSFL, ssa.Op386BSFW,
		ssa.Op386BSRL, ssa.Op386BSRW,
		ssa.Op386SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SETEQ, ssa.Op386SETNE,
		ssa.Op386SETL, ssa.Op386SETLE,
		ssa.Op386SETG, ssa.Op386SETGE,
		ssa.Op386SETGF, ssa.Op386SETGEF,
		ssa.Op386SETB, ssa.Op386SETBE,
		ssa.Op386SETORD, ssa.Op386SETNAN,
		ssa.Op386SETA, ssa.Op386SETAE:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

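	// Floating-point equality must fold in the parity flag, since an
	// unordered compare (a NaN operand) sets PF: NaN != x must be true,
	// so SETNEF ORs in SETPS; NaN == x must be false, so SETEQF ANDs
	// with SETPC. AX serves as the scratch register.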
	case ssa.Op386SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.Op386SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.Op386InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
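	// REP STOSL stores AX to (DI), CX times; REP MOVSL copies CX longs
	// from (SI) to (DI). The register assignments are fixed by the ISA.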
	case ssa.Op386REPSTOSL:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSL)
	case ssa.Op386REPMOVSL:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSL)
	case ssa.Op386LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have a false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg), reg2?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.Op386FCHS:
		v.Fatalf("FCHS in non-387 mode")
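	// Debugging aid: clobber a dead stack slot with the 0xdeaddead
	// sentinel so stale uses of it are easier to catch.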
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

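// blockJump maps each conditional block kind to the jump taken when the
// condition holds (asm) and the jump for the inverted condition (invasm).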
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}

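// eqfJumps and nefJumps drive FPJump: branching on floating-point
// equality takes two jumps, because the parity flag must be consulted
// to send unordered (NaN) compares to the correct successor.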
var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

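// ssaGenBlock emits the control flow that ends block b, where next is
// the block laid out immediately after b (so a jump to it can be elided).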
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in AX:
		// 0 if we should continue executing,
		// 1 if we should jump to the deferreturn call.
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.Block386EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.Block386NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.Block386EQ, ssa.Block386NE,
		ssa.Block386LT, ssa.Block386GE,
		ssa.Block386LE, ssa.Block386GT,
		ssa.Block386ULT, ssa.Block386UGT,
		ssa.Block386ULE, ssa.Block386UGE:
		jmp := blockJump[b.Kind]
		var p *obj.Prog
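		// If Succs[0] is laid out next, jump to Succs[1] on the inverted
		// condition and fall through; if Succs[1] is next, jump to Succs[0]
		// on the condition; otherwise emit the conditional jump plus an
		// unconditional JMP.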
		switch next {
		case b.Succs[0].Block():
			p = s.Prog(jmp.invasm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := s.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}