github.com/sanprasirt/go@v0.0.0-20170607001320-a027466e4b6d/src/cmd/compile/internal/x86/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
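	// Scan the block backwards: a value that produces flags ends flag
	// liveness above it, and any use of a flags-typed argument makes
	// flags live again.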
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && v.Op == ssa.Op386MOVLconst {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		}
		return x86.AMOVWLZX
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		default:
			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
		}
	} else {
		switch t.Size() {
		case 1, 2, 4:
			// Use a full 32-bit move to avoid partial register writes.
			return x86.AMOVL
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//     dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = src
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	return p
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.Op386ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
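			// The output is in neither input register: use LEAL to
			// form a three-address add without an extra MOV.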
			p := s.Prog(x86.ALEAL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	// 2-address opcode arithmetic
	case ssa.Op386SUBL,
		ssa.Op386MULL,
		ssa.Op386ANDL,
		ssa.Op386ORL,
		ssa.Op386XORL,
		ssa.Op386SHLL,
		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
		ssa.Op386PXOR,
		ssa.Op386ADCL,
		ssa.Op386SBBL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386DIVL, ssa.Op386DIVW,
		ssa.Op386DIVLU, ssa.Op386DIVWU,
		ssa.Op386MODL, ssa.Op386MODW,
		ssa.Op386MODLU, ssa.Op386MODWU:

		// Arg[0] is already in AX, as that is the only register we allow,
		// and AX (quotient) or DX (remainder) is the only output.
		x := v.Args[1].Reg()

		// The CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1.
		var j *obj.Prog
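		// j branches over the DIV below when the divisor is -1; its
		// target is patched to the fixup code emitted after the DIV.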
		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {

			var c *obj.Prog
			switch v.Op {
			case ssa.Op386DIVL, ssa.Op386MODL:
				c = s.Prog(x86.ACMPL)
				j = s.Prog(x86.AJEQ)
				s.Prog(x86.ACDQ) //TODO: fix

			case ssa.Op386DIVW, ssa.Op386MODW:
				c = s.Prog(x86.ACMPW)
				j = s.Prog(x86.AJEQ)
				s.Prog(x86.ACWD)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1

			j.To.Type = obj.TYPE_BRANCH
		}

		// For unsigned ints we zero extend by setting DX = 0;
		// signed ints were sign extended above.
		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
			c := s.Prog(x86.AXORL)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// Signed division: the rest of the check for the -1 divisor case.
		if j != nil {
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
				// n / -1 = -n
				n = s.Prog(x86.ANEGL)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = s.Prog(x86.AXORL)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.Val = n
			j2.To.Val = s.Pc()
		}

	case ssa.Op386HMULL, ssa.Op386HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.Op386MULLQU:
		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386AVGLU:
		// compute (x+y)/2 unsigned.
		// Do a 32-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 31st bit.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDL)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				p := s.Prog(x86.AINCL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				p := s.Prog(x86.ADECL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386MULLconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = v.Args[0].Reg()

	case ssa.Op386SUBLconst,
		ssa.Op386ADCLconst,
		ssa.Op386SBBLconst,
		ssa.Op386ANDLconst,
		ssa.Op386ORLconst,
		ssa.Op386XORLconst,
		ssa.Op386SHLLconst,
		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386SBBLcarrymask:
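		// SBB with both operands equal leaves 0 or 0xFFFFFFFF in the
		// register, depending on the carry flag.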
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAL)
		switch v.Op {
		case ssa.Op386LEAL1:
			p.From.Scale = 1
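			// SP cannot be used as an index register; with scale 1 the
			// base and index are interchangeable, so swap them.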
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386LEAL2:
			p.From.Scale = 2
		case ssa.Op386LEAL4:
			p.From.Scale = 4
		case ssa.Op386LEAL8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386LEAL:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
		// The Go assembler's operand order for UCOMISx is swapped relative
		// to CMP; account for that here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.Op386MOVLconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}
	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
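		// const1 materializes the address of a constant-pool symbol; the
		// matching const2 op then loads the value through that address.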
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		f := math.Float64frombits(uint64(v.AuxInt))
		if v.Op == ssa.Op386MOVSDconst1 {
			p.From.Sym = gc.Ctxt.Float64Sym(f)
		} else {
			p.From.Sym = gc.Ctxt.Float32Sym(float32(f))
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSDloadidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVWloadidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
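		// As above, SP cannot be an index register; swap base and index.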
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSDstoreidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 8
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 4
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVWstoreidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 2
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.Op386MOVLstoreconstidx4:
			p.To.Scale = 4
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.Op386DUFFZERO:
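		// Jump into the middle of the Duff's device routine; v.AuxInt
		// selects the entry point and hence how many bytes are handled.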
		p := s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = v.AuxInt
	case ssa.Op386DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.OpCopy, ssa.Op386MOVLconvert: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.Op386LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.Op386LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVL (TLS), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVL TLS, r
			// MOVL (r)(TLS*1), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVL)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
		s.Call(v)
	case ssa.Op386NEGL,
		ssa.Op386BSWAPL,
		ssa.Op386NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386BSFL, ssa.Op386BSFW,
		ssa.Op386BSRL, ssa.Op386BSRW,
		ssa.Op386SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SETEQ, ssa.Op386SETNE,
		ssa.Op386SETL, ssa.Op386SETLE,
		ssa.Op386SETG, ssa.Op386SETGE,
		ssa.Op386SETGF, ssa.Op386SETGEF,
		ssa.Op386SETB, ssa.Op386SETBE,
		ssa.Op386SETORD, ssa.Op386SETNAN,
		ssa.Op386SETA, ssa.Op386SETAE:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386SETNEF:
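		// NaN-aware !=: after UCOMISx an unordered result sets PF, so
		// combine SETNE with SETPS using OR (AX is used as scratch).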
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.Op386SETEQF:
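		// NaN-aware ==: equal only if ZF is set and PF is clear, so
		// combine SETEQ with SETPC using AND (AX is used as scratch).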
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.Op386InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.Op386REPSTOSL:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSL)
	case ssa.Op386REPMOVSL:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSL)
	case ssa.Op386LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have a false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.Op386FCHS:
		v.Fatalf("FCHS in non-387 mode")
	case ssa.OpClobber:
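		// Overwrite a dead stack slot with a distinctive constant so that
		// uses of stale values fail loudly (clobberdead debugging mode).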
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

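// blockJump gives, for each conditional block kind, the branch instruction
// to use and its inversion (used when the successor order makes the
// inverted test cheaper).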
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}

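// eqfJumps and nefJumps encode the two-branch sequences for floating-point
// == and !=, which must also consult the parity flag to handle NaN
// (unordered) comparison results.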
var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in AX:
		// 0 if we should continue executing,
		// 1 if we should jump to the deferreturn call.
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.Block386EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.Block386NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.Block386EQ, ssa.Block386NE,
		ssa.Block386LT, ssa.Block386GE,
		ssa.Block386LE, ssa.Block386GT,
		ssa.Block386ULT, ssa.Block386UGT,
		ssa.Block386ULE, ssa.Block386UGE:
		jmp := blockJump[b.Kind]
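		// Pick the cheapest branch shape: invert the condition when the
		// true successor is the fallthrough block, branch normally when
		// the false successor is next, and otherwise emit both a
		// conditional branch and an unconditional jump.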
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			p = s.Prog(jmp.invasm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := s.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}