github.com/corona10/go@v0.0.0-20180224231303-7a218942be57/src/cmd/compile/internal/x86/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && v.Op == ssa.Op386MOVLconst {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
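	// by zero-extending sub-word loads into the full 32-bit register.
	// (Sub-word SSA values don't rely on their upper bits, so zero
	// extension is safe here even for signed types.)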
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		default:
			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
		}
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//     dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
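// For example, opregreg(s, x86.AADDL, dst, src) emits "ADDL src, dst"
// (in Go assembler syntax the destination operand comes last).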
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.Op386ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
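		// If the output register matches an input, a plain two-operand
		// ADDL suffices; otherwise LEAL (r1)(r2*1), r performs the
		// three-operand add in a single instruction. (Unlike ADDL,
		// LEAL does not write the flags, which is fine here: this op
		// has no flags result.)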
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			p := s.Prog(x86.ALEAL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	// 2-address opcode arithmetic
	case ssa.Op386SUBL,
		ssa.Op386MULL,
		ssa.Op386ANDL,
		ssa.Op386ORL,
		ssa.Op386XORL,
		ssa.Op386SHLL,
		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
		ssa.Op386PXOR,
		ssa.Op386ADCL,
		ssa.Op386SBBL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
		// output 0 is the low 32 bits, output 1 is the carry/borrow (in the flags).
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
		// output 0 is the low 32 bits, output 1 is the carry/borrow (in the flags).
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386DIVL, ssa.Op386DIVW,
		ssa.Op386DIVLU, ssa.Op386DIVWU,
		ssa.Op386MODL, ssa.Op386MODW,
		ssa.Op386MODLU, ssa.Op386MODWU:

		// Arg[0] is already in AX as it's the only register we allow
		// and AX is the only output
		x := v.Args[1].Reg()

		// The CPU faults upon signed overflow, which occurs when the
		// most negative int is divided by -1.
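		// The sequence emitted below for the signed case is, roughly
		// (labels are illustrative only):
		//	CMPL	x, $-1
		//	JEQ	fixup
		//	CDQ	(or CWD for the 16-bit forms)
		//	IDIVL	x
		//	JMP	done
		// fixup:
		//	NEGL	AX	(DIV: n / -1 == -n)
		//	  or
		//	XORL	DX, DX	(MOD: n % -1 == 0)
		// done: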
		var j *obj.Prog
		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {

			var c *obj.Prog
			switch v.Op {
			case ssa.Op386DIVL, ssa.Op386MODL:
				c = s.Prog(x86.ACMPL)
				j = s.Prog(x86.AJEQ)
				s.Prog(x86.ACDQ) //TODO: fix

			case ssa.Op386DIVW, ssa.Op386MODW:
				c = s.Prog(x86.ACMPW)
				j = s.Prog(x86.AJEQ)
				s.Prog(x86.ACWD)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1

			j.To.Type = obj.TYPE_BRANCH
		}

		// For unsigned ints we zero-extend by setting DX = 0;
		// signed ints were sign-extended above.
		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
			c := s.Prog(x86.AXORL)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// signed division, rest of the check for -1 case
		if j != nil {
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
				// n * -1 = -n
				n = s.Prog(x86.ANEGL)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = s.Prog(x86.AXORL)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.Val = n
			j2.To.Val = s.Pc()
		}

	case ssa.Op386HMULL, ssa.Op386HMULLU:
		// The frontend rewrites constant division by 8/16/32-bit
		// integers into HMUL by a constant; the SSA rewrite rules then
		// generate these 32-bit (HMULL/HMULLU) versions.

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.Op386MULLQU:
		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386AVGLU:
		// compute (x+y)/2 unsigned.
		// Do a 32-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 31st bit.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDL)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
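		// That is:
		//	ADDL	y, r	// 33-bit sum; bit 32 lands in CF
		//	RCRL	$1, r	// rotate right through carry: r = sum >> 1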

	case ssa.Op386ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				p := s.Prog(x86.AINCL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				p := s.Prog(x86.ADECL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
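		// The output register differs from the input: LEAL folds the
		// add and the register move into one instruction (and leaves
		// the flags untouched).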
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386MULLconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = v.Args[0].Reg()

	case ssa.Op386SUBLconst,
		ssa.Op386ADCLconst,
		ssa.Op386SBBLconst,
		ssa.Op386ANDLconst,
		ssa.Op386ORLconst,
		ssa.Op386XORLconst,
		ssa.Op386SHLLconst,
		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAL)
		switch v.Op {
		case ssa.Op386LEAL1:
			p.From.Scale = 1
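			// SP cannot serve as an index register, so if the index
			// is SP, swap base and index (legal only at scale 1,
			// where the address is unchanged).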
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386LEAL2:
			p.From.Scale = 2
		case ssa.Op386LEAL4:
			p.From.Scale = 4
		case ssa.Op386LEAL8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386LEAL:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
		// The Go assembler swaps the operands of UCOMISx relative to
		// CMP, so account for that here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.Op386MOVLconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			p := s.Prog(x86.AXORL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x
			break
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
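		// The constant itself lives in a read-only data symbol; this
		// op materializes the symbol's address, and the matching
		// const2 op below then loads the value through it.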
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		f := math.Float64frombits(uint64(v.AuxInt))
		if v.Op == ssa.Op386MOVSDconst1 {
			p.From.Sym = gc.Ctxt.Float64Sym(f)
		} else {
			p.From.Sym = gc.Ctxt.Float32Sym(float32(f))
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSDloadidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVWloadidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSDstoreidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 8
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 4
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVWstoreidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 2
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.Op386MOVLstoreconstidx4:
			p.To.Scale = 4
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
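	// DUFFZERO and DUFFCOPY jump into the runtime's duffzero/duffcopy
	// routines; AuxInt is the byte offset of the entry point within the
	// routine, which determines how many words get zeroed/copied.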
	case ssa.Op386DUFFZERO:
		p := s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = v.AuxInt
	case ssa.Op386DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.Op386MOVLconvert:
		if v.Args[0].Reg() != v.Reg() {
			v.Fatalf("MOVLconvert should be a no-op")
		}
	case ssa.OpCopy: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.Op386LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.Op386LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVL (TLS), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVL TLS, r
			// MOVL (r)(TLS*1), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVL)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}

	case ssa.Op386LoweredGetCallerPC:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -4 // PC is stored 4 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -gc.Ctxt.FixedFrameSize() // 0 on 386, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = v.Aux.(*obj.LSym)

	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
		s.Call(v)
	case ssa.Op386NEGL,
		ssa.Op386BSWAPL,
		ssa.Op386NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386BSFL, ssa.Op386BSFW,
		ssa.Op386BSRL, ssa.Op386BSRW,
		ssa.Op386SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SETEQ, ssa.Op386SETNE,
		ssa.Op386SETL, ssa.Op386SETLE,
		ssa.Op386SETG, ssa.Op386SETGE,
		ssa.Op386SETGF, ssa.Op386SETGEF,
		ssa.Op386SETB, ssa.Op386SETBE,
		ssa.Op386SETORD, ssa.Op386SETNAN,
		ssa.Op386SETA, ssa.Op386SETAE:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

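	// SETNEF/SETEQF must also consult the parity flag, which UCOMISx
	// sets for unordered (NaN) operands: x != y is ZF==0 || PF==1, and
	// x == y is ZF==1 && PF==0. Hence the extra SETPS/SETPC into AX
	// (used as scratch) and the OR/AND below.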
	case ssa.Op386SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.Op386SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.Op386InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.Op386REPSTOSL:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSL)
	case ssa.Op386REPMOVSL:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSL)
	case ssa.Op386LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.Op386FCHS:
		v.Fatalf("FCHS in non-387 mode")
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

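// blockJump maps a conditional block kind to the branch instruction
// used when jumping to Succs[0] (asm) and to the inverted instruction
// used when Succs[0] is the fallthrough successor (invasm).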
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}

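// eqfJumps and nefJumps describe the two-jump sequences needed for
// floating-point equality tests, where the parity flag distinguishes
// unordered (NaN) comparisons; Index selects which successor each jump
// targets, and the row is chosen by which successor follows in layout.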
var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in AX:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.Block386EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.Block386NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.Block386EQ, ssa.Block386NE,
		ssa.Block386LT, ssa.Block386GE,
		ssa.Block386LE, ssa.Block386GT,
		ssa.Block386ULT, ssa.Block386UGT,
		ssa.Block386ULE, ssa.Block386UGE:
		jmp := blockJump[b.Kind]
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			p = s.Prog(jmp.invasm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := s.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}