github.com/riscv/riscv-go@v0.0.0-20200123204226-124ebd6fcc8e/src/cmd/compile/internal/x86/ssa.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package x86
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  
    11  	"cmd/compile/internal/gc"
    12  	"cmd/compile/internal/ssa"
    13  	"cmd/internal/obj"
    14  	"cmd/internal/obj/x86"
    15  )
    16  
    17  // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
    18  func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
    19  	flive := b.FlagsLiveAtEnd
    20  	if b.Control != nil && b.Control.Type.IsFlags() {
    21  		flive = true
    22  	}
    23  	for i := len(b.Values) - 1; i >= 0; i-- {
    24  		v := b.Values[i]
    25  		if flive && v.Op == ssa.Op386MOVLconst {
    26  			// The "mark" is any non-nil Aux value.
    27  			v.Aux = v
    28  		}
    29  		if v.Type.IsFlags() {
    30  			flive = false
    31  		}
    32  		for _, a := range v.Args {
    33  			if a.Type.IsFlags() {
    34  				flive = true
    35  			}
    36  		}
    37  	}
    38  }
    39  
    40  // loadByType returns the load instruction of the given type.
    41  func loadByType(t ssa.Type) obj.As {
    42  	// Avoid partial register write
    43  	if !t.IsFloat() && t.Size() <= 2 {
    44  		if t.Size() == 1 {
    45  			return x86.AMOVBLZX
    46  		} else {
    47  			return x86.AMOVWLZX
    48  		}
    49  	}
    50  	// Otherwise, there's no difference between load and store opcodes.
    51  	return storeByType(t)
    52  }
    53  
    54  // storeByType returns the store instruction of the given type.
    55  func storeByType(t ssa.Type) obj.As {
    56  	width := t.Size()
    57  	if t.IsFloat() {
    58  		switch width {
    59  		case 4:
    60  			return x86.AMOVSS
    61  		case 8:
    62  			return x86.AMOVSD
    63  		}
    64  	} else {
    65  		switch width {
    66  		case 1:
    67  			return x86.AMOVB
    68  		case 2:
    69  			return x86.AMOVW
    70  		case 4:
    71  			return x86.AMOVL
    72  		}
    73  	}
    74  	panic("bad store type")
    75  }
    76  
    77  // moveByType returns the reg->reg move instruction of the given type.
    78  func moveByType(t ssa.Type) obj.As {
    79  	if t.IsFloat() {
    80  		switch t.Size() {
    81  		case 4:
    82  			return x86.AMOVSS
    83  		case 8:
    84  			return x86.AMOVSD
    85  		default:
    86  			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
    87  		}
    88  	} else {
    89  		switch t.Size() {
    90  		case 1:
    91  			// Avoids partial register write
    92  			return x86.AMOVL
    93  		case 2:
    94  			return x86.AMOVL
    95  		case 4:
    96  			return x86.AMOVL
    97  		default:
    98  			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
    99  		}
   100  	}
   101  }
   102  
   103  // opregreg emits instructions for
   104  //     dest := dest(To) op src(From)
   105  // and also returns the created obj.Prog so it
   106  // may be further adjusted (offset, scale, etc).
   107  func opregreg(op obj.As, dest, src int16) *obj.Prog {
   108  	p := gc.Prog(op)
   109  	p.From.Type = obj.TYPE_REG
   110  	p.To.Type = obj.TYPE_REG
   111  	p.To.Reg = dest
   112  	p.From.Reg = src
   113  	return p
   114  }
   115  
// ssaGenValue emits the machine instructions for a single SSA value v,
// appending obj.Progs via gc.Prog. Most cases translate one SSA op into
// one instruction; a few (division, TLS access, float SET*) expand into
// short sequences.
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	s.SetPos(v.Pos)

	if gc.Thearch.Use387 {
		if ssaGenValue387(s, v) {
			return // v was handled by 387 generation.
		}
	}

	switch v.Op {
	case ssa.Op386ADDL:
		// ADDL is 2-address at the machine level; when the output register
		// matches neither input, use LEAL r1+r2*1 as a 3-address add.
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			p := gc.Prog(x86.ALEAL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	// 2-address opcode arithmetic
	case ssa.Op386SUBL,
		ssa.Op386MULL,
		ssa.Op386ANDL,
		ssa.Op386ORL,
		ssa.Op386XORL,
		ssa.Op386SHLL,
		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
		ssa.Op386PXOR,
		ssa.Op386ADCL,
		ssa.Op386SBBL:
		// The register allocator guarantees input[0] and the output share
		// a register (resultInArg0); verify that here.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		opregreg(v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386DIVL, ssa.Op386DIVW,
		ssa.Op386DIVLU, ssa.Op386DIVWU,
		ssa.Op386MODL, ssa.Op386MODW,
		ssa.Op386MODLU, ssa.Op386MODWU:

		// Arg[0] is already in AX as it's the only register we allow
		// and AX is the only output
		x := v.Args[1].Reg()

		// CPU faults upon signed overflow, which occurs when most
		// negative int is divided by -1.
		var j *obj.Prog
		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {

			// Emit: CMP divisor, $-1; JEQ fixup; sign-extend AX into DX.
			// The fixup target (set below) handles the -1 divisor specially.
			var c *obj.Prog
			switch v.Op {
			case ssa.Op386DIVL, ssa.Op386MODL:
				c = gc.Prog(x86.ACMPL)
				j = gc.Prog(x86.AJEQ)
				gc.Prog(x86.ACDQ) //TODO: fix

			case ssa.Op386DIVW, ssa.Op386MODW:
				c = gc.Prog(x86.ACMPW)
				j = gc.Prog(x86.AJEQ)
				gc.Prog(x86.ACWD)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1

			j.To.Type = obj.TYPE_BRANCH
		}

		// for unsigned ints, we sign extend by setting DX = 0
		// signed ints were sign extended above
		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
			c := gc.Prog(x86.AXORL)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// signed division, rest of the check for -1 case
		if j != nil {
			// Skip over the fixup on the normal path, then emit the fixup:
			// for DIV, n / -1 == -n (NEG AX); for MOD, n % -1 == 0 (zero DX).
			j2 := gc.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
				// n * -1 = -n
				n = gc.Prog(x86.ANEGL)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = gc.Prog(x86.AXORL)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			// Patch the branches: JEQ jumps to the fixup, the normal path
			// jumps past it.
			j.To.Val = n
			j2.To.Val = s.Pc()
		}

	case ssa.Op386HMULL, ssa.Op386HMULW, ssa.Op386HMULB,
		ssa.Op386HMULLU, ssa.Op386HMULWU, ssa.Op386HMULBU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := gc.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.Op386MULLQU:
		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			// In-place add: prefer the shorter INC/DEC encodings for +-1.
			if v.AuxInt == 1 {
				p := gc.Prog(x86.AINCL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				p := gc.Prog(x86.ADECL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		// Different output register: use LEAL off(a) as a 3-address add.
		p := gc.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386MULLconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = v.Args[0].Reg()

	case ssa.Op386SUBLconst,
		ssa.Op386ADCLconst,
		ssa.Op386SBBLconst,
		ssa.Op386ANDLconst,
		ssa.Op386ORLconst,
		ssa.Op386XORLconst,
		ssa.Op386SHLLconst,
		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
		// 2-address op with a constant source; input[0] must share the
		// output register.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386SBBLcarrymask:
		// SBB r,r: materializes 0 or -1 from the carry flag.
		r := v.Reg()
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := gc.Prog(x86.ALEAL)
		switch v.Op {
		case ssa.Op386LEAL1:
			p.From.Scale = 1
			// SP cannot be used as an index register; with scale 1 the
			// operands commute, so swap base and index.
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386LEAL2:
			p.From.Scale = 2
		case ssa.Op386LEAL4:
			p.From.Scale = 4
		case ssa.Op386LEAL8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386LEAL:
		p := gc.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
		// Go assembler CMP/TEST take operands in reversed order relative
		// to the SSA args.
		opregreg(v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.Op386MOVLconst:
		x := v.Reg()
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}
	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
		x := v.Reg()
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
		// Materialize the address of a float constant stored in a
		// linker-generated $f32./$f64. symbol.
		var literal string
		if v.Op == ssa.Op386MOVSDconst1 {
			literal = fmt.Sprintf("$f64.%016x", uint64(v.AuxInt))
		} else {
			literal = fmt.Sprintf("$f32.%08x", math.Float32bits(float32(math.Float64frombits(uint64(v.AuxInt)))))
		}
		p := gc.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		p.From.Sym = obj.Linklookup(gc.Ctxt, literal, 0)
		p.From.Sym.Set(obj.AttrLocal, true)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
		// Load the float constant through the address produced by
		// MOVSxconst1.
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSDloadidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVWloadidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		// SP cannot be the index register; with scale 1 base and index
		// commute, so swap them.
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSDstoreidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 8
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 4
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVWstoreidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 2
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		// SP cannot be the index register; swap base and index (scale 1).
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		// AuxInt packs both the value to store and the address offset.
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
			p.To.Scale = 1
			// SP cannot be the index register; swap base and index.
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.Op386MOVLstoreconstidx4:
			p.To.Scale = 4
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
		opregreg(v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.Op386DUFFZERO:
		// AuxInt is the byte offset into the duffzero routine.
		p := gc.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
		p.To.Offset = v.AuxInt
	case ssa.Op386DUFFCOPY:
		// AuxInt is the byte offset into the duffcopy routine.
		p := gc.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
		p.To.Offset = v.AuxInt

	case ssa.OpCopy, ssa.Op386MOVLconvert: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		// Spill slot -> register.
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		// Register -> spill slot.
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.OpPhi:
		gc.CheckLoweredPhi(v)
	case ssa.OpInitMem:
		// memory arg needs no code
	case ssa.OpArg:
		// input args need no code
	case ssa.Op386LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.Op386LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVL (TLS), r
			p := gc.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVL TLS, r
			// MOVL (r)(TLS*1), r
			p := gc.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := gc.Prog(x86.AMOVL)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.Op386CALLstatic:
		if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym {
			// Deferred calls will appear to be returning to
			// the CALL deferreturn(SB) that we are about to emit.
			// However, the stack trace code will show the line
			// of the instruction byte before the return PC.
			// To avoid that being an unrelated instruction,
			// insert an actual hardware NOP that will have the right line number.
			// This is different from obj.ANOP, which is a virtual no-op
			// that doesn't make it into the instruction stream.
			ginsnop()
		}
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym))
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.Op386CALLclosure:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.Op386CALLdefer:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Deferproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.Op386CALLgo:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Newproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.Op386CALLinter:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.Op386NEGL,
		ssa.Op386BSWAPL,
		ssa.Op386NOTL:
		// 1-operand ops that modify their input in place.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386BSFL, ssa.Op386BSFW,
		ssa.Op386BSRL, ssa.Op386BSRW,
		ssa.Op386SQRTSD:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpSP, ssa.OpSB, ssa.OpSelect0, ssa.OpSelect1:
		// nothing to do
	case ssa.Op386SETEQ, ssa.Op386SETNE,
		ssa.Op386SETL, ssa.Op386SETLE,
		ssa.Op386SETG, ssa.Op386SETGE,
		ssa.Op386SETGF, ssa.Op386SETGEF,
		ssa.Op386SETB, ssa.Op386SETBE,
		ssa.Op386SETORD, ssa.Op386SETNAN,
		ssa.Op386SETA, ssa.Op386SETAE:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386SETNEF:
		// Float != must also be true when the comparison is unordered
		// (parity set), so OR in the SETPS result.
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := gc.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.Op386SETEQF:
		// Float == must be false when the comparison is unordered
		// (parity set), so AND with the SETPC result.
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := gc.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.Op386InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.Op386REPSTOSL:
		gc.Prog(x86.AREP)
		gc.Prog(x86.ASTOSL)
	case ssa.Op386REPMOVSL:
		gc.Prog(x86.AREP)
		gc.Prog(x86.AMOVSL)
	case ssa.OpVarDef:
		gc.Gvardef(v.Aux.(*gc.Node))
	case ssa.OpVarKill:
		gc.Gvarkill(v.Aux.(*gc.Node))
	case ssa.OpVarLive:
		gc.Gvarlive(v.Aux.(*gc.Node))
	case ssa.OpKeepAlive:
		gc.KeepAlive(v)
	case ssa.Op386LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead?  It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := gc.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.Op386FCHS:
		// FCHS is only emitted by the 387 path, handled above.
		v.Fatalf("FCHS in non-387 mode")
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}
   800  
// blockJump maps each conditional 386 block kind to its branch
// instruction (asm) and the inverted branch (invasm) used when the
// fallthrough successor is the branch-taken one.
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}
   817  
// eqfJumps gives the two-branch sequences for a floating-point equality
// block (Block386EQF), indexed by which successor is the fallthrough;
// consumed by gc.SSAGenFPJump.
var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
// nefJumps gives the two-branch sequences for a floating-point inequality
// block (Block386NEF), indexed by which successor is the fallthrough;
// consumed by gc.SSAGenFPJump.
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}
   826  
// ssaGenBlock emits the control-flow instructions that end block b.
// next is the block that will be laid out immediately after b, so a
// jump to next can be omitted. Unresolved branches are appended to
// s.Branches for later patching.
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	s.SetPos(b.Pos)

	if gc.Thearch.Use387 {
		// Empty the 387's FP stack before the block ends.
		flush387(s)
	}

	switch b.Kind {
	case ssa.BlockPlain:
		// Unconditional fallthrough or jump.
		if b.Succs[0].Block() != next {
			p := gc.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := gc.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = gc.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := gc.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		gc.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		gc.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		// Tail call: jump straight to the target function.
		p := gc.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(b.Aux.(*gc.Sym))

	case ssa.Block386EQF:
		gc.SSAGenFPJump(s, b, next, &eqfJumps)

	case ssa.Block386NEF:
		gc.SSAGenFPJump(s, b, next, &nefJumps)

	case ssa.Block386EQ, ssa.Block386NE,
		ssa.Block386LT, ssa.Block386GE,
		ssa.Block386LE, ssa.Block386GT,
		ssa.Block386ULT, ssa.Block386UGT,
		ssa.Block386ULE, ssa.Block386UGE:
		jmp := blockJump[b.Kind]
		likely := b.Likely
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			// Taken successor is the fallthrough: branch on the inverted
			// condition to the other successor.
			p = gc.Prog(jmp.invasm)
			likely *= -1
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			p = gc.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			// Neither successor follows: conditional branch plus an
			// unconditional jump.
			p = gc.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := gc.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

		// liblink reorders the instruction stream as it sees fit.
		// Pass along what we know so liblink can make use of it.
		// TODO: Once we've fully switched to SSA,
		// make liblink leave our output alone.
		switch likely {
		case ssa.BranchUnlikely:
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 0
		case ssa.BranchLikely:
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 1
		}

	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}