github.com/bir3/gocompiler@v0.3.205/src/cmd/compile/internal/x86/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"fmt"
	"math"

	"github.com/bir3/gocompiler/src/cmd/compile/internal/base"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/ir"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/logopt"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/ssa"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/ssagen"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/types"
	"github.com/bir3/gocompiler/src/cmd/internal/obj"
	"github.com/bir3/gocompiler/src/cmd/internal/obj/x86"
)

// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	for _, c := range b.ControlValues() {
		flive = c.Type.IsFlags() || flive
	}
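	// Scan the block backwards, tracking flags liveness; a MOVLconst that
	// executes while flags are live is marked so ssaGenValue will not
	// lower MOVL $0 to a flag-clobbering XORL.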
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && v.Op == ssa.Op386MOVLconst {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
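	// by using zero-extending byte/word loads, so the destination
	// register's upper bits are fully written.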
	if !t.IsFloat() {
		switch t.Size() {
		case 1:
			return x86.AMOVBLZX
		case 2:
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		default:
			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
		}
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//
//	dest := dest(To) op src(From)
//
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

func ssaGenValue(s *ssagen.State, v *ssa.Value) {
	switch v.Op {
	case ssa.Op386ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
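			// Output matches neither input: synthesize the three-operand
			// add as LEAL (r1)(r2*1), which also leaves the flags untouched.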
			p := s.Prog(x86.ALEAL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	// 2-address opcode arithmetic
	case ssa.Op386SUBL,
		ssa.Op386MULL,
		ssa.Op386ANDL,
		ssa.Op386ORL,
		ssa.Op386XORL,
		ssa.Op386SHLL,
		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
		ssa.Op386ROLL, ssa.Op386ROLW, ssa.Op386ROLB,
		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
		ssa.Op386PXOR,
		ssa.Op386ADCL,
		ssa.Op386SBBL:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())

	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
		// output 0 is the low 32 bits, output 1 is the carry/borrow.
		opregreg(s, v.Op.Asm(), v.Reg0(), v.Args[1].Reg())

	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
		// output 0 is the low 32 bits, output 1 is the carry/borrow.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.Op386DIVL, ssa.Op386DIVW,
		ssa.Op386DIVLU, ssa.Op386DIVWU,
		ssa.Op386MODL, ssa.Op386MODW,
		ssa.Op386MODLU, ssa.Op386MODWU:

		// Arg[0] is already in AX as it's the only register we allow,
		// and the output is AX (quotient) or DX (remainder).
		x := v.Args[1].Reg()

		// CPU faults upon signed overflow, which occurs when most
		// negative int is divided by -1.
		var j *obj.Prog
		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {

			if ssa.DivisionNeedsFixUp(v) {
				var c *obj.Prog
				switch v.Op {
				case ssa.Op386DIVL, ssa.Op386MODL:
					c = s.Prog(x86.ACMPL)
					j = s.Prog(x86.AJEQ)

				case ssa.Op386DIVW, ssa.Op386MODW:
					c = s.Prog(x86.ACMPW)
					j = s.Prog(x86.AJEQ)
				}
				c.From.Type = obj.TYPE_REG
				c.From.Reg = x
				c.To.Type = obj.TYPE_CONST
				c.To.Offset = -1

				j.To.Type = obj.TYPE_BRANCH
			}
			// sign extend the dividend
			switch v.Op {
			case ssa.Op386DIVL, ssa.Op386MODL:
				s.Prog(x86.ACDQ)
			case ssa.Op386DIVW, ssa.Op386MODW:
				s.Prog(x86.ACWD)
			}
		}

		// for unsigned ints, we zero extend the dividend by setting DX = 0;
		// signed ints were sign extended above
		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
			c := s.Prog(x86.AXORL)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

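		// The divide itself: it divides DX:AX by x, leaving the
		// quotient in AX and the remainder in DX.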
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// signed division, rest of the check for -1 case
		if j != nil {
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
				// n * -1 = -n
				n = s.Prog(x86.ANEGL)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = s.Prog(x86.AXORL)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.SetTarget(n)
			j2.To.SetTarget(s.Pc())
		}

	case ssa.Op386HMULL, ssa.Op386HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.Op386MULLU:
		// Arg[0] is already in AX as it's the only register we allow
		// results lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386MULLQU:
		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386AVGLU:
		// compute (x+y)/2 unsigned.
		// Do a 32-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 31st bit.
		p := s.Prog(x86.AADDL)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				p := s.Prog(x86.AINCL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				p := s.Prog(x86.ADECL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
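		// Output register differs from the input: use LEAL to compute
		// a+AuxInt into r without clobbering a.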
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386MULLconst:
		r := v.Reg()
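		// Use the three-operand form IMULL $const, src, dst so the
		// input register need not match the output.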
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.SetFrom3Reg(v.Args[0].Reg())

	case ssa.Op386SUBLconst,
		ssa.Op386ADCLconst,
		ssa.Op386SBBLconst,
		ssa.Op386ANDLconst,
		ssa.Op386ORLconst,
		ssa.Op386XORLconst,
		ssa.Op386SHLLconst,
		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SBBLcarrymask:
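		// SBB r, r leaves r = 0 or -1 depending on the carry flag,
		// producing an all-zeros or all-ones mask.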
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAL)
		switch v.Op {
		case ssa.Op386LEAL1:
			p.From.Scale = 1
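			// SP can appear only as a base register, not as an index;
			// with scale 1 base and index are interchangeable, so swap.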
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386LEAL2:
			p.From.Scale = 2
		case ssa.Op386LEAL4:
			p.From.Scale = 4
		case ssa.Op386LEAL8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386LEAL:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.Op386CMPLload, ssa.Op386CMPWload, ssa.Op386CMPBload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()
	case ssa.Op386CMPLconstload, ssa.Op386CMPWconstload, ssa.Op386CMPBconstload:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.From, v, sc.Off64())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val64()
	case ssa.Op386MOVLconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			p := s.Prog(x86.AXORL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x
			break
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
		x := v.Reg()
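		// AuxInt holds a float64 bit pattern even for MOVSSconst;
		// the assembler narrows the constant to float32 where needed.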
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
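		// Materialize the address of a constant-pool symbol holding the
		// value; the matching MOVSxconst2 below loads through that address.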
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		f := math.Float64frombits(uint64(v.AuxInt))
		if v.Op == ssa.Op386MOVSDconst1 {
			p.From.Sym = base.Ctxt.Float64Sym(f)
		} else {
			p.From.Sym = base.Ctxt.Float32Sym(float32(f))
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1,
		ssa.Op386MOVSDloadidx8, ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4, ssa.Op386MOVWloadidx2:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.From.Scale = 1
		case ssa.Op386MOVSDloadidx8:
			p.From.Scale = 8
		case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
			p.From.Scale = 4
		case ssa.Op386MOVWloadidx2:
			p.From.Scale = 2
		}
		p.From.Reg = r
		p.From.Index = i
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386ADDLloadidx4, ssa.Op386SUBLloadidx4, ssa.Op386MULLloadidx4,
		ssa.Op386ANDLloadidx4, ssa.Op386ORLloadidx4, ssa.Op386XORLloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		p.From.Index = v.Args[2].Reg()
		p.From.Scale = 4
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386ADDLload, ssa.Op386SUBLload, ssa.Op386MULLload,
		ssa.Op386ANDLload, ssa.Op386ORLload, ssa.Op386XORLload,
		ssa.Op386ADDSDload, ssa.Op386ADDSSload, ssa.Op386SUBSDload, ssa.Op386SUBSSload,
		ssa.Op386MULSDload, ssa.Op386MULSSload, ssa.Op386DIVSSload, ssa.Op386DIVSDload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore,
		ssa.Op386ADDLmodify, ssa.Op386SUBLmodify, ssa.Op386ANDLmodify, ssa.Op386ORLmodify, ssa.Op386XORLmodify:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.Op386ADDLconstmodify:
		sc := v.AuxValAndOff()
		val := sc.Val()
		if val == 1 || val == -1 {
			var p *obj.Prog
			if val == 1 {
				p = s.Prog(x86.AINCL)
			} else {
				p = s.Prog(x86.ADECL)
			}
			off := sc.Off64()
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			ssagen.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.Op386ANDLconstmodify, ssa.Op386ORLconstmodify, ssa.Op386XORLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off64()
		val := sc.Val64()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = val
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, off)
	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1,
		ssa.Op386MOVSDstoreidx8, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4, ssa.Op386MOVWstoreidx2,
		ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.To.Scale = 1
		case ssa.Op386MOVSDstoreidx8:
			p.To.Scale = 8
		case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4,
			ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
			p.To.Scale = 4
		case ssa.Op386MOVWstoreidx2:
			p.To.Scale = 2
		}
		p.To.Reg = r
		p.To.Index = i
		ssagen.AddAux(&p.To, v)
	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.Op386ADDLconstmodifyidx4:
		sc := v.AuxValAndOff()
		val := sc.Val()
		if val == 1 || val == -1 {
			var p *obj.Prog
			if val == 1 {
				p = s.Prog(x86.AINCL)
			} else {
				p = s.Prog(x86.ADECL)
			}
			off := sc.Off64()
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Scale = 4
			p.To.Index = v.Args[1].Reg()
			ssagen.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1,
		ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.Op386MOVLstoreconstidx4,
			ssa.Op386ADDLconstmodifyidx4, ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
			p.To.Scale = 4
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.Op386DUFFZERO:
		p := s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = ir.Syms.Duffzero
		p.To.Offset = v.AuxInt
	case ssa.Op386DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = ir.Syms.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.OpCopy: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		ssagen.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddrAuto(&p.To, v)
	case ssa.Op386LoweredGetClosurePtr:
		// Closure pointer is DX.
		ssagen.CheckLoweredGetClosurePtr(v)
	case ssa.Op386LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(base.Ctxt) {
			// MOVL (TLS), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVL TLS, r
			// MOVL (r)(TLS*1), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVL)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}

	case ssa.Op386LoweredGetCallerPC:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -4 // PC is stored 4 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize // 0 on 386, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = v.Aux.(*obj.LSym)

	case ssa.Op386LoweredPanicBoundsA, ssa.Op386LoweredPanicBoundsB, ssa.Op386LoweredPanicBoundsC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
		s.UseArgs(8) // space used in callee args area by assembly stubs

	case ssa.Op386LoweredPanicExtendA, ssa.Op386LoweredPanicExtendB, ssa.Op386LoweredPanicExtendC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ssagen.ExtendCheckFunc[v.AuxInt]
		s.UseArgs(12) // space used in callee args area by assembly stubs

	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
		s.Call(v)
	case ssa.Op386CALLtail:
		s.TailCall(v)
	case ssa.Op386NEGL,
		ssa.Op386BSWAPL,
		ssa.Op386NOTL:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386BSFL, ssa.Op386BSFW,
		ssa.Op386BSRL, ssa.Op386BSRW,
		ssa.Op386SQRTSS, ssa.Op386SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SETEQ, ssa.Op386SETNE,
		ssa.Op386SETL, ssa.Op386SETLE,
		ssa.Op386SETG, ssa.Op386SETGE,
		ssa.Op386SETGF, ssa.Op386SETGEF,
		ssa.Op386SETB, ssa.Op386SETBE,
		ssa.Op386SETORD, ssa.Op386SETNAN,
		ssa.Op386SETA, ssa.Op386SETAE,
		ssa.Op386SETO:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.Op386SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.Op386InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.Op386REPSTOSL:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSL)
	case ssa.Op386REPMOVSL:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSL)
	case ssa.Op386LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have a false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		}
		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			base.WarnfAt(v.Pos, "generated nil check")
		}
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		ssagen.AddAux(&p.To, v)
	case ssa.OpClobberReg:
		// TODO: implement for clobberdead experiment. Nop is ok for now.
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

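// blockJump gives, for each conditional block kind, the branch instruction
// and its inverse; the inverse is used when the first successor follows
// directly and only the second needs an explicit jump.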
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386OS:  {x86.AJOS, x86.AJOC},
	ssa.Block386OC:  {x86.AJOC, x86.AJOS},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}

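// eqfJumps and nefJumps encode the two-jump sequences used for floating-point
// equality and inequality, where an unordered (NaN) comparison must also be
// routed through the parity flag.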
var eqfJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in AX:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit, ssa.BlockRetJmp:
	case ssa.BlockRet:
		s.Prog(obj.ARET)

	case ssa.Block386EQF:
		s.CombJump(b, next, &eqfJumps)

	case ssa.Block386NEF:
		s.CombJump(b, next, &nefJumps)

	case ssa.Block386EQ, ssa.Block386NE,
		ssa.Block386LT, ssa.Block386GE,
		ssa.Block386LE, ssa.Block386GT,
		ssa.Block386OS, ssa.Block386OC,
		ssa.Block386ULT, ssa.Block386UGT,
		ssa.Block386ULE, ssa.Block386UGE:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}