github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/compile/x86/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"fmt"
	"math"

	"github.com/go-asm/go/cmd/compile/base"
	"github.com/go-asm/go/cmd/compile/ir"
	"github.com/go-asm/go/cmd/compile/logopt"
	"github.com/go-asm/go/cmd/compile/ssa"
	"github.com/go-asm/go/cmd/compile/ssagen"
	"github.com/go-asm/go/cmd/compile/types"
	"github.com/go-asm/go/cmd/obj"
	"github.com/go-asm/go/cmd/obj/x86"
)

// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
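// The mark is consumed by the Op386MOVLconst case in ssaGenValue below:
// a MOVL $0 is normally rewritten to the shorter XORL, but XORL clobbers
// the flags, so that rewrite must be suppressed while flags are live.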
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	for _, c := range b.ControlValues() {
		flive = c.Type.IsFlags() || flive
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && v.Op == ssa.Op386MOVLconst {
			// The "mark" is any non-nil Aux value.
			v.Aux = ssa.AuxMark
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
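	// MOVB/MOVW write only the low 8/16 bits of the destination and so
	// depend on its old value; the zero-extending forms write the whole
	// 32-bit register instead.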
	if !t.IsFloat() {
		switch t.Size() {
		case 1:
			return x86.AMOVBLZX
		case 2:
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		default:
			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
		}
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//
//	dest := dest(To) op src(From)
//
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

func ssaGenValue(s *ssagen.State, v *ssa.Value) {
	switch v.Op {
	case ssa.Op386ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
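			// Neither input aliases the output: use LEAL r1+r2*1,
			// a three-operand add that leaves the flags untouched.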
			p := s.Prog(x86.ALEAL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	// 2-address opcode arithmetic
	case ssa.Op386SUBL,
		ssa.Op386MULL,
		ssa.Op386ANDL,
		ssa.Op386ORL,
		ssa.Op386XORL,
		ssa.Op386SHLL,
		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
		ssa.Op386ROLL, ssa.Op386ROLW, ssa.Op386ROLB,
		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
		ssa.Op386PXOR,
		ssa.Op386ADCL,
		ssa.Op386SBBL:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())

	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		opregreg(s, v.Op.Asm(), v.Reg0(), v.Args[1].Reg())

	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.Op386DIVL, ssa.Op386DIVW,
		ssa.Op386DIVLU, ssa.Op386DIVWU,
		ssa.Op386MODL, ssa.Op386MODW,
		ssa.Op386MODLU, ssa.Op386MODWU:

		// Arg[0] (the dividend) is already in AX, the only register we allow.
		// The quotient is returned in AX and the remainder in DX.
		x := v.Args[1].Reg()

		// The CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1.
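		// For example, int32(-1 << 31) / -1 does not fit in an int32.
		// The emitted fixup sequence is roughly:
		//
		//	CMPL  x, $-1
		//	JEQ   fix
		//	CDQ          (or CWD for the 16-bit ops)
		//	IDIVL x      (or IDIVW)
		//	JMP   done
		// fix:
		//	NEGL  AX     (quotient; XORL DX, DX for the remainder)
		// done: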
		var j *obj.Prog
		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {

			if ssa.DivisionNeedsFixUp(v) {
				var c *obj.Prog
				switch v.Op {
				case ssa.Op386DIVL, ssa.Op386MODL:
					c = s.Prog(x86.ACMPL)
					j = s.Prog(x86.AJEQ)

				case ssa.Op386DIVW, ssa.Op386MODW:
					c = s.Prog(x86.ACMPW)
					j = s.Prog(x86.AJEQ)
				}
				c.From.Type = obj.TYPE_REG
				c.From.Reg = x
				c.To.Type = obj.TYPE_CONST
				c.To.Offset = -1

				j.To.Type = obj.TYPE_BRANCH
			}
			// sign extend the dividend
			switch v.Op {
			case ssa.Op386DIVL, ssa.Op386MODL:
				s.Prog(x86.ACDQ)
			case ssa.Op386DIVW, ssa.Op386MODW:
				s.Prog(x86.ACWD)
			}
		}

		// for unsigned ints, we zero the high half by setting DX = 0.
		// signed ints were sign extended above.
		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
			c := s.Prog(x86.AXORL)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// signed division, rest of the check for -1 case
		if j != nil {
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
				// n * -1 = -n
				n = s.Prog(x86.ANEGL)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = s.Prog(x86.AXORL)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.SetTarget(n)
			j2.To.SetTarget(s.Pc())
		}

	case ssa.Op386HMULL, ssa.Op386HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant.
		// SSA rewrite rules lower these to the 32 bit HMULL/HMULLU used here.

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.Op386MULLU:
		// Arg[0] is already in AX as it's the only register we allow
		// results lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386MULLQU:
		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386AVGLU:
		// compute (x+y)/2 unsigned.
		// Do a 32-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 31st bit.
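		// RCRL rotates through the carry flag, so CF (bit 32 of the true
		// sum) becomes bit 31 of the result: the full 33-bit sum is
		// shifted right as a unit and cannot overflow.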
		p := s.Prog(x86.AADDL)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				p := s.Prog(x86.AINCL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				p := s.Prog(x86.ADECL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
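		// The output register differs from the input: LEAL off(a), r
		// performs the move and the add in a single instruction.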
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386MULLconst:
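		// Three-operand immediate multiply: IMUL3L $AuxInt, src, dst.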
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.AddRestSourceReg(v.Args[0].Reg())

	case ssa.Op386SUBLconst,
		ssa.Op386ADCLconst,
		ssa.Op386SBBLconst,
		ssa.Op386ANDLconst,
		ssa.Op386ORLconst,
		ssa.Op386XORLconst,
		ssa.Op386SHLLconst,
		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SBBLcarrymask:
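		// SBBL r, r computes r - r - CF, i.e. 0 if the carry flag is
		// clear and -1 (all ones) if it is set.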
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAL)
		switch v.Op {
		case ssa.Op386LEAL1:
			p.From.Scale = 1
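			// SP is not encodable as an index register. With scale 1
			// the base and index are interchangeable, so swap them.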
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386LEAL2:
			p.From.Scale = 2
		case ssa.Op386LEAL4:
			p.From.Scale = 4
		case ssa.Op386LEAL8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386LEAL:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.Op386CMPLload, ssa.Op386CMPWload, ssa.Op386CMPBload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()
	case ssa.Op386CMPLconstload, ssa.Op386CMPWconstload, ssa.Op386CMPBconstload:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.From, v, sc.Off64())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val64()
	case ssa.Op386MOVLconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
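		// XORL is shorter, but it also clobbers the flags; see the mark
		// set by ssaMarkMoves above.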
		if v.AuxInt == 0 && v.Aux == nil {
			p := s.Prog(x86.AXORL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x
			break
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
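		// AuxInt holds the constant as float64 bits even for MOVSSconst;
		// a float32 constant is stored as its exact float64 value.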
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		f := math.Float64frombits(uint64(v.AuxInt))
		if v.Op == ssa.Op386MOVSDconst1 {
			p.From.Sym = base.Ctxt.Float64Sym(f)
		} else {
			p.From.Sym = base.Ctxt.Float32Sym(float32(f))
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1,
		ssa.Op386MOVSDloadidx8, ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4, ssa.Op386MOVWloadidx2:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.From.Scale = 1
		case ssa.Op386MOVSDloadidx8:
			p.From.Scale = 8
		case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
			p.From.Scale = 4
		case ssa.Op386MOVWloadidx2:
			p.From.Scale = 2
		}
		p.From.Reg = r
		p.From.Index = i
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386ADDLloadidx4, ssa.Op386SUBLloadidx4, ssa.Op386MULLloadidx4,
		ssa.Op386ANDLloadidx4, ssa.Op386ORLloadidx4, ssa.Op386XORLloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		p.From.Index = v.Args[2].Reg()
		p.From.Scale = 4
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386ADDLload, ssa.Op386SUBLload, ssa.Op386MULLload,
		ssa.Op386ANDLload, ssa.Op386ORLload, ssa.Op386XORLload,
		ssa.Op386ADDSDload, ssa.Op386ADDSSload, ssa.Op386SUBSDload, ssa.Op386SUBSSload,
		ssa.Op386MULSDload, ssa.Op386MULSSload, ssa.Op386DIVSSload, ssa.Op386DIVSDload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore,
		ssa.Op386ADDLmodify, ssa.Op386SUBLmodify, ssa.Op386ANDLmodify, ssa.Op386ORLmodify, ssa.Op386XORLmodify:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.Op386ADDLconstmodify:
		sc := v.AuxValAndOff()
		val := sc.Val()
		if val == 1 || val == -1 {
			var p *obj.Prog
			if val == 1 {
				p = s.Prog(x86.AINCL)
			} else {
				p = s.Prog(x86.ADECL)
			}
			off := sc.Off64()
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			ssagen.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.Op386ANDLconstmodify, ssa.Op386ORLconstmodify, ssa.Op386XORLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off64()
		val := sc.Val64()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = val
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, off)
	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1,
		ssa.Op386MOVSDstoreidx8, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4, ssa.Op386MOVWstoreidx2,
		ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.To.Scale = 1
		case ssa.Op386MOVSDstoreidx8:
			p.To.Scale = 8
		case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4,
			ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
			p.To.Scale = 4
		case ssa.Op386MOVWstoreidx2:
			p.To.Scale = 2
		}
		p.To.Reg = r
		p.To.Index = i
		ssagen.AddAux(&p.To, v)
	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.Op386ADDLconstmodifyidx4:
		sc := v.AuxValAndOff()
		val := sc.Val()
		if val == 1 || val == -1 {
			var p *obj.Prog
			if val == 1 {
				p = s.Prog(x86.AINCL)
			} else {
				p = s.Prog(x86.ADECL)
			}
			off := sc.Off64()
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Scale = 4
			p.To.Index = v.Args[1].Reg()
			ssagen.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1,
		ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.Op386MOVLstoreconstidx4,
			ssa.Op386ADDLconstmodifyidx4, ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
			p.To.Scale = 4
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.Op386DUFFZERO:
		p := s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = ir.Syms.Duffzero
		p.To.Offset = v.AuxInt
	case ssa.Op386DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = ir.Syms.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.OpCopy: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		ssagen.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddrAuto(&p.To, v)
	case ssa.Op386LoweredGetClosurePtr:
		// Closure pointer is DX.
		ssagen.CheckLoweredGetClosurePtr(v)
	case ssa.Op386LoweredGetG:
		r := v.Reg()
		// See the comments in github.com/go-asm/go/cmd/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(base.Ctxt) {
			// MOVL (TLS), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVL TLS, r
			// MOVL (r)(TLS*1), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVL)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}

	case ssa.Op386LoweredGetCallerPC:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -4 // PC is stored 4 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize // 0 on 386, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		// AuxInt encodes how many buffer entries we need.
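		// GCWriteBarrier[n-1] is the write-barrier stub that reserves n entries.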
		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]

	case ssa.Op386LoweredPanicBoundsA, ssa.Op386LoweredPanicBoundsB, ssa.Op386LoweredPanicBoundsC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
		s.UseArgs(8) // space used in callee args area by assembly stubs

	case ssa.Op386LoweredPanicExtendA, ssa.Op386LoweredPanicExtendB, ssa.Op386LoweredPanicExtendC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ssagen.ExtendCheckFunc[v.AuxInt]
		s.UseArgs(12) // space used in callee args area by assembly stubs

	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
		s.Call(v)
	case ssa.Op386CALLtail:
		s.TailCall(v)
	case ssa.Op386NEGL,
		ssa.Op386BSWAPL,
		ssa.Op386NOTL:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386BSFL, ssa.Op386BSFW,
		ssa.Op386BSRL, ssa.Op386BSRW,
		ssa.Op386SQRTSS, ssa.Op386SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SETEQ, ssa.Op386SETNE,
		ssa.Op386SETL, ssa.Op386SETLE,
		ssa.Op386SETG, ssa.Op386SETGE,
		ssa.Op386SETGF, ssa.Op386SETGEF,
		ssa.Op386SETB, ssa.Op386SETBE,
		ssa.Op386SETORD, ssa.Op386SETNAN,
		ssa.Op386SETA, ssa.Op386SETAE,
		ssa.Op386SETO:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

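	// SETNEF and SETEQF materialize floating-point != and ==. UCOMISx
	// reports an unordered result (a NaN operand) in the parity flag,
	// so != is "ZF clear OR PF set" and == is "ZF set AND PF clear";
	// the second condition byte is built in AX and merged with ORL/ANDL.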
	case ssa.Op386SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.Op386SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.Op386InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.Op386REPSTOSL:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSL)
	case ssa.Op386REPMOVSL:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSL)
	case ssa.Op386LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have a false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg), reg2?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		}
		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			base.WarnfAt(v.Pos, "generated nil check")
		}
	case ssa.Op386LoweredCtz32:
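		// BSFL leaves the destination undefined when the source is zero,
		// so branch over an explicit MOVL $32 for that case.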
		// BSFL in, out
		p := s.Prog(x86.ABSFL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

		// JNZ 2(PC)
		p1 := s.Prog(x86.AJNE)
		p1.To.Type = obj.TYPE_BRANCH

		// MOVL $32, out
		p2 := s.Prog(x86.AMOVL)
		p2.From.Type = obj.TYPE_CONST
		p2.From.Offset = 32
		p2.To.Type = obj.TYPE_REG
		p2.To.Reg = v.Reg()

		// NOP (so the JNZ has somewhere to land)
		nop := s.Prog(obj.ANOP)
		p1.To.SetTarget(nop)

	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		ssagen.AddAux(&p.To, v)
	case ssa.OpClobberReg:
		// TODO: implement for clobberdead experiment. Nop is ok for now.
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

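// blockJump maps a conditional block kind to its branch instruction and
// the inverted branch used when the likely successor is the fallthrough.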
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386OS:  {x86.AJOS, x86.AJOC},
	ssa.Block386OC:  {x86.AJOC, x86.AJOS},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}

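// eqfJumps and nefJumps implement floating-point == and != branches.
// UCOMISx sets the parity flag on an unordered (NaN) comparison, so
// equality requires ZF set and PF clear, while inequality is ZF clear
// or PF set; each therefore needs two conditional jumps.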
var eqfJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in AX:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit, ssa.BlockRetJmp:
	case ssa.BlockRet:
		s.Prog(obj.ARET)

	case ssa.Block386EQF:
		s.CombJump(b, next, &eqfJumps)

	case ssa.Block386NEF:
		s.CombJump(b, next, &nefJumps)

	case ssa.Block386EQ, ssa.Block386NE,
		ssa.Block386LT, ssa.Block386GE,
		ssa.Block386LE, ssa.Block386GT,
		ssa.Block386OS, ssa.Block386OC,
		ssa.Block386ULT, ssa.Block386UGT,
		ssa.Block386ULE, ssa.Block386UGE:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}