github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/compile/internal/x86/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"fmt"
	"math"

	"github.com/gagliardetto/golang-go/cmd/compile/internal/gc"
	"github.com/gagliardetto/golang-go/cmd/compile/internal/logopt"
	"github.com/gagliardetto/golang-go/cmd/compile/internal/ssa"
	"github.com/gagliardetto/golang-go/cmd/compile/internal/types"
	"github.com/gagliardetto/golang-go/cmd/internal/obj"
	"github.com/gagliardetto/golang-go/cmd/internal/obj/x86"
)

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
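// The mark (any non-nil Aux) tells ssaGenValue below not to rewrite
// MOVLconst $0 into XORL, since XORL would clobber the still-live flags.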
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	for _, c := range b.ControlValues() {
		flive = c.Type.IsFlags() || flive
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && v.Op == ssa.Op386MOVLconst {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
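	// (sub-word values are loaded with zero extension into a full
	// 32-bit register, so later full-width uses never depend on
	// stale upper bits).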
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		default:
			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
		}
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//     dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.Op386ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
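			// The destination differs from both inputs: ADDL is
			// 2-address, so emit LEAL (r1)(r2*1), r instead, a
			// 3-address add that leaves both inputs intact.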
			p := s.Prog(x86.ALEAL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	// 2-address opcode arithmetic
	case ssa.Op386SUBL,
		ssa.Op386MULL,
		ssa.Op386ANDL,
		ssa.Op386ORL,
		ssa.Op386XORL,
		ssa.Op386SHLL,
		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
		ssa.Op386PXOR,
		ssa.Op386ADCL,
		ssa.Op386SBBL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386DIVL, ssa.Op386DIVW,
		ssa.Op386DIVLU, ssa.Op386DIVWU,
		ssa.Op386MODL, ssa.Op386MODW,
		ssa.Op386MODLU, ssa.Op386MODWU:

		// Arg[0] is already in AX as it's the only register we allow
		// and AX is the only output
		x := v.Args[1].Reg()

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1.
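		// When the fixup is needed, the emitted sequence is, in outline:
		//	CMPL  x, $-1
		//	JEQ   fix
		//	CDQ               (CWD for the 16-bit forms)
		//	IDIVL x
		//	JMP   done
		// fix:
		//	NEGL  AX          (quotient: n / -1 = -n)
		//	(or XORL DX, DX   for remainder: n % -1 = 0)
		// done: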
		var j *obj.Prog
		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {

			if ssa.NeedsFixUp(v) {
				var c *obj.Prog
				switch v.Op {
				case ssa.Op386DIVL, ssa.Op386MODL:
					c = s.Prog(x86.ACMPL)
					j = s.Prog(x86.AJEQ)

				case ssa.Op386DIVW, ssa.Op386MODW:
					c = s.Prog(x86.ACMPW)
					j = s.Prog(x86.AJEQ)
				}
				c.From.Type = obj.TYPE_REG
				c.From.Reg = x
				c.To.Type = obj.TYPE_CONST
				c.To.Offset = -1

				j.To.Type = obj.TYPE_BRANCH
			}
			// sign extend the dividend
			switch v.Op {
			case ssa.Op386DIVL, ssa.Op386MODL:
				s.Prog(x86.ACDQ)
			case ssa.Op386DIVW, ssa.Op386MODW:
				s.Prog(x86.ACWD)
			}
		}

		// for unsigned ints, we zero-extend the dividend by setting DX = 0;
		// signed ints were sign-extended above
		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
			c := s.Prog(x86.AXORL)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// signed division: the rest of the check for the -1 case
		if j != nil {
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
				// n / -1 = -n
				n = s.Prog(x86.ANEGL)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = s.Prog(x86.AXORL)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.Val = n
			j2.To.Val = s.Pc()
		}

	case ssa.Op386HMULL, ssa.Op386HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.Op386MULLU:
		// Arg[0] is already in AX as it's the only register we allow
		// the low 32 bits of the result are in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386MULLQU:
		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386AVGLU:
		// compute (x+y)/2 unsigned.
		// Do a 32-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 31st bit.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDL)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386ADDLconst:
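		// When the result stays in place, ±1 uses the shorter
		// INCL/DECL encodings and other constants use ADDL; when it
		// moves, LEAL off(a), r adds without clobbering a.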
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				p := s.Prog(x86.AINCL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				p := s.Prog(x86.ADECL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386MULLconst:
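		// Emit the 3-operand form IMULL $c, src, dst; SetFrom3
		// supplies the middle (source register) operand.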
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})

	case ssa.Op386SUBLconst,
		ssa.Op386ADCLconst,
		ssa.Op386SBBLconst,
		ssa.Op386ANDLconst,
		ssa.Op386ORLconst,
		ssa.Op386XORLconst,
		ssa.Op386SHLLconst,
		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAL)
		switch v.Op {
		case ssa.Op386LEAL1:
			p.From.Scale = 1
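			// SP cannot serve as an index register on x86; with a
			// scale of 1, base and index are interchangeable, so
			// swap them. The scale-1 indexed load/store cases below
			// do the same.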
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386LEAL2:
			p.From.Scale = 2
		case ssa.Op386LEAL4:
			p.From.Scale = 4
		case ssa.Op386LEAL8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386LEAL:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.Op386CMPLload, ssa.Op386CMPWload, ssa.Op386CMPBload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()
	case ssa.Op386CMPLconstload, ssa.Op386CMPWconstload, ssa.Op386CMPBconstload:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.From, v, sc.Off())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val()
	case ssa.Op386MOVLconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			p := s.Prog(x86.AXORL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x
			break
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
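	// const1/const2 materialize a float constant in two steps: const1
	// loads the address of the constant's symbol with LEAL, and const2
	// loads the value through that address. The rewrite rules select
	// this form when the direct MOVSxconst cannot be used (shared/PIC
	// builds).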
	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		f := math.Float64frombits(uint64(v.AuxInt))
		if v.Op == ssa.Op386MOVSDconst1 {
			p.From.Sym = gc.Ctxt.Float64Sym(f)
		} else {
			p.From.Sym = gc.Ctxt.Float32Sym(float32(f))
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1,
		ssa.Op386MOVSDloadidx8, ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4, ssa.Op386MOVWloadidx2:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.From.Scale = 1
		case ssa.Op386MOVSDloadidx8:
			p.From.Scale = 8
		case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
			p.From.Scale = 4
		case ssa.Op386MOVWloadidx2:
			p.From.Scale = 2
		}
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386ADDLloadidx4, ssa.Op386SUBLloadidx4, ssa.Op386MULLloadidx4,
		ssa.Op386ANDLloadidx4, ssa.Op386ORLloadidx4, ssa.Op386XORLloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		p.From.Index = v.Args[2].Reg()
		p.From.Scale = 4
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
	case ssa.Op386ADDLload, ssa.Op386SUBLload, ssa.Op386MULLload,
		ssa.Op386ANDLload, ssa.Op386ORLload, ssa.Op386XORLload,
		ssa.Op386ADDSDload, ssa.Op386ADDSSload, ssa.Op386SUBSDload, ssa.Op386SUBSSload,
		ssa.Op386MULSDload, ssa.Op386MULSSload, ssa.Op386DIVSSload, ssa.Op386DIVSDload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore,
		ssa.Op386ADDLmodify, ssa.Op386SUBLmodify, ssa.Op386ANDLmodify, ssa.Op386ORLmodify, ssa.Op386XORLmodify:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386ADDLconstmodify:
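		// ±1 on a memory operand becomes INCL/DECL; other constants
		// fall through to the generic constant-modify emission. The
		// idx4 variant below repeats this pattern for indexed
		// addresses.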
		sc := v.AuxValAndOff()
		val := sc.Val()
		if val == 1 || val == -1 {
			var p *obj.Prog
			if val == 1 {
				p = s.Prog(x86.AINCL)
			} else {
				p = s.Prog(x86.ADECL)
			}
			off := sc.Off()
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			gc.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.Op386ANDLconstmodify, ssa.Op386ORLconstmodify, ssa.Op386XORLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off()
		val := sc.Val()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = val
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, off)
	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1,
		ssa.Op386MOVSDstoreidx8, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4, ssa.Op386MOVWstoreidx2,
		ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.To.Scale = 1
		case ssa.Op386MOVSDstoreidx8:
			p.To.Scale = 8
		case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4,
			ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
			p.To.Scale = 4
		case ssa.Op386MOVWstoreidx2:
			p.To.Scale = 2
		}
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386ADDLconstmodifyidx4:
		sc := v.AuxValAndOff()
		val := sc.Val()
		if val == 1 || val == -1 {
			var p *obj.Prog
			if val == 1 {
				p = s.Prog(x86.AINCL)
			} else {
				p = s.Prog(x86.ADECL)
			}
			off := sc.Off()
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Scale = 4
			p.To.Index = v.Args[1].Reg()
			gc.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1,
		ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.Op386MOVLstoreconstidx4,
			ssa.Op386ADDLconstmodifyidx4, ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
			p.To.Scale = 4
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.Op386DUFFZERO:
		p := s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = v.AuxInt
	case ssa.Op386DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.OpCopy: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.Op386LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.Op386LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVL (TLS), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVL TLS, r
			// MOVL (r)(TLS*1), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVL)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}

	case ssa.Op386LoweredGetCallerPC:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -4 // PC is stored 4 bytes below the first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -gc.Ctxt.FixedFrameSize() // 0 on 386, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = v.Aux.(*obj.LSym)

	case ssa.Op386LoweredPanicBoundsA, ssa.Op386LoweredPanicBoundsB, ssa.Op386LoweredPanicBoundsC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
		s.UseArgs(8) // space used in callee args area by assembly stubs

	case ssa.Op386LoweredPanicExtendA, ssa.Op386LoweredPanicExtendB, ssa.Op386LoweredPanicExtendC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.ExtendCheckFunc[v.AuxInt]
		s.UseArgs(12) // space used in callee args area by assembly stubs

	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
		s.Call(v)
	case ssa.Op386NEGL,
		ssa.Op386BSWAPL,
		ssa.Op386NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386BSFL, ssa.Op386BSFW,
		ssa.Op386BSRL, ssa.Op386BSRW,
		ssa.Op386SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SETEQ, ssa.Op386SETNE,
		ssa.Op386SETL, ssa.Op386SETLE,
		ssa.Op386SETG, ssa.Op386SETGE,
		ssa.Op386SETGF, ssa.Op386SETGEF,
		ssa.Op386SETB, ssa.Op386SETBE,
		ssa.Op386SETORD, ssa.Op386SETNAN,
		ssa.Op386SETA, ssa.Op386SETAE,
		ssa.Op386SETO:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

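	// Floating-point equality needs extra care because an unordered
	// (NaN) UCOMIS comparison sets ZF, PF and CF. SETNEF ORs in the
	// parity bit so NaN compares unequal; SETEQF ANDs in its
	// complement so NaN never compares equal. AX serves as scratch
	// for the parity result.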
	case ssa.Op386SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.Op386SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.Op386InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.Op386REPSTOSL:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSL)
	case ssa.Op386REPMOVSL:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSL)
	case ssa.Op386LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have a false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		}
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.Op386FCHS:
		v.Fatalf("FCHS in non-387 mode")
	case ssa.OpClobber:
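		// Overwrite a dead stack slot with a recognizable poison
		// value; used by the compiler's clobberdead debug mode.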
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

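// blockJump gives, for each conditional block kind, the jump emitted when
// branching to Succs[0] (asm) and the inverted jump used to branch to
// Succs[1] when Succs[0] is the fallthrough block (invasm).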
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386OS:  {x86.AJOS, x86.AJOC},
	ssa.Block386OC:  {x86.AJOC, x86.AJOS},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}

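// eqfJumps and nefJumps drive FPJump for floating-point EQF/NEF blocks.
// A single conditional jump cannot express them, since unordered (NaN)
// comparisons set the parity flag; each table row pairs a ZF jump with a
// PF jump, and the row is picked by which successor is the fallthrough.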
var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in AX:
		// 0 if we should continue executing
		// 1 if we should jump to the deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.Block386EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.Block386NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.Block386EQ, ssa.Block386NE,
		ssa.Block386LT, ssa.Block386GE,
		ssa.Block386LE, ssa.Block386GT,
		ssa.Block386OS, ssa.Block386OC,
		ssa.Block386ULT, ssa.Block386UGT,
		ssa.Block386ULE, ssa.Block386UGE:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
		default:
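			// Neither successor is the fallthrough block: emit a
			// conditional jump to the likelier target followed by an
			// unconditional JMP to the other.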
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}