github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/cmd/compile/internal/x86/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// ssaMarkMoves marks any MOVLconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && v.Op == ssa.Op386MOVLconst {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}
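
// The mark matters because ssaGenValue rewrites an unmarked MOVL $0, r into
// XORL r, r, and XORL clobbers the flags; the non-nil Aux set above tells
// codegen that the flags are still live and that rewrite must be skipped.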

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		}
		return x86.AMOVWLZX
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}
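
// For example, a 1-byte integer loads with MOVBLZX and a 2-byte integer with
// MOVWLZX, zero-extending into the full 32-bit register, while wider and
// floating-point types fall through to the storeByType opcodes (e.g. MOVL,
// MOVSD).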

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		default:
			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
		}
	} else {
		switch t.Size() {
		case 1, 2, 4:
			// Use a 32-bit move to avoid a partial register write.
			return x86.AMOVL
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//	dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = src
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	return p
}
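
// For example, opregreg(s, x86.AADDL, x86.REG_AX, x86.REG_BX) assembles
// ADDL BX, AX, i.e. AX += BX.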

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.Op386ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			p := s.Prog(x86.ALEAL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
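		// In other words, for d = a + b this emits ADDL b, d when d == a,
		// ADDL a, d when d == b, and otherwise the three-operand form
		// LEAL (a)(b*1), d, which also leaves the flags untouched.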

	// 2-address opcode arithmetic
	case ssa.Op386SUBL,
		ssa.Op386MULL,
		ssa.Op386ANDL,
		ssa.Op386ORL,
		ssa.Op386XORL,
		ssa.Op386SHLL,
		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
		ssa.Op386PXOR,
		ssa.Op386ADCL,
		ssa.Op386SBBL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386DIVL, ssa.Op386DIVW,
		ssa.Op386DIVLU, ssa.Op386DIVWU,
		ssa.Op386MODL, ssa.Op386MODW,
		ssa.Op386MODLU, ssa.Op386MODWU:

		// Arg[0] is already in AX as it's the only register we allow,
		// and the result lands in AX (quotient) or DX (remainder).
		x := v.Args[1].Reg()

		// The CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1.
		var j *obj.Prog
		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {

			if ssa.NeedsFixUp(v) {
				var c *obj.Prog
				switch v.Op {
				case ssa.Op386DIVL, ssa.Op386MODL:
					c = s.Prog(x86.ACMPL)
					j = s.Prog(x86.AJEQ)

				case ssa.Op386DIVW, ssa.Op386MODW:
					c = s.Prog(x86.ACMPW)
					j = s.Prog(x86.AJEQ)
				}
				c.From.Type = obj.TYPE_REG
				c.From.Reg = x
				c.To.Type = obj.TYPE_CONST
				c.To.Offset = -1

				j.To.Type = obj.TYPE_BRANCH
			}
			// sign extend the dividend
			switch v.Op {
			case ssa.Op386DIVL, ssa.Op386MODL:
				s.Prog(x86.ACDQ)
			case ssa.Op386DIVW, ssa.Op386MODW:
				s.Prog(x86.ACWD)
			}
		}

		// For unsigned ints, we extend the dividend by zeroing DX;
		// signed ints were sign extended above.
		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
			c := s.Prog(x86.AXORL)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// Signed division: the rest of the check for the -1 case.
		if j != nil {
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
				// n * -1 = -n
				n = s.Prog(x86.ANEGL)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = s.Prog(x86.AXORL)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.Val = n
			j2.To.Val = s.Pc()
		}
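
		// For signed DIVL with the fixup, the emitted sequence is roughly:
		//	CMPL x, $-1
		//	JEQ  fix
		//	CDQ
		//	DIVL x
		//	JMP  after
		// fix:
		//	NEGL AX    // quotient of n / -1 is -n (the MOD case zeroes DX instead)
		// after: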

	case ssa.Op386HMULL, ssa.Op386HMULLU:
		// The frontend rewrites constant division by 8/16/32 bit integers
		// into HMUL by a constant; SSA rewrites generate the 64 bit versions.

		// Arg[0] is already in AX as it's the only register we allow,
		// and DX is the only output we care about (the high bits).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency.
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.Op386MULLU:
		// Arg[0] is already in AX as it's the only register we allow,
		// and the low 32 bits of the result go back into AX.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386MULLQU:
		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386AVGLU:
		// Compute (x+y)/2 unsigned.
		// Do a 32-bit add; the overflow goes into the carry.
		// Shift right once and pull the carry back into the 31st bit.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p = s.Prog(x86.ARCRL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
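		// Net effect:
		//	ADDL y, x   // 33-bit sum; bit 32 lands in the carry flag
		//	RCRL $1, x  // rotate right through carry: x = (x+y) >> 1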

	case ssa.Op386ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				p := s.Prog(x86.AINCL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				p := s.Prog(x86.ADECL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
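		// So ADDL $1/-1 becomes INCL/DECL, an in-place add keeps ADDL $c, r,
		// and a copy-and-add uses the flag-preserving form LEAL c(a), r.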

	case ssa.Op386MULLconst:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})

	case ssa.Op386SUBLconst,
		ssa.Op386ADCLconst,
		ssa.Op386SBBLconst,
		ssa.Op386ANDLconst,
		ssa.Op386ORLconst,
		ssa.Op386XORLconst,
		ssa.Op386SHLLconst,
		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAL)
		switch v.Op {
		case ssa.Op386LEAL1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386LEAL2:
			p.From.Scale = 2
		case ssa.Op386LEAL4:
			p.From.Scale = 4
		case ssa.Op386LEAL8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386LEAL:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
		// The Go assembler has swapped operands for UCOMISx relative to CMP,
		// so we must account for that here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.Op386CMPLload, ssa.Op386CMPWload, ssa.Op386CMPBload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()
	case ssa.Op386CMPLconstload, ssa.Op386CMPWconstload, ssa.Op386CMPBconstload:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.From, v, sc.Off())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val()
	case ssa.Op386MOVLconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOVL $0, AX into XORL AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			p := s.Prog(x86.AXORL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x
			break
		}
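		// XORL is preferred for zeroing because its encoding is shorter than
		// that of MOVL $0, but it clobbers the flags, hence the Aux check above.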

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		f := math.Float64frombits(uint64(v.AuxInt))
		if v.Op == ssa.Op386MOVSDconst1 {
			p.From.Sym = gc.Ctxt.Float64Sym(f)
		} else {
			p.From.Sym = gc.Ctxt.Float32Sym(float32(f))
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1,
		ssa.Op386MOVSDloadidx8, ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4, ssa.Op386MOVWloadidx2:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.From.Scale = 1
		case ssa.Op386MOVSDloadidx8:
			p.From.Scale = 8
		case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
			p.From.Scale = 4
		case ssa.Op386MOVWloadidx2:
			p.From.Scale = 2
		}
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386ADDLloadidx4, ssa.Op386SUBLloadidx4, ssa.Op386MULLloadidx4,
		ssa.Op386ANDLloadidx4, ssa.Op386ORLloadidx4, ssa.Op386XORLloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		p.From.Index = v.Args[2].Reg()
		p.From.Scale = 4
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
	case ssa.Op386ADDLload, ssa.Op386SUBLload, ssa.Op386MULLload,
		ssa.Op386ANDLload, ssa.Op386ORLload, ssa.Op386XORLload,
		ssa.Op386ADDSDload, ssa.Op386ADDSSload, ssa.Op386SUBSDload, ssa.Op386SUBSSload,
		ssa.Op386MULSDload, ssa.Op386MULSSload, ssa.Op386DIVSSload, ssa.Op386DIVSDload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore,
		ssa.Op386ADDLmodify, ssa.Op386SUBLmodify, ssa.Op386ANDLmodify, ssa.Op386ORLmodify, ssa.Op386XORLmodify:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386ADDLconstmodify:
		sc := v.AuxValAndOff()
		val := sc.Val()
		if val == 1 || val == -1 {
			var p *obj.Prog
			if val == 1 {
				p = s.Prog(x86.AINCL)
			} else {
				p = s.Prog(x86.ADECL)
			}
			off := sc.Off()
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			gc.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.Op386ANDLconstmodify, ssa.Op386ORLconstmodify, ssa.Op386XORLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off()
		val := sc.Val()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = val
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, off)
	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1,
		ssa.Op386MOVSDstoreidx8, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4, ssa.Op386MOVWstoreidx2,
		ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.To.Scale = 1
		case ssa.Op386MOVSDstoreidx8:
			p.To.Scale = 8
		case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4,
			ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
			p.To.Scale = 4
		case ssa.Op386MOVWstoreidx2:
			p.To.Scale = 2
		}
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386ADDLconstmodifyidx4:
		sc := v.AuxValAndOff()
		val := sc.Val()
		if val == 1 || val == -1 {
			var p *obj.Prog
			if val == 1 {
				p = s.Prog(x86.AINCL)
			} else {
				p = s.Prog(x86.ADECL)
			}
			off := sc.Off()
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Scale = 4
			p.To.Index = v.Args[1].Reg()
			gc.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1,
		ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.Op386MOVLstoreconstidx4,
			ssa.Op386ADDLconstmodifyidx4, ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
			p.To.Scale = 4
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.Op386DUFFZERO:
		p := s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = v.AuxInt
	case ssa.Op386DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt
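		// For both Duff's-device calls, v.AuxInt selects the entry point into
		// the routine, which determines how many words are zeroed or copied.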

	case ssa.OpCopy: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.Op386LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.Op386LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVL (TLS), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVL TLS, r
			// MOVL (r)(TLS*1), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVL)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}

	case ssa.Op386LoweredGetCallerPC:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -4 // PC is stored 4 bytes below the first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredGetCallerSP:
		// The caller's SP is the address of the first argument.
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -gc.Ctxt.FixedFrameSize() // 0 on 386, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = v.Aux.(*obj.LSym)

	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
		s.Call(v)
	case ssa.Op386NEGL,
		ssa.Op386BSWAPL,
		ssa.Op386NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386BSFL, ssa.Op386BSFW,
		ssa.Op386BSRL, ssa.Op386BSRW,
		ssa.Op386SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SETEQ, ssa.Op386SETNE,
		ssa.Op386SETL, ssa.Op386SETLE,
		ssa.Op386SETG, ssa.Op386SETGE,
		ssa.Op386SETGF, ssa.Op386SETGEF,
		ssa.Op386SETB, ssa.Op386SETBE,
		ssa.Op386SETORD, ssa.Op386SETNAN,
		ssa.Op386SETA, ssa.Op386SETAE,
		ssa.Op386SETO:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.Op386SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)
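
		// For SETNEF/SETEQF, a bare SETNE/SETEQ would misreport NaN operands:
		// UCOMISS/UCOMISD signal an unordered comparison by setting ZF, PF,
		// and CF. The sequences above therefore OR in "parity set" for != and
		// AND in "parity clear" for ==, using AX as scratch.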

	case ssa.Op386InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.Op386REPSTOSL:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSL)
	case ssa.Op386REPMOVSL:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSL)
	case ssa.Op386LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have a false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.Op386FCHS:
		v.Fatalf("FCHS in non-387 mode")
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386OS:  {x86.AJOS, x86.AJOC},
	ssa.Block386OC:  {x86.AJOC, x86.AJOS},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}

var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}
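
// These tables exist because floating-point == and != must combine ZF with
// the parity flag: an unordered result (a NaN operand) sets PF, so "equal"
// holds only when ZF is set and PF is clear. FPJump picks a row based on
// which successor is the fallthrough block and emits the corresponding
// two-jump sequence.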

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in AX:
		// 0 if we should continue executing,
		// 1 if we should jump to the deferreturn call.
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.Block386EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.Block386NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.Block386EQ, ssa.Block386NE,
		ssa.Block386LT, ssa.Block386GE,
		ssa.Block386LE, ssa.Block386GT,
		ssa.Block386OS, ssa.Block386OC,
		ssa.Block386ULT, ssa.Block386UGT,
		ssa.Block386ULE, ssa.Block386UGE:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}