github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/compile/amd64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package amd64
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  
    11  	"github.com/go-asm/go/buildcfg"
    12  
    13  	"github.com/go-asm/go/cmd/compile/base"
    14  	"github.com/go-asm/go/cmd/compile/ir"
    15  	"github.com/go-asm/go/cmd/compile/logopt"
    16  	"github.com/go-asm/go/cmd/compile/objw"
    17  	"github.com/go-asm/go/cmd/compile/ssa"
    18  	"github.com/go-asm/go/cmd/compile/ssagen"
    19  	"github.com/go-asm/go/cmd/compile/types"
    20  	"github.com/go-asm/go/cmd/obj"
    21  	"github.com/go-asm/go/cmd/obj/x86"
    22  )
    23  
    24  // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
    25  func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
    26  	flive := b.FlagsLiveAtEnd
    27  	for _, c := range b.ControlValues() {
    28  		flive = c.Type.IsFlags() || flive
    29  	}
    30  	for i := len(b.Values) - 1; i >= 0; i-- {
    31  		v := b.Values[i]
    32  		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
    33  			// The "mark" is any non-nil Aux value.
    34  			v.Aux = ssa.AuxMark
    35  		}
    36  		if v.Type.IsFlags() {
    37  			flive = false
    38  		}
    39  		for _, a := range v.Args {
    40  			if a.Type.IsFlags() {
    41  				flive = true
    42  			}
    43  		}
    44  	}
    45  }
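        // For example (a sketch, not actual generated output), in a sequence like
        //
        //	CMPQ BX, CX
        //	MOVQ $0, AX
        //	JEQ  target
        //
        // the flags set by CMPQ are live across the MOVQ. Marking the MOVQ
        // (v.Aux = ssa.AuxMark) tells the MOVLconst/MOVQconst case in ssaGenValue
        // below not to rewrite it into XORL AX, AX, which would clobber the flags
        // that JEQ consumes.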
    46  
    47  // loadByType returns the load instruction of the given type.
    48  func loadByType(t *types.Type) obj.As {
    49  	// Avoid partial register write
    50  	if !t.IsFloat() {
    51  		switch t.Size() {
    52  		case 1:
    53  			return x86.AMOVBLZX
    54  		case 2:
    55  			return x86.AMOVWLZX
    56  		}
    57  	}
    58  	// Otherwise, there's no difference between load and store opcodes.
    59  	return storeByType(t)
    60  }
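        // For example, a 1-byte load is emitted as MOVBLZX (and a 2-byte load as
        // MOVWLZX), which zero-extends into the full 32-bit register, so the
        // destination is written in full and no partial-register update occurs.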
    61  
    62  // storeByType returns the store instruction of the given type.
    63  func storeByType(t *types.Type) obj.As {
    64  	width := t.Size()
    65  	if t.IsFloat() {
    66  		switch width {
    67  		case 4:
    68  			return x86.AMOVSS
    69  		case 8:
    70  			return x86.AMOVSD
    71  		}
    72  	} else {
    73  		switch width {
    74  		case 1:
    75  			return x86.AMOVB
    76  		case 2:
    77  			return x86.AMOVW
    78  		case 4:
    79  			return x86.AMOVL
    80  		case 8:
    81  			return x86.AMOVQ
    82  		case 16:
    83  			return x86.AMOVUPS
    84  		}
    85  	}
    86  	panic(fmt.Sprintf("bad store type %v", t))
    87  }
    88  
    89  // moveByType returns the reg->reg move instruction of the given type.
    90  func moveByType(t *types.Type) obj.As {
    91  	if t.IsFloat() {
    92  		// Moving the whole SSE2 register is faster
    93  		// than moving just the correct low portion of it.
    94  		// There is no xmm->xmm move with a 1-byte opcode,
    95  		// so use movups, which has a 2-byte opcode.
    96  		return x86.AMOVUPS
    97  	} else {
    98  		switch t.Size() {
    99  		case 1:
   100  			// Avoids partial register write
   101  			return x86.AMOVL
   102  		case 2:
   103  			return x86.AMOVL
   104  		case 4:
   105  			return x86.AMOVL
   106  		case 8:
   107  			return x86.AMOVQ
   108  		case 16:
   109  			return x86.AMOVUPS // int128s are in SSE registers
   110  		default:
   111  			panic(fmt.Sprintf("bad int register width %d:%v", t.Size(), t))
   112  		}
   113  	}
   114  }
   115  
   116  // opregreg emits instructions for
   117  //
   118  //	dest := dest(To) op src(From)
   119  //
   120  // and also returns the created obj.Prog so it
   121  // may be further adjusted (offset, scale, etc).
   122  func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
   123  	p := s.Prog(op)
   124  	p.From.Type = obj.TYPE_REG
   125  	p.To.Type = obj.TYPE_REG
   126  	p.To.Reg = dest
   127  	p.From.Reg = src
   128  	return p
   129  }
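        // For example, opregreg(s, x86.AXORL, r, r) emits XORL r, r, the idiom used
        // elsewhere in this file to zero an integer register without materializing
        // a constant.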
   130  
   131  // memIdx fills out a as an indexed memory reference for v.
   132  // It assumes that the base register and the index register
   133  // are v.Args[0].Reg() and v.Args[1].Reg(), respectively.
   134  // The caller must still use gc.AddAux/gc.AddAux2 to handle v.Aux as necessary.
   135  func memIdx(a *obj.Addr, v *ssa.Value) {
   136  	r, i := v.Args[0].Reg(), v.Args[1].Reg()
   137  	a.Type = obj.TYPE_MEM
   138  	a.Scale = v.Op.Scale()
   139  	if a.Scale == 1 && i == x86.REG_SP {
   140  		r, i = i, r
   141  	}
   142  	a.Reg = r
   143  	a.Index = i
   144  }
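        // The swap above matters because x86 addressing cannot use SP as an index
        // register: with scale 1 the base and index are interchangeable, so an
        // operand like (AX)(SP*1) is instead encoded as (SP)(AX*1).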
   145  
   146  // DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ.
   147  // See runtime/mkduff.go.
   148  func duffStart(size int64) int64 {
   149  	x, _ := duff(size)
   150  	return x
   151  }
   152  func duffAdj(size int64) int64 {
   153  	_, x := duff(size)
   154  	return x
   155  }
   156  
   157  // duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
   158  // required to use the duffzero mechanism for a block of the given size.
   159  func duff(size int64) (int64, int64) {
   160  	if size < 32 || size > 1024 || size%dzClearStep != 0 {
   161  		panic("bad duffzero size")
   162  	}
   163  	steps := size / dzClearStep
   164  	blocks := steps / dzBlockLen
   165  	steps %= dzBlockLen
   166  	off := dzBlockSize * (dzBlocks - blocks)
   167  	var adj int64
   168  	if steps != 0 {
   169  		off -= dzLeaqSize
   170  		off -= dzMovSize * steps
   171  		adj -= dzClearStep * (dzBlockLen - steps)
   172  	}
   173  	return off, adj
   174  }
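        // Worked example, assuming dzClearStep = 16 and dzBlockLen = 4 (one MOVUPS
        // clears 16 bytes and each block holds four of them, per the comment above):
        // duff(64) yields steps = 4, blocks = 1, and steps%dzBlockLen == 0, so the
        // offset is dzBlockSize*(dzBlocks-1), i.e. entry one full block before the
        // end of duffzero, and no pointer adjustment is needed (adj = 0).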
   175  
   176  func getgFromTLS(s *ssagen.State, r int16) {
   177  	// See the comments in github.com/go-asm/go/cmd/obj/x86/obj6.go
   178  	// near CanUse1InsnTLS for a detailed explanation of these instructions.
   179  	if x86.CanUse1InsnTLS(base.Ctxt) {
   180  		// MOVQ (TLS), r
   181  		p := s.Prog(x86.AMOVQ)
   182  		p.From.Type = obj.TYPE_MEM
   183  		p.From.Reg = x86.REG_TLS
   184  		p.To.Type = obj.TYPE_REG
   185  		p.To.Reg = r
   186  	} else {
   187  		// MOVQ TLS, r
   188  		// MOVQ (r)(TLS*1), r
   189  		p := s.Prog(x86.AMOVQ)
   190  		p.From.Type = obj.TYPE_REG
   191  		p.From.Reg = x86.REG_TLS
   192  		p.To.Type = obj.TYPE_REG
   193  		p.To.Reg = r
   194  		q := s.Prog(x86.AMOVQ)
   195  		q.From.Type = obj.TYPE_MEM
   196  		q.From.Reg = r
   197  		q.From.Index = x86.REG_TLS
   198  		q.From.Scale = 1
   199  		q.To.Type = obj.TYPE_REG
   200  		q.To.Reg = r
   201  	}
   202  }
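        // getgFromTLS is used both by LoweredGetG under ABI0 and to restore the g
        // register (R14) around calls that cross the ABI0/ABIInternal boundary,
        // where g is not kept in a fixed register (see the CALLstatic/CALLtail
        // case below).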
   203  
   204  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   205  	switch v.Op {
   206  	case ssa.OpAMD64VFMADD231SD:
   207  		p := s.Prog(v.Op.Asm())
   208  		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[2].Reg()}
   209  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   210  		p.AddRestSourceReg(v.Args[1].Reg())
   211  	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
   212  		r := v.Reg()
   213  		r1 := v.Args[0].Reg()
   214  		r2 := v.Args[1].Reg()
   215  		switch {
   216  		case r == r1:
   217  			p := s.Prog(v.Op.Asm())
   218  			p.From.Type = obj.TYPE_REG
   219  			p.From.Reg = r2
   220  			p.To.Type = obj.TYPE_REG
   221  			p.To.Reg = r
   222  		case r == r2:
   223  			p := s.Prog(v.Op.Asm())
   224  			p.From.Type = obj.TYPE_REG
   225  			p.From.Reg = r1
   226  			p.To.Type = obj.TYPE_REG
   227  			p.To.Reg = r
   228  		default:
   229  			var asm obj.As
   230  			if v.Op == ssa.OpAMD64ADDQ {
   231  				asm = x86.ALEAQ
   232  			} else {
   233  				asm = x86.ALEAL
   234  			}
   235  			p := s.Prog(asm)
   236  			p.From.Type = obj.TYPE_MEM
   237  			p.From.Reg = r1
   238  			p.From.Scale = 1
   239  			p.From.Index = r2
   240  			p.To.Type = obj.TYPE_REG
   241  			p.To.Reg = r
   242  		}
   243  	// 2-address opcode arithmetic
   244  	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
   245  		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
   246  		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
   247  		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
   248  		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
   249  		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
   250  		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
   251  		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
   252  		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
   253  		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
   254  		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
   255  		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
   256  		ssa.OpAMD64MINSS, ssa.OpAMD64MINSD,
   257  		ssa.OpAMD64POR, ssa.OpAMD64PXOR,
   258  		ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
   259  		ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
   260  		ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ:
   261  		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
   262  
   263  	case ssa.OpAMD64SHRDQ, ssa.OpAMD64SHLDQ:
   264  		p := s.Prog(v.Op.Asm())
   265  		lo, hi, bits := v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg()
   266  		p.From.Type = obj.TYPE_REG
   267  		p.From.Reg = bits
   268  		p.To.Type = obj.TYPE_REG
   269  		p.To.Reg = lo
   270  		p.AddRestSourceReg(hi)
   271  
   272  	case ssa.OpAMD64BLSIQ, ssa.OpAMD64BLSIL,
   273  		ssa.OpAMD64BLSMSKQ, ssa.OpAMD64BLSMSKL,
   274  		ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
   275  		p := s.Prog(v.Op.Asm())
   276  		p.From.Type = obj.TYPE_REG
   277  		p.From.Reg = v.Args[0].Reg()
   278  		p.To.Type = obj.TYPE_REG
   279  		switch v.Op {
   280  		case ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
   281  			p.To.Reg = v.Reg0()
   282  		default:
   283  			p.To.Reg = v.Reg()
   284  		}
   285  
   286  	case ssa.OpAMD64ANDNQ, ssa.OpAMD64ANDNL:
   287  		p := s.Prog(v.Op.Asm())
   288  		p.From.Type = obj.TYPE_REG
   289  		p.From.Reg = v.Args[0].Reg()
   290  		p.To.Type = obj.TYPE_REG
   291  		p.To.Reg = v.Reg()
   292  		p.AddRestSourceReg(v.Args[1].Reg())
   293  
   294  	case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ,
   295  		ssa.OpAMD64SHLXL, ssa.OpAMD64SHLXQ,
   296  		ssa.OpAMD64SHRXL, ssa.OpAMD64SHRXQ:
   297  		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
   298  		p.AddRestSourceReg(v.Args[0].Reg())
   299  
   300  	case ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload,
   301  		ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload,
   302  		ssa.OpAMD64SARXLload, ssa.OpAMD64SARXQload:
   303  		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
   304  		m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
   305  		ssagen.AddAux(&m, v)
   306  		p.AddRestSource(m)
   307  
   308  	case ssa.OpAMD64SHLXLloadidx1, ssa.OpAMD64SHLXLloadidx4, ssa.OpAMD64SHLXLloadidx8,
   309  		ssa.OpAMD64SHRXLloadidx1, ssa.OpAMD64SHRXLloadidx4, ssa.OpAMD64SHRXLloadidx8,
   310  		ssa.OpAMD64SARXLloadidx1, ssa.OpAMD64SARXLloadidx4, ssa.OpAMD64SARXLloadidx8,
   311  		ssa.OpAMD64SHLXQloadidx1, ssa.OpAMD64SHLXQloadidx8,
   312  		ssa.OpAMD64SHRXQloadidx1, ssa.OpAMD64SHRXQloadidx8,
   313  		ssa.OpAMD64SARXQloadidx1, ssa.OpAMD64SARXQloadidx8:
   314  		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[2].Reg())
   315  		m := obj.Addr{Type: obj.TYPE_MEM}
   316  		memIdx(&m, v)
   317  		ssagen.AddAux(&m, v)
   318  		p.AddRestSource(m)
   319  
   320  	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
   321  		// Arg[0] (the dividend) is in AX.
   322  		// Arg[1] (the divisor) can be in any other register.
   323  		// Result[0] (the quotient) is in AX.
   324  		// Result[1] (the remainder) is in DX.
   325  		r := v.Args[1].Reg()
   326  
   327  		// Zero extend dividend.
   328  		opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)
   329  
   330  		// Issue divide.
   331  		p := s.Prog(v.Op.Asm())
   332  		p.From.Type = obj.TYPE_REG
   333  		p.From.Reg = r
   334  
   335  	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
   336  		// Arg[0] (the dividend) is in AX.
   337  		// Arg[1] (the divisor) can be in any other register.
   338  		// Result[0] (the quotient) is in AX.
   339  		// Result[1] (the remainder) is in DX.
   340  		r := v.Args[1].Reg()
   341  
   342  		var opCMP, opNEG, opSXD obj.As
   343  		switch v.Op {
   344  		case ssa.OpAMD64DIVQ:
   345  			opCMP, opNEG, opSXD = x86.ACMPQ, x86.ANEGQ, x86.ACQO
   346  		case ssa.OpAMD64DIVL:
   347  			opCMP, opNEG, opSXD = x86.ACMPL, x86.ANEGL, x86.ACDQ
   348  		case ssa.OpAMD64DIVW:
   349  			opCMP, opNEG, opSXD = x86.ACMPW, x86.ANEGW, x86.ACWD
   350  		}
   351  
   352  		// CPU faults upon signed overflow, which occurs when the most
   353  		// negative int is divided by -1. Handle divide by -1 as a special case.
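        		// With the fixup, the emitted sequence is roughly (for DIVQ):
        		//	CMPQ  divisor, $-1
        		//	JNE   div
        		//	NEGQ  AX            // quotient: n / -1 == -n
        		//	XORL  DX, DX        // remainder: n % -1 == 0
        		//	JMP   done
        		// div:
        		//	CQO
        		//	IDIVQ divisor
        		// done: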
   354  		var j1, j2 *obj.Prog
   355  		if ssa.DivisionNeedsFixUp(v) {
   356  			c := s.Prog(opCMP)
   357  			c.From.Type = obj.TYPE_REG
   358  			c.From.Reg = r
   359  			c.To.Type = obj.TYPE_CONST
   360  			c.To.Offset = -1
   361  
   362  			// Divisor is not -1, proceed with normal division.
   363  			j1 = s.Prog(x86.AJNE)
   364  			j1.To.Type = obj.TYPE_BRANCH
   365  
   366  			// Divisor is -1, manually compute quotient and remainder via fixup code.
   367  			// n / -1 = -n
   368  			n1 := s.Prog(opNEG)
   369  			n1.To.Type = obj.TYPE_REG
   370  			n1.To.Reg = x86.REG_AX
   371  
   372  			// n % -1 == 0
   373  			opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)
   374  
   375  			// TODO(khr): issue only the -1 fixup code we need.
   376  			// For instance, if only the quotient is used, no point in zeroing the remainder.
   377  
   378  			// Skip over normal division.
   379  			j2 = s.Prog(obj.AJMP)
   380  			j2.To.Type = obj.TYPE_BRANCH
   381  		}
   382  
   383  		// Sign extend dividend and perform division.
   384  		p := s.Prog(opSXD)
   385  		if j1 != nil {
   386  			j1.To.SetTarget(p)
   387  		}
   388  		p = s.Prog(v.Op.Asm())
   389  		p.From.Type = obj.TYPE_REG
   390  		p.From.Reg = r
   391  
   392  		if j2 != nil {
   393  			j2.To.SetTarget(s.Pc())
   394  		}
   395  
   396  	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
   397  		// The frontend rewrites constant division by 8/16/32-bit integers into
   398  		// HMUL by a constant.
   399  		// SSA rewrites generate the 64-bit versions.
   400  
   401  		// Arg[0] is already in AX as it's the only register we allow
   402  		// and DX is the only output we care about (the high bits)
   403  		p := s.Prog(v.Op.Asm())
   404  		p.From.Type = obj.TYPE_REG
   405  		p.From.Reg = v.Args[1].Reg()
   406  
   407  		// IMULB puts the high portion in AH instead of DL,
   408  		// so move it to DL for consistency
   409  		if v.Type.Size() == 1 {
   410  			m := s.Prog(x86.AMOVB)
   411  			m.From.Type = obj.TYPE_REG
   412  			m.From.Reg = x86.REG_AH
   413  			m.To.Type = obj.TYPE_REG
   414  			m.To.Reg = x86.REG_DX
   415  		}
   416  
   417  	case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU:
   418  		// Arg[0] is already in AX as it's the only register we allow
   419  		// results lo in AX
   420  		p := s.Prog(v.Op.Asm())
   421  		p.From.Type = obj.TYPE_REG
   422  		p.From.Reg = v.Args[1].Reg()
   423  
   424  	case ssa.OpAMD64MULQU2:
   425  		// Arg[0] is already in AX as it's the only register we allow
   426  		// results hi in DX, lo in AX
   427  		p := s.Prog(v.Op.Asm())
   428  		p.From.Type = obj.TYPE_REG
   429  		p.From.Reg = v.Args[1].Reg()
   430  
   431  	case ssa.OpAMD64DIVQU2:
   432  		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow.
   433  		// results q in AX, r in DX
   434  		p := s.Prog(v.Op.Asm())
   435  		p.From.Type = obj.TYPE_REG
   436  		p.From.Reg = v.Args[2].Reg()
   437  
   438  	case ssa.OpAMD64AVGQU:
   439  		// compute (x+y)/2 unsigned.
   440  		// Do a 64-bit add, the overflow goes into the carry.
   441  		// Shift right once and pull the carry back into the 63rd bit.
   442  		p := s.Prog(x86.AADDQ)
   443  		p.From.Type = obj.TYPE_REG
   444  		p.To.Type = obj.TYPE_REG
   445  		p.To.Reg = v.Reg()
   446  		p.From.Reg = v.Args[1].Reg()
   447  		p = s.Prog(x86.ARCRQ)
   448  		p.From.Type = obj.TYPE_CONST
   449  		p.From.Offset = 1
   450  		p.To.Type = obj.TYPE_REG
   451  		p.To.Reg = v.Reg()
   452  
   453  	case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ:
   454  		r := v.Reg0()
   455  		r0 := v.Args[0].Reg()
   456  		r1 := v.Args[1].Reg()
   457  		switch r {
   458  		case r0:
   459  			p := s.Prog(v.Op.Asm())
   460  			p.From.Type = obj.TYPE_REG
   461  			p.From.Reg = r1
   462  			p.To.Type = obj.TYPE_REG
   463  			p.To.Reg = r
   464  		case r1:
   465  			p := s.Prog(v.Op.Asm())
   466  			p.From.Type = obj.TYPE_REG
   467  			p.From.Reg = r0
   468  			p.To.Type = obj.TYPE_REG
   469  			p.To.Reg = r
   470  		default:
   471  			v.Fatalf("output not in same register as an input %s", v.LongString())
   472  		}
   473  
   474  	case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ:
   475  		p := s.Prog(v.Op.Asm())
   476  		p.From.Type = obj.TYPE_REG
   477  		p.From.Reg = v.Args[1].Reg()
   478  		p.To.Type = obj.TYPE_REG
   479  		p.To.Reg = v.Reg0()
   480  
   481  	case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst:
   482  		p := s.Prog(v.Op.Asm())
   483  		p.From.Type = obj.TYPE_CONST
   484  		p.From.Offset = v.AuxInt
   485  		p.To.Type = obj.TYPE_REG
   486  		p.To.Reg = v.Reg0()
   487  
   488  	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
   489  		r := v.Reg()
   490  		a := v.Args[0].Reg()
   491  		if r == a {
   492  			switch v.AuxInt {
   493  			case 1:
   494  				var asm obj.As
   495  				// The software optimization manual recommends add $1,reg.
   496  				// But inc/dec is 1 byte smaller. ICC always uses inc;
   497  				// Clang/GCC choose depending on flags, but prefer add.
   498  				// Experiments show that inc/dec is a little faster and also
   499  				// makes the binary a little smaller.
   500  				if v.Op == ssa.OpAMD64ADDQconst {
   501  					asm = x86.AINCQ
   502  				} else {
   503  					asm = x86.AINCL
   504  				}
   505  				p := s.Prog(asm)
   506  				p.To.Type = obj.TYPE_REG
   507  				p.To.Reg = r
   508  				return
   509  			case -1:
   510  				var asm obj.As
   511  				if v.Op == ssa.OpAMD64ADDQconst {
   512  					asm = x86.ADECQ
   513  				} else {
   514  					asm = x86.ADECL
   515  				}
   516  				p := s.Prog(asm)
   517  				p.To.Type = obj.TYPE_REG
   518  				p.To.Reg = r
   519  				return
   520  			case 0x80:
   521  				// 'SUBQ $-0x80, r' is shorter to encode than
   522  				// and functionally equivalent to 'ADDQ $0x80, r'.
   523  				asm := x86.ASUBL
   524  				if v.Op == ssa.OpAMD64ADDQconst {
   525  					asm = x86.ASUBQ
   526  				}
   527  				p := s.Prog(asm)
   528  				p.From.Type = obj.TYPE_CONST
   529  				p.From.Offset = -0x80
   530  				p.To.Type = obj.TYPE_REG
   531  				p.To.Reg = r
   532  				return
   533  
   534  			}
   535  			p := s.Prog(v.Op.Asm())
   536  			p.From.Type = obj.TYPE_CONST
   537  			p.From.Offset = v.AuxInt
   538  			p.To.Type = obj.TYPE_REG
   539  			p.To.Reg = r
   540  			return
   541  		}
   542  		var asm obj.As
   543  		if v.Op == ssa.OpAMD64ADDQconst {
   544  			asm = x86.ALEAQ
   545  		} else {
   546  			asm = x86.ALEAL
   547  		}
   548  		p := s.Prog(asm)
   549  		p.From.Type = obj.TYPE_MEM
   550  		p.From.Reg = a
   551  		p.From.Offset = v.AuxInt
   552  		p.To.Type = obj.TYPE_REG
   553  		p.To.Reg = r
   554  
   555  	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
   556  		ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
   557  		ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
   558  		ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
   559  		ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
   560  		ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
   561  		ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
   562  		ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
   563  		ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
   564  		ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
   565  		ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
   566  		ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
   567  		p := s.Prog(v.Op.Asm())
   568  		p.From.Type = obj.TYPE_REG
   569  		p.From.Reg = v.Args[1].Reg()
   570  		p.To.Type = obj.TYPE_REG
   571  		p.To.Reg = v.Reg()
   572  
   573  	case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
   574  		// Flag condition: ^ZERO || PARITY
   575  		// Generate:
   576  		//   CMOV*NE  SRC,DST
   577  		//   CMOV*PS  SRC,DST
   578  		p := s.Prog(v.Op.Asm())
   579  		p.From.Type = obj.TYPE_REG
   580  		p.From.Reg = v.Args[1].Reg()
   581  		p.To.Type = obj.TYPE_REG
   582  		p.To.Reg = v.Reg()
   583  		var q *obj.Prog
   584  		if v.Op == ssa.OpAMD64CMOVQNEF {
   585  			q = s.Prog(x86.ACMOVQPS)
   586  		} else if v.Op == ssa.OpAMD64CMOVLNEF {
   587  			q = s.Prog(x86.ACMOVLPS)
   588  		} else {
   589  			q = s.Prog(x86.ACMOVWPS)
   590  		}
   591  		q.From.Type = obj.TYPE_REG
   592  		q.From.Reg = v.Args[1].Reg()
   593  		q.To.Type = obj.TYPE_REG
   594  		q.To.Reg = v.Reg()
   595  
   596  	case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
   597  		// Flag condition: ZERO && !PARITY
   598  		// Generate:
   599  		//   MOV      SRC,TMP
   600  		//   CMOV*NE  DST,TMP
   601  		//   CMOV*PC  TMP,DST
   602  		//
   603  		// TODO(rasky): we could generate:
   604  		//   CMOV*NE  DST,SRC
   605  		//   CMOV*PC  SRC,DST
   606  		// But this requires a way for regalloc to know that SRC might be
   607  		// clobbered by this instruction.
   608  		t := v.RegTmp()
   609  		opregreg(s, moveByType(v.Type), t, v.Args[1].Reg())
   610  
   611  		p := s.Prog(v.Op.Asm())
   612  		p.From.Type = obj.TYPE_REG
   613  		p.From.Reg = v.Reg()
   614  		p.To.Type = obj.TYPE_REG
   615  		p.To.Reg = t
   616  		var q *obj.Prog
   617  		if v.Op == ssa.OpAMD64CMOVQEQF {
   618  			q = s.Prog(x86.ACMOVQPC)
   619  		} else if v.Op == ssa.OpAMD64CMOVLEQF {
   620  			q = s.Prog(x86.ACMOVLPC)
   621  		} else {
   622  			q = s.Prog(x86.ACMOVWPC)
   623  		}
   624  		q.From.Type = obj.TYPE_REG
   625  		q.From.Reg = t
   626  		q.To.Type = obj.TYPE_REG
   627  		q.To.Reg = v.Reg()
   628  
   629  	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
   630  		r := v.Reg()
   631  		p := s.Prog(v.Op.Asm())
   632  		p.From.Type = obj.TYPE_CONST
   633  		p.From.Offset = v.AuxInt
   634  		p.To.Type = obj.TYPE_REG
   635  		p.To.Reg = r
   636  		p.AddRestSourceReg(v.Args[0].Reg())
   637  
   638  	case ssa.OpAMD64ANDQconst:
   639  		asm := v.Op.Asm()
   640  		// If the constant is positive and fits into 32 bits, use ANDL.
   641  		// This saves a few bytes of encoding.
   642  		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
   643  			asm = x86.AANDL
   644  		}
   645  		p := s.Prog(asm)
   646  		p.From.Type = obj.TYPE_CONST
   647  		p.From.Offset = v.AuxInt
   648  		p.To.Type = obj.TYPE_REG
   649  		p.To.Reg = v.Reg()
   650  
   651  	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
   652  		ssa.OpAMD64ANDLconst,
   653  		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
   654  		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
   655  		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
   656  		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
   657  		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
   658  		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
   659  		p := s.Prog(v.Op.Asm())
   660  		p.From.Type = obj.TYPE_CONST
   661  		p.From.Offset = v.AuxInt
   662  		p.To.Type = obj.TYPE_REG
   663  		p.To.Reg = v.Reg()
   664  	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
   665  		r := v.Reg()
   666  		p := s.Prog(v.Op.Asm())
   667  		p.From.Type = obj.TYPE_REG
   668  		p.From.Reg = r
   669  		p.To.Type = obj.TYPE_REG
   670  		p.To.Reg = r
   671  	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8,
   672  		ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8,
   673  		ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
   674  		p := s.Prog(v.Op.Asm())
   675  		memIdx(&p.From, v)
   676  		o := v.Reg()
   677  		p.To.Type = obj.TYPE_REG
   678  		p.To.Reg = o
   679  		if v.AuxInt != 0 && v.Aux == nil {
   680  			// Emit an additional LEA to add the displacement instead of creating a slow 3-operand LEA.
   681  			switch v.Op {
   682  			case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
   683  				p = s.Prog(x86.ALEAQ)
   684  			case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8:
   685  				p = s.Prog(x86.ALEAL)
   686  			case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
   687  				p = s.Prog(x86.ALEAW)
   688  			}
   689  			p.From.Type = obj.TYPE_MEM
   690  			p.From.Reg = o
   691  			p.To.Type = obj.TYPE_REG
   692  			p.To.Reg = o
   693  		}
   694  		ssagen.AddAux(&p.From, v)
   695  	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW:
   696  		p := s.Prog(v.Op.Asm())
   697  		p.From.Type = obj.TYPE_MEM
   698  		p.From.Reg = v.Args[0].Reg()
   699  		ssagen.AddAux(&p.From, v)
   700  		p.To.Type = obj.TYPE_REG
   701  		p.To.Reg = v.Reg()
   702  	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
   703  		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
   704  		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
   705  		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
   706  	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
   707  		// The Go assembler has swapped operands for UCOMISx relative to CMP,
   708  		// so we must account for that here.
   709  		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
   710  	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
   711  		p := s.Prog(v.Op.Asm())
   712  		p.From.Type = obj.TYPE_REG
   713  		p.From.Reg = v.Args[0].Reg()
   714  		p.To.Type = obj.TYPE_CONST
   715  		p.To.Offset = v.AuxInt
   716  	case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
   717  		ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
   718  		ssa.OpAMD64BTSQconst,
   719  		ssa.OpAMD64BTCQconst,
   720  		ssa.OpAMD64BTRQconst:
   721  		op := v.Op
   722  		if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
   723  			// Emit 32-bit version because it's shorter
   724  			op = ssa.OpAMD64BTLconst
   725  		}
   726  		p := s.Prog(op.Asm())
   727  		p.From.Type = obj.TYPE_CONST
   728  		p.From.Offset = v.AuxInt
   729  		p.To.Type = obj.TYPE_REG
   730  		p.To.Reg = v.Args[0].Reg()
   731  	case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload:
   732  		p := s.Prog(v.Op.Asm())
   733  		p.From.Type = obj.TYPE_MEM
   734  		p.From.Reg = v.Args[0].Reg()
   735  		ssagen.AddAux(&p.From, v)
   736  		p.To.Type = obj.TYPE_REG
   737  		p.To.Reg = v.Args[1].Reg()
   738  	case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload:
   739  		sc := v.AuxValAndOff()
   740  		p := s.Prog(v.Op.Asm())
   741  		p.From.Type = obj.TYPE_MEM
   742  		p.From.Reg = v.Args[0].Reg()
   743  		ssagen.AddAux2(&p.From, v, sc.Off64())
   744  		p.To.Type = obj.TYPE_CONST
   745  		p.To.Offset = sc.Val64()
   746  	case ssa.OpAMD64CMPQloadidx8, ssa.OpAMD64CMPQloadidx1, ssa.OpAMD64CMPLloadidx4, ssa.OpAMD64CMPLloadidx1, ssa.OpAMD64CMPWloadidx2, ssa.OpAMD64CMPWloadidx1, ssa.OpAMD64CMPBloadidx1:
   747  		p := s.Prog(v.Op.Asm())
   748  		memIdx(&p.From, v)
   749  		ssagen.AddAux(&p.From, v)
   750  		p.To.Type = obj.TYPE_REG
   751  		p.To.Reg = v.Args[2].Reg()
   752  	case ssa.OpAMD64CMPQconstloadidx8, ssa.OpAMD64CMPQconstloadidx1, ssa.OpAMD64CMPLconstloadidx4, ssa.OpAMD64CMPLconstloadidx1, ssa.OpAMD64CMPWconstloadidx2, ssa.OpAMD64CMPWconstloadidx1, ssa.OpAMD64CMPBconstloadidx1:
   753  		sc := v.AuxValAndOff()
   754  		p := s.Prog(v.Op.Asm())
   755  		memIdx(&p.From, v)
   756  		ssagen.AddAux2(&p.From, v, sc.Off64())
   757  		p.To.Type = obj.TYPE_CONST
   758  		p.To.Offset = sc.Val64()
   759  	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
   760  		x := v.Reg()
   761  
   762  		// If flags aren't live (indicated by v.Aux == nil),
   763  		// then we can rewrite MOV $0, AX into XOR AX, AX.
   764  		if v.AuxInt == 0 && v.Aux == nil {
   765  			opregreg(s, x86.AXORL, x, x)
   766  			break
   767  		}
   768  
   769  		asm := v.Op.Asm()
   770  		// Use MOVL to move a small constant into a register
   771  		// when the constant is positive and fits into 32 bits.
   772  		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
   773  			// The upper 32 bits are zeroed automatically when using MOVL.
   774  			asm = x86.AMOVL
   775  		}
   776  		p := s.Prog(asm)
   777  		p.From.Type = obj.TYPE_CONST
   778  		p.From.Offset = v.AuxInt
   779  		p.To.Type = obj.TYPE_REG
   780  		p.To.Reg = x
   781  	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
   782  		x := v.Reg()
   783  		p := s.Prog(v.Op.Asm())
   784  		p.From.Type = obj.TYPE_FCONST
   785  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   786  		p.To.Type = obj.TYPE_REG
   787  		p.To.Reg = x
   788  	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVOload,
   789  		ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
   790  		ssa.OpAMD64MOVBEQload, ssa.OpAMD64MOVBELload:
   791  		p := s.Prog(v.Op.Asm())
   792  		p.From.Type = obj.TYPE_MEM
   793  		p.From.Reg = v.Args[0].Reg()
   794  		ssagen.AddAux(&p.From, v)
   795  		p.To.Type = obj.TYPE_REG
   796  		p.To.Reg = v.Reg()
   797  	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
   798  		ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2,
   799  		ssa.OpAMD64MOVBELloadidx1, ssa.OpAMD64MOVBELloadidx4, ssa.OpAMD64MOVBELloadidx8, ssa.OpAMD64MOVBEQloadidx1, ssa.OpAMD64MOVBEQloadidx8:
   800  		p := s.Prog(v.Op.Asm())
   801  		memIdx(&p.From, v)
   802  		ssagen.AddAux(&p.From, v)
   803  		p.To.Type = obj.TYPE_REG
   804  		p.To.Reg = v.Reg()
   805  	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
   806  		ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
   807  		ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify,
   808  		ssa.OpAMD64MOVBEQstore, ssa.OpAMD64MOVBELstore, ssa.OpAMD64MOVBEWstore:
   809  		p := s.Prog(v.Op.Asm())
   810  		p.From.Type = obj.TYPE_REG
   811  		p.From.Reg = v.Args[1].Reg()
   812  		p.To.Type = obj.TYPE_MEM
   813  		p.To.Reg = v.Args[0].Reg()
   814  		ssagen.AddAux(&p.To, v)
   815  	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
   816  		ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2,
   817  		ssa.OpAMD64ADDLmodifyidx1, ssa.OpAMD64ADDLmodifyidx4, ssa.OpAMD64ADDLmodifyidx8, ssa.OpAMD64ADDQmodifyidx1, ssa.OpAMD64ADDQmodifyidx8,
   818  		ssa.OpAMD64SUBLmodifyidx1, ssa.OpAMD64SUBLmodifyidx4, ssa.OpAMD64SUBLmodifyidx8, ssa.OpAMD64SUBQmodifyidx1, ssa.OpAMD64SUBQmodifyidx8,
   819  		ssa.OpAMD64ANDLmodifyidx1, ssa.OpAMD64ANDLmodifyidx4, ssa.OpAMD64ANDLmodifyidx8, ssa.OpAMD64ANDQmodifyidx1, ssa.OpAMD64ANDQmodifyidx8,
   820  		ssa.OpAMD64ORLmodifyidx1, ssa.OpAMD64ORLmodifyidx4, ssa.OpAMD64ORLmodifyidx8, ssa.OpAMD64ORQmodifyidx1, ssa.OpAMD64ORQmodifyidx8,
   821  		ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8,
   822  		ssa.OpAMD64MOVBEWstoreidx1, ssa.OpAMD64MOVBEWstoreidx2, ssa.OpAMD64MOVBELstoreidx1, ssa.OpAMD64MOVBELstoreidx4, ssa.OpAMD64MOVBELstoreidx8, ssa.OpAMD64MOVBEQstoreidx1, ssa.OpAMD64MOVBEQstoreidx8:
   823  		p := s.Prog(v.Op.Asm())
   824  		p.From.Type = obj.TYPE_REG
   825  		p.From.Reg = v.Args[2].Reg()
   826  		memIdx(&p.To, v)
   827  		ssagen.AddAux(&p.To, v)
   828  	case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify:
   829  		sc := v.AuxValAndOff()
   830  		off := sc.Off64()
   831  		val := sc.Val()
   832  		if val == 1 || val == -1 {
   833  			var asm obj.As
   834  			if v.Op == ssa.OpAMD64ADDQconstmodify {
   835  				if val == 1 {
   836  					asm = x86.AINCQ
   837  				} else {
   838  					asm = x86.ADECQ
   839  				}
   840  			} else {
   841  				if val == 1 {
   842  					asm = x86.AINCL
   843  				} else {
   844  					asm = x86.ADECL
   845  				}
   846  			}
   847  			p := s.Prog(asm)
   848  			p.To.Type = obj.TYPE_MEM
   849  			p.To.Reg = v.Args[0].Reg()
   850  			ssagen.AddAux2(&p.To, v, off)
   851  			break
   852  		}
   853  		fallthrough
   854  	case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
   855  		ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify,
   856  		ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTCQconstmodify:
   857  		sc := v.AuxValAndOff()
   858  		off := sc.Off64()
   859  		val := sc.Val64()
   860  		p := s.Prog(v.Op.Asm())
   861  		p.From.Type = obj.TYPE_CONST
   862  		p.From.Offset = val
   863  		p.To.Type = obj.TYPE_MEM
   864  		p.To.Reg = v.Args[0].Reg()
   865  		ssagen.AddAux2(&p.To, v, off)
   866  
   867  	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
   868  		p := s.Prog(v.Op.Asm())
   869  		p.From.Type = obj.TYPE_CONST
   870  		sc := v.AuxValAndOff()
   871  		p.From.Offset = sc.Val64()
   872  		p.To.Type = obj.TYPE_MEM
   873  		p.To.Reg = v.Args[0].Reg()
   874  		ssagen.AddAux2(&p.To, v, sc.Off64())
   875  	case ssa.OpAMD64MOVOstoreconst:
   876  		sc := v.AuxValAndOff()
   877  		if sc.Val() != 0 {
   878  			v.Fatalf("MOVO for non zero constants not implemented: %s", v.LongString())
   879  		}
   880  
   881  		if s.ABI != obj.ABIInternal {
   882  			// zero X15 manually
   883  			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
   884  		}
   885  		p := s.Prog(v.Op.Asm())
   886  		p.From.Type = obj.TYPE_REG
   887  		p.From.Reg = x86.REG_X15
   888  		p.To.Type = obj.TYPE_MEM
   889  		p.To.Reg = v.Args[0].Reg()
   890  		ssagen.AddAux2(&p.To, v, sc.Off64())
   891  
   892  	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1,
   893  		ssa.OpAMD64ADDLconstmodifyidx1, ssa.OpAMD64ADDLconstmodifyidx4, ssa.OpAMD64ADDLconstmodifyidx8, ssa.OpAMD64ADDQconstmodifyidx1, ssa.OpAMD64ADDQconstmodifyidx8,
   894  		ssa.OpAMD64ANDLconstmodifyidx1, ssa.OpAMD64ANDLconstmodifyidx4, ssa.OpAMD64ANDLconstmodifyidx8, ssa.OpAMD64ANDQconstmodifyidx1, ssa.OpAMD64ANDQconstmodifyidx8,
   895  		ssa.OpAMD64ORLconstmodifyidx1, ssa.OpAMD64ORLconstmodifyidx4, ssa.OpAMD64ORLconstmodifyidx8, ssa.OpAMD64ORQconstmodifyidx1, ssa.OpAMD64ORQconstmodifyidx8,
   896  		ssa.OpAMD64XORLconstmodifyidx1, ssa.OpAMD64XORLconstmodifyidx4, ssa.OpAMD64XORLconstmodifyidx8, ssa.OpAMD64XORQconstmodifyidx1, ssa.OpAMD64XORQconstmodifyidx8:
   897  		p := s.Prog(v.Op.Asm())
   898  		p.From.Type = obj.TYPE_CONST
   899  		sc := v.AuxValAndOff()
   900  		p.From.Offset = sc.Val64()
   901  		switch {
   902  		case p.As == x86.AADDQ && p.From.Offset == 1:
   903  			p.As = x86.AINCQ
   904  			p.From.Type = obj.TYPE_NONE
   905  		case p.As == x86.AADDQ && p.From.Offset == -1:
   906  			p.As = x86.ADECQ
   907  			p.From.Type = obj.TYPE_NONE
   908  		case p.As == x86.AADDL && p.From.Offset == 1:
   909  			p.As = x86.AINCL
   910  			p.From.Type = obj.TYPE_NONE
   911  		case p.As == x86.AADDL && p.From.Offset == -1:
   912  			p.As = x86.ADECL
   913  			p.From.Type = obj.TYPE_NONE
   914  		}
   915  		memIdx(&p.To, v)
   916  		ssagen.AddAux2(&p.To, v, sc.Off64())
   917  	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
   918  		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
   919  		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
   920  		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
   921  	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
   922  		r := v.Reg()
   923  		// Break false dependency on destination register.
   924  		opregreg(s, x86.AXORPS, r, r)
   925  		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
   926  	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
   927  		var p *obj.Prog
   928  		switch v.Op {
   929  		case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
   930  			p = s.Prog(x86.AMOVQ)
   931  		case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
   932  			p = s.Prog(x86.AMOVL)
   933  		}
   934  		p.From.Type = obj.TYPE_REG
   935  		p.From.Reg = v.Args[0].Reg()
   936  		p.To.Type = obj.TYPE_REG
   937  		p.To.Reg = v.Reg()
   938  	case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
   939  		ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
   940  		ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
   941  		ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
   942  		ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
   943  		p := s.Prog(v.Op.Asm())
   944  		p.From.Type = obj.TYPE_MEM
   945  		p.From.Reg = v.Args[1].Reg()
   946  		ssagen.AddAux(&p.From, v)
   947  		p.To.Type = obj.TYPE_REG
   948  		p.To.Reg = v.Reg()
   949  	case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8,
   950  		ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8,
   951  		ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8,
   952  		ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8,
   953  		ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8,
   954  		ssa.OpAMD64ADDSSloadidx1, ssa.OpAMD64ADDSSloadidx4, ssa.OpAMD64ADDSDloadidx1, ssa.OpAMD64ADDSDloadidx8,
   955  		ssa.OpAMD64SUBSSloadidx1, ssa.OpAMD64SUBSSloadidx4, ssa.OpAMD64SUBSDloadidx1, ssa.OpAMD64SUBSDloadidx8,
   956  		ssa.OpAMD64MULSSloadidx1, ssa.OpAMD64MULSSloadidx4, ssa.OpAMD64MULSDloadidx1, ssa.OpAMD64MULSDloadidx8,
   957  		ssa.OpAMD64DIVSSloadidx1, ssa.OpAMD64DIVSSloadidx4, ssa.OpAMD64DIVSDloadidx1, ssa.OpAMD64DIVSDloadidx8:
   958  		p := s.Prog(v.Op.Asm())
   959  
   960  		r, i := v.Args[1].Reg(), v.Args[2].Reg()
   961  		p.From.Type = obj.TYPE_MEM
   962  		p.From.Scale = v.Op.Scale()
   963  		if p.From.Scale == 1 && i == x86.REG_SP {
   964  			r, i = i, r
   965  		}
   966  		p.From.Reg = r
   967  		p.From.Index = i
   968  
   969  		ssagen.AddAux(&p.From, v)
   970  		p.To.Type = obj.TYPE_REG
   971  		p.To.Reg = v.Reg()
   972  	case ssa.OpAMD64DUFFZERO:
   973  		if s.ABI != obj.ABIInternal {
   974  			// zero X15 manually
   975  			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
   976  		}
   977  		off := duffStart(v.AuxInt)
   978  		adj := duffAdj(v.AuxInt)
   979  		var p *obj.Prog
   980  		if adj != 0 {
   981  			p = s.Prog(x86.ALEAQ)
   982  			p.From.Type = obj.TYPE_MEM
   983  			p.From.Offset = adj
   984  			p.From.Reg = x86.REG_DI
   985  			p.To.Type = obj.TYPE_REG
   986  			p.To.Reg = x86.REG_DI
   987  		}
   988  		p = s.Prog(obj.ADUFFZERO)
   989  		p.To.Type = obj.TYPE_ADDR
   990  		p.To.Sym = ir.Syms.Duffzero
   991  		p.To.Offset = off
   992  	case ssa.OpAMD64DUFFCOPY:
   993  		p := s.Prog(obj.ADUFFCOPY)
   994  		p.To.Type = obj.TYPE_ADDR
   995  		p.To.Sym = ir.Syms.Duffcopy
   996  		if v.AuxInt%16 != 0 {
   997  			v.Fatalf("bad DUFFCOPY AuxInt %v", v.AuxInt)
   998  		}
   999  		p.To.Offset = 14 * (64 - v.AuxInt/16)
  1000  		// 14 and 64 are magic constants.  14 is the number of bytes to encode:
  1001  		//	MOVUPS	(SI), X0
  1002  		//	ADDQ	$16, SI
  1003  		//	MOVUPS	X0, (DI)
  1004  		//	ADDQ	$16, DI
  1005  		// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.
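  		// For example, copying 32 bytes (AuxInt = 32) needs 2 such blocks, so
  		// the call enters duffcopy at offset 14*(64-2) = 868, leaving exactly
  		// 2 blocks to execute before the final RET.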
  1006  
  1007  	case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
  1008  		if v.Type.IsMemory() {
  1009  			return
  1010  		}
  1011  		x := v.Args[0].Reg()
  1012  		y := v.Reg()
  1013  		if x != y {
  1014  			opregreg(s, moveByType(v.Type), y, x)
  1015  		}
  1016  	case ssa.OpLoadReg:
  1017  		if v.Type.IsFlags() {
  1018  			v.Fatalf("load flags not implemented: %v", v.LongString())
  1019  			return
  1020  		}
  1021  		p := s.Prog(loadByType(v.Type))
  1022  		ssagen.AddrAuto(&p.From, v.Args[0])
  1023  		p.To.Type = obj.TYPE_REG
  1024  		p.To.Reg = v.Reg()
  1025  
  1026  	case ssa.OpStoreReg:
  1027  		if v.Type.IsFlags() {
  1028  			v.Fatalf("store flags not implemented: %v", v.LongString())
  1029  			return
  1030  		}
  1031  		p := s.Prog(storeByType(v.Type))
  1032  		p.From.Type = obj.TYPE_REG
  1033  		p.From.Reg = v.Args[0].Reg()
  1034  		ssagen.AddrAuto(&p.To, v)
  1035  	case ssa.OpAMD64LoweredHasCPUFeature:
  1036  		p := s.Prog(x86.AMOVBLZX)
  1037  		p.From.Type = obj.TYPE_MEM
  1038  		ssagen.AddAux(&p.From, v)
  1039  		p.To.Type = obj.TYPE_REG
  1040  		p.To.Reg = v.Reg()
  1041  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
  1042  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill.
  1043  		// The loop only runs once.
  1044  		for _, ap := range v.Block.Func.RegArgs {
  1045  			// Pass the spill/unspill information along to the assembler, offset by size of return PC pushed on stack.
  1046  			addr := ssagen.SpillSlotAddr(ap, x86.REG_SP, v.Block.Func.Config.PtrSize)
  1047  			s.FuncInfo().AddSpill(
  1048  				obj.RegSpill{Reg: ap.Reg, Addr: addr, Unspill: loadByType(ap.Type), Spill: storeByType(ap.Type)})
  1049  		}
  1050  		v.Block.Func.RegArgs = nil
  1051  		ssagen.CheckArgReg(v)
  1052  	case ssa.OpAMD64LoweredGetClosurePtr:
  1053  		// Closure pointer is DX.
  1054  		ssagen.CheckLoweredGetClosurePtr(v)
  1055  	case ssa.OpAMD64LoweredGetG:
  1056  		if s.ABI == obj.ABIInternal {
  1057  			v.Fatalf("LoweredGetG should not appear in ABIInternal")
  1058  		}
  1059  		r := v.Reg()
  1060  		getgFromTLS(s, r)
  1061  	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLtail:
  1062  		if s.ABI == obj.ABI0 && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABIInternal {
  1063  			// zeroing X15 when entering ABIInternal from ABI0
  1064  			if buildcfg.GOOS != "plan9" { // do not use SSE on Plan 9
  1065  				opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
  1066  			}
  1067  			// set G register from TLS
  1068  			getgFromTLS(s, x86.REG_R14)
  1069  		}
  1070  		if v.Op == ssa.OpAMD64CALLtail {
  1071  			s.TailCall(v)
  1072  			break
  1073  		}
  1074  		s.Call(v)
  1075  		if s.ABI == obj.ABIInternal && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABI0 {
  1076  			// zeroing X15 when entering ABIInternal from ABI0
  1077  			if buildcfg.GOOS != "plan9" { // do not use SSE on Plan 9
  1078  				opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
  1079  			}
  1080  			// set G register from TLS
  1081  			getgFromTLS(s, x86.REG_R14)
  1082  		}
  1083  	case ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
  1084  		s.Call(v)
  1085  
  1086  	case ssa.OpAMD64LoweredGetCallerPC:
  1087  		p := s.Prog(x86.AMOVQ)
  1088  		p.From.Type = obj.TYPE_MEM
  1089  		p.From.Offset = -8 // PC is stored 8 bytes below first parameter.
  1090  		p.From.Name = obj.NAME_PARAM
  1091  		p.To.Type = obj.TYPE_REG
  1092  		p.To.Reg = v.Reg()
  1093  
  1094  	case ssa.OpAMD64LoweredGetCallerSP:
  1095  		// caller's SP is the address of the first arg
  1096  		mov := x86.AMOVQ
  1097  		if types.PtrSize == 4 {
  1098  			mov = x86.AMOVL
  1099  		}
  1100  		p := s.Prog(mov)
  1101  		p.From.Type = obj.TYPE_ADDR
  1102  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize // 0 on amd64, just to be consistent with other architectures
  1103  		p.From.Name = obj.NAME_PARAM
  1104  		p.To.Type = obj.TYPE_REG
  1105  		p.To.Reg = v.Reg()
  1106  
  1107  	case ssa.OpAMD64LoweredWB:
  1108  		p := s.Prog(obj.ACALL)
  1109  		p.To.Type = obj.TYPE_MEM
  1110  		p.To.Name = obj.NAME_EXTERN
  1111  		// AuxInt encodes how many buffer entries we need.
  1112  		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
  1113  
  1114  	case ssa.OpAMD64LoweredPanicBoundsA, ssa.OpAMD64LoweredPanicBoundsB, ssa.OpAMD64LoweredPanicBoundsC:
  1115  		p := s.Prog(obj.ACALL)
  1116  		p.To.Type = obj.TYPE_MEM
  1117  		p.To.Name = obj.NAME_EXTERN
  1118  		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
  1119  		s.UseArgs(int64(2 * types.PtrSize)) // space used in callee args area by assembly stubs
  1120  
  1121  	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
  1122  		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
  1123  		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
  1124  		p := s.Prog(v.Op.Asm())
  1125  		p.To.Type = obj.TYPE_REG
  1126  		p.To.Reg = v.Reg()
  1127  
  1128  	case ssa.OpAMD64NEGLflags:
  1129  		p := s.Prog(v.Op.Asm())
  1130  		p.To.Type = obj.TYPE_REG
  1131  		p.To.Reg = v.Reg0()
  1132  
  1133  	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
  1134  		p := s.Prog(v.Op.Asm())
  1135  		p.From.Type = obj.TYPE_REG
  1136  		p.From.Reg = v.Args[0].Reg()
  1137  		p.To.Type = obj.TYPE_REG
  1138  		switch v.Op {
  1139  		case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
  1140  			p.To.Reg = v.Reg0()
  1141  		case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
  1142  			p.To.Reg = v.Reg()
  1143  		}
  1144  	case ssa.OpAMD64ROUNDSD:
  1145  		p := s.Prog(v.Op.Asm())
  1146  		val := v.AuxInt
  1147  		// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
  1148  		if val < 0 || val > 3 {
  1149  			v.Fatalf("Invalid rounding mode")
  1150  		}
  1151  		p.From.Offset = val
  1152  		p.From.Type = obj.TYPE_CONST
  1153  		p.AddRestSourceReg(v.Args[0].Reg())
  1154  		p.To.Type = obj.TYPE_REG
  1155  		p.To.Reg = v.Reg()
  1156  	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL,
  1157  		ssa.OpAMD64TZCNTQ, ssa.OpAMD64TZCNTL,
  1158  		ssa.OpAMD64LZCNTQ, ssa.OpAMD64LZCNTL:
  1159  		if v.Args[0].Reg() != v.Reg() {
  1160  			// POPCNT/TZCNT/LZCNT have a false dependency on the destination register on Intel CPUs.
  1161  			// The TZCNT/LZCNT problem affects pre-Skylake models. See discussion at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62011#c7.
  1162  			// Xor register with itself to break the dependency.
  1163  			opregreg(s, x86.AXORL, v.Reg(), v.Reg())
  1164  		}
  1165  		p := s.Prog(v.Op.Asm())
  1166  		p.From.Type = obj.TYPE_REG
  1167  		p.From.Reg = v.Args[0].Reg()
  1168  		p.To.Type = obj.TYPE_REG
  1169  		p.To.Reg = v.Reg()
  1170  
  1171  	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
  1172  		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
  1173  		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
  1174  		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
  1175  		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
  1176  		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
  1177  		ssa.OpAMD64SETA, ssa.OpAMD64SETAE,
  1178  		ssa.OpAMD64SETO:
  1179  		p := s.Prog(v.Op.Asm())
  1180  		p.To.Type = obj.TYPE_REG
  1181  		p.To.Reg = v.Reg()
  1182  
  1183  	case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore,
  1184  		ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore,
  1185  		ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore,
  1186  		ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore,
  1187  		ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore:
  1188  		p := s.Prog(v.Op.Asm())
  1189  		p.To.Type = obj.TYPE_MEM
  1190  		p.To.Reg = v.Args[0].Reg()
  1191  		ssagen.AddAux(&p.To, v)
  1192  
  1193  	case ssa.OpAMD64SETEQstoreidx1, ssa.OpAMD64SETNEstoreidx1,
  1194  		ssa.OpAMD64SETLstoreidx1, ssa.OpAMD64SETLEstoreidx1,
  1195  		ssa.OpAMD64SETGstoreidx1, ssa.OpAMD64SETGEstoreidx1,
  1196  		ssa.OpAMD64SETBstoreidx1, ssa.OpAMD64SETBEstoreidx1,
  1197  		ssa.OpAMD64SETAstoreidx1, ssa.OpAMD64SETAEstoreidx1:
  1198  		p := s.Prog(v.Op.Asm())
  1199  		memIdx(&p.To, v)
  1200  		ssagen.AddAux(&p.To, v)
  1201  
  1202  	case ssa.OpAMD64SETNEF:
  1203  		t := v.RegTmp()
  1204  		p := s.Prog(v.Op.Asm())
  1205  		p.To.Type = obj.TYPE_REG
  1206  		p.To.Reg = v.Reg()
  1207  		q := s.Prog(x86.ASETPS)
  1208  		q.To.Type = obj.TYPE_REG
  1209  		q.To.Reg = t
  1210  		// ORL avoids a partial register write and is smaller than the ORQ used by the old compiler.
  1211  		opregreg(s, x86.AORL, v.Reg(), t)
  1212  
  1213  	case ssa.OpAMD64SETEQF:
  1214  		t := v.RegTmp()
  1215  		p := s.Prog(v.Op.Asm())
  1216  		p.To.Type = obj.TYPE_REG
  1217  		p.To.Reg = v.Reg()
  1218  		q := s.Prog(x86.ASETPC)
  1219  		q.To.Type = obj.TYPE_REG
  1220  		q.To.Reg = t
  1221  		// ANDL avoids a partial register write and is smaller than the ANDQ used by the old compiler.
  1222  		opregreg(s, x86.AANDL, v.Reg(), t)
  1223  
  1224  	case ssa.OpAMD64InvertFlags:
  1225  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1226  	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
  1227  		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
  1228  	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
  1229  		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
  1230  	case ssa.OpAMD64REPSTOSQ:
  1231  		s.Prog(x86.AREP)
  1232  		s.Prog(x86.ASTOSQ)
  1233  	case ssa.OpAMD64REPMOVSQ:
  1234  		s.Prog(x86.AREP)
  1235  		s.Prog(x86.AMOVSQ)
  1236  	case ssa.OpAMD64LoweredNilCheck:
  1237  		// Issue a load which will fault if the input is nil.
  1238  		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
  1239  		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
  1240  		// but it doesn't have false dependency on AX.
  1241  		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
  1242  		// That trades clobbering flags for clobbering a register.
  1243  		p := s.Prog(x86.ATESTB)
  1244  		p.From.Type = obj.TYPE_REG
  1245  		p.From.Reg = x86.REG_AX
  1246  		p.To.Type = obj.TYPE_MEM
  1247  		p.To.Reg = v.Args[0].Reg()
  1248  		if logopt.Enabled() {
  1249  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1250  		}
  1251  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
  1252  			base.WarnfAt(v.Pos, "generated nil check")
  1253  		}
  1254  	case ssa.OpAMD64MOVBatomicload, ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
  1255  		p := s.Prog(v.Op.Asm())
  1256  		p.From.Type = obj.TYPE_MEM
  1257  		p.From.Reg = v.Args[0].Reg()
  1258  		ssagen.AddAux(&p.From, v)
  1259  		p.To.Type = obj.TYPE_REG
  1260  		p.To.Reg = v.Reg0()
  1261  	case ssa.OpAMD64XCHGB, ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
  1262  		p := s.Prog(v.Op.Asm())
  1263  		p.From.Type = obj.TYPE_REG
  1264  		p.From.Reg = v.Reg0()
  1265  		p.To.Type = obj.TYPE_MEM
  1266  		p.To.Reg = v.Args[1].Reg()
  1267  		ssagen.AddAux(&p.To, v)
  1268  	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
  1269  		s.Prog(x86.ALOCK)
  1270  		p := s.Prog(v.Op.Asm())
  1271  		p.From.Type = obj.TYPE_REG
  1272  		p.From.Reg = v.Reg0()
  1273  		p.To.Type = obj.TYPE_MEM
  1274  		p.To.Reg = v.Args[1].Reg()
  1275  		ssagen.AddAux(&p.To, v)
  1276  	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
  1277  		if v.Args[1].Reg() != x86.REG_AX {
  1278  			v.Fatalf("input[1] not in AX %s", v.LongString())
  1279  		}
  1280  		s.Prog(x86.ALOCK)
  1281  		p := s.Prog(v.Op.Asm())
  1282  		p.From.Type = obj.TYPE_REG
  1283  		p.From.Reg = v.Args[2].Reg()
  1284  		p.To.Type = obj.TYPE_MEM
  1285  		p.To.Reg = v.Args[0].Reg()
  1286  		ssagen.AddAux(&p.To, v)
  1287  		p = s.Prog(x86.ASETEQ)
  1288  		p.To.Type = obj.TYPE_REG
  1289  		p.To.Reg = v.Reg0()
  1290  	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ANDLlock, ssa.OpAMD64ORBlock, ssa.OpAMD64ORLlock:
  1291  		s.Prog(x86.ALOCK)
  1292  		p := s.Prog(v.Op.Asm())
  1293  		p.From.Type = obj.TYPE_REG
  1294  		p.From.Reg = v.Args[1].Reg()
  1295  		p.To.Type = obj.TYPE_MEM
  1296  		p.To.Reg = v.Args[0].Reg()
  1297  		ssagen.AddAux(&p.To, v)
  1298  	case ssa.OpAMD64PrefetchT0, ssa.OpAMD64PrefetchNTA:
  1299  		p := s.Prog(v.Op.Asm())
  1300  		p.From.Type = obj.TYPE_MEM
  1301  		p.From.Reg = v.Args[0].Reg()
  1302  	case ssa.OpClobber:
  1303  		p := s.Prog(x86.AMOVL)
  1304  		p.From.Type = obj.TYPE_CONST
  1305  		p.From.Offset = 0xdeaddead
  1306  		p.To.Type = obj.TYPE_MEM
  1307  		p.To.Reg = x86.REG_SP
  1308  		ssagen.AddAux(&p.To, v)
  1309  		p = s.Prog(x86.AMOVL)
  1310  		p.From.Type = obj.TYPE_CONST
  1311  		p.From.Offset = 0xdeaddead
  1312  		p.To.Type = obj.TYPE_MEM
  1313  		p.To.Reg = x86.REG_SP
  1314  		ssagen.AddAux(&p.To, v)
  1315  		p.To.Offset += 4
  1316  	case ssa.OpClobberReg:
  1317  		x := uint64(0xdeaddeaddeaddead)
  1318  		p := s.Prog(x86.AMOVQ)
  1319  		p.From.Type = obj.TYPE_CONST
  1320  		p.From.Offset = int64(x)
  1321  		p.To.Type = obj.TYPE_REG
  1322  		p.To.Reg = v.Reg()
  1323  	default:
  1324  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1325  	}
  1326  }
  1327  
  1328  var blockJump = [...]struct {
  1329  	asm, invasm obj.As
  1330  }{
  1331  	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
  1332  	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
  1333  	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
  1334  	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
  1335  	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
  1336  	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
  1337  	ssa.BlockAMD64OS:  {x86.AJOS, x86.AJOC},
  1338  	ssa.BlockAMD64OC:  {x86.AJOC, x86.AJOS},
  1339  	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
  1340  	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
  1341  	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
  1342  	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
  1343  	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
  1344  	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
  1345  }
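        // blockJump maps each flags-based block kind to its conditional jump (asm)
        // and the inverted jump (invasm). ssaGenBlock below emits invasm when the
        // first successor is the fallthrough block and asm when the second one is,
        // so a single conditional branch suffices in either case.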
  1346  
  1347  var eqfJumps = [2][2]ssagen.IndexJump{
  1348  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
  1349  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
  1350  }
  1351  var nefJumps = [2][2]ssagen.IndexJump{
  1352  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
  1353  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
  1354  }
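        // eqfJumps and nefJumps implement the floating-point EQ/NE blocks, which
        // need two conditional jumps because UCOMISS/UCOMISD report an unordered
        // (NaN) comparison via the parity flag: EQF is ZERO && !PARITY and NEF is
        // !ZERO || PARITY, matching the SETEQF/SETNEF and CMOV*EQF/CMOV*NEF cases
        // above. Each IndexJump gives the jump opcode and the successor index it
        // targets for a given fallthrough block.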
  1355  
  1356  func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
  1357  	switch b.Kind {
  1358  	case ssa.BlockPlain:
  1359  		if b.Succs[0].Block() != next {
  1360  			p := s.Prog(obj.AJMP)
  1361  			p.To.Type = obj.TYPE_BRANCH
  1362  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  1363  		}
  1364  	case ssa.BlockDefer:
  1365  		// defer returns in rax:
  1366  		// 0 if we should continue executing
  1367  		// 1 if we should jump to deferreturn call
  1368  		p := s.Prog(x86.ATESTL)
  1369  		p.From.Type = obj.TYPE_REG
  1370  		p.From.Reg = x86.REG_AX
  1371  		p.To.Type = obj.TYPE_REG
  1372  		p.To.Reg = x86.REG_AX
  1373  		p = s.Prog(x86.AJNE)
  1374  		p.To.Type = obj.TYPE_BRANCH
  1375  		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
  1376  		if b.Succs[0].Block() != next {
  1377  			p := s.Prog(obj.AJMP)
  1378  			p.To.Type = obj.TYPE_BRANCH
  1379  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  1380  		}
  1381  	case ssa.BlockExit, ssa.BlockRetJmp:
  1382  	case ssa.BlockRet:
  1383  		s.Prog(obj.ARET)
  1384  
  1385  	case ssa.BlockAMD64EQF:
  1386  		s.CombJump(b, next, &eqfJumps)
  1387  
  1388  	case ssa.BlockAMD64NEF:
  1389  		s.CombJump(b, next, &nefJumps)
  1390  
  1391  	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
  1392  		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
  1393  		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
  1394  		ssa.BlockAMD64OS, ssa.BlockAMD64OC,
  1395  		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
  1396  		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
  1397  		jmp := blockJump[b.Kind]
  1398  		switch next {
  1399  		case b.Succs[0].Block():
  1400  			s.Br(jmp.invasm, b.Succs[1].Block())
  1401  		case b.Succs[1].Block():
  1402  			s.Br(jmp.asm, b.Succs[0].Block())
  1403  		default:
  1404  			if b.Likely != ssa.BranchUnlikely {
  1405  				s.Br(jmp.asm, b.Succs[0].Block())
  1406  				s.Br(obj.AJMP, b.Succs[1].Block())
  1407  			} else {
  1408  				s.Br(jmp.invasm, b.Succs[1].Block())
  1409  				s.Br(obj.AJMP, b.Succs[0].Block())
  1410  			}
  1411  		}
  1412  
  1413  	case ssa.BlockAMD64JUMPTABLE:
  1414  		// JMP      *(TABLE)(INDEX*8)
  1415  		p := s.Prog(obj.AJMP)
  1416  		p.To.Type = obj.TYPE_MEM
  1417  		p.To.Reg = b.Controls[1].Reg()
  1418  		p.To.Index = b.Controls[0].Reg()
  1419  		p.To.Scale = 8
  1420  		// Save jump tables for later resolution of the target blocks.
  1421  		s.JumpTables = append(s.JumpTables, b)
  1422  
  1423  	default:
  1424  		b.Fatalf("branch not implemented: %s", b.LongString())
  1425  	}
  1426  }
  1427  
  1428  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1429  	p := s.Prog(loadByType(t))
  1430  	p.From.Type = obj.TYPE_MEM
  1431  	p.From.Name = obj.NAME_AUTO
  1432  	p.From.Sym = n.Linksym()
  1433  	p.From.Offset = n.FrameOffset() + off
  1434  	p.To.Type = obj.TYPE_REG
  1435  	p.To.Reg = reg
  1436  	return p
  1437  }
  1438  
  1439  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1440  	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  1441  	p.To.Name = obj.NAME_PARAM
  1442  	p.To.Sym = n.Linksym()
  1443  	p.Pos = p.Pos.WithNotStmt()
  1444  	return p
  1445  }