github.com/hlts2/go@v0.0.0-20170904000733-812b34efaed8/src/cmd/compile/internal/amd64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package amd64
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  
    11  	"cmd/compile/internal/gc"
    12  	"cmd/compile/internal/ssa"
    13  	"cmd/compile/internal/types"
    14  	"cmd/internal/obj"
    15  	"cmd/internal/obj/x86"
    16  )
    17  
    18  // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
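        // A marked value later gets PRESERVEFLAGS set (see the MOVLconst/MOVQconst
        // case in ssaGenValue), which keeps the assembler from rewriting MOV $0, reg
        // into the flag-clobbering XOR reg, reg.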
    19  func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
    20  	flive := b.FlagsLiveAtEnd
    21  	if b.Control != nil && b.Control.Type.IsFlags() {
    22  		flive = true
    23  	}
    24  	for i := len(b.Values) - 1; i >= 0; i-- {
    25  		v := b.Values[i]
    26  		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
    27  			// The "mark" is any non-nil Aux value.
    28  			v.Aux = v
    29  		}
    30  		if v.Type.IsFlags() {
    31  			flive = false
    32  		}
    33  		for _, a := range v.Args {
    34  			if a.Type.IsFlags() {
    35  				flive = true
    36  			}
    37  		}
    38  	}
    39  }
    40  
    41  // loadByType returns the load instruction of the given type.
    42  func loadByType(t *types.Type) obj.As {
    43  	// Avoid partial register write
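        	// Loads of 1 and 2 bytes are zero-extended into a full 32-bit
        	// register (MOVBLZX/MOVWLZX below) rather than writing only the
        	// low byte or word of the destination.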
    44  	if !t.IsFloat() && t.Size() <= 2 {
    45  		if t.Size() == 1 {
    46  			return x86.AMOVBLZX
    47  		} else {
    48  			return x86.AMOVWLZX
    49  		}
    50  	}
    51  	// Otherwise, there's no difference between load and store opcodes.
    52  	return storeByType(t)
    53  }
    54  
    55  // storeByType returns the store instruction of the given type.
    56  func storeByType(t *types.Type) obj.As {
    57  	width := t.Size()
    58  	if t.IsFloat() {
    59  		switch width {
    60  		case 4:
    61  			return x86.AMOVSS
    62  		case 8:
    63  			return x86.AMOVSD
    64  		}
    65  	} else {
    66  		switch width {
    67  		case 1:
    68  			return x86.AMOVB
    69  		case 2:
    70  			return x86.AMOVW
    71  		case 4:
    72  			return x86.AMOVL
    73  		case 8:
    74  			return x86.AMOVQ
    75  		}
    76  	}
    77  	panic("bad store type")
    78  }
    79  
    80  // moveByType returns the reg->reg move instruction of the given type.
    81  func moveByType(t *types.Type) obj.As {
    82  	if t.IsFloat() {
    83  		// Moving the whole SSE2 register is faster
    84  		// than moving just the correct low portion of it.
    85  		// There is no xmm->xmm move with a 1-byte opcode,
    86  		// so use MOVUPS, which has a 2-byte opcode.
    87  		return x86.AMOVUPS
    88  	} else {
    89  		switch t.Size() {
    90  		case 1:
    91  			// Avoids partial register write
    92  			return x86.AMOVL
    93  		case 2:
    94  			return x86.AMOVL
    95  		case 4:
    96  			return x86.AMOVL
    97  		case 8:
    98  			return x86.AMOVQ
    99  		case 16:
   100  			return x86.AMOVUPS // int128s are in SSE registers
   101  		default:
   102  			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
   103  		}
   104  	}
   105  }
   106  
   107  // opregreg emits instructions for
   108  //     dest := dest(To) op src(From)
   109  // and also returns the created obj.Prog so it
   110  // may be further adjusted (offset, scale, etc).
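        // For example, opregreg(s, x86.AADDQ, x86.REG_AX, x86.REG_BX) assembles
        // as ADDQ BX, AX, i.e. AX += BX.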
   111  func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
   112  	p := s.Prog(op)
   113  	p.From.Type = obj.TYPE_REG
   114  	p.To.Type = obj.TYPE_REG
   115  	p.To.Reg = dest
   116  	p.From.Reg = src
   117  	return p
   118  }
   119  
   120  // DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ;
   121  // see runtime/mkduff.go.
   122  func duffStart(size int64) int64 {
   123  	x, _ := duff(size)
   124  	return x
   125  }
   126  func duffAdj(size int64) int64 {
   127  	_, x := duff(size)
   128  	return x
   129  }
   130  
   131  // duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
   132  // required to use the duffzero mechanism for a block of the given size.
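        // For example (a sketch, assuming dzClearStep=16 and dzBlockLen=4 from ggen.go,
        // matching the 4-MOVUPS blocks noted above): for size=48 we need 3 of the 4
        // clears in a block, so off points one LEAQ and 3 MOVUPS instructions before
        // the end of the last block, and adj=-16 pre-adjusts DI so those stores clear
        // exactly bytes [0,48) of the original pointer.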
   133  func duff(size int64) (int64, int64) {
   134  	if size < 32 || size > 1024 || size%dzClearStep != 0 {
   135  		panic("bad duffzero size")
   136  	}
   137  	steps := size / dzClearStep
   138  	blocks := steps / dzBlockLen
   139  	steps %= dzBlockLen
   140  	off := dzBlockSize * (dzBlocks - blocks)
   141  	var adj int64
   142  	if steps != 0 {
   143  		off -= dzLeaqSize
   144  		off -= dzMovSize * steps
   145  		adj -= dzClearStep * (dzBlockLen - steps)
   146  	}
   147  	return off, adj
   148  }
   149  
   150  func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
   151  	switch v.Op {
   152  	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
   153  		r := v.Reg()
   154  		r1 := v.Args[0].Reg()
   155  		r2 := v.Args[1].Reg()
   156  		switch {
   157  		case r == r1:
   158  			p := s.Prog(v.Op.Asm())
   159  			p.From.Type = obj.TYPE_REG
   160  			p.From.Reg = r2
   161  			p.To.Type = obj.TYPE_REG
   162  			p.To.Reg = r
   163  		case r == r2:
   164  			p := s.Prog(v.Op.Asm())
   165  			p.From.Type = obj.TYPE_REG
   166  			p.From.Reg = r1
   167  			p.To.Type = obj.TYPE_REG
   168  			p.To.Reg = r
   169  		default:
   170  			var asm obj.As
   171  			if v.Op == ssa.OpAMD64ADDQ {
   172  				asm = x86.ALEAQ
   173  			} else {
   174  				asm = x86.ALEAL
   175  			}
   176  			p := s.Prog(asm)
   177  			p.From.Type = obj.TYPE_MEM
   178  			p.From.Reg = r1
   179  			p.From.Scale = 1
   180  			p.From.Index = r2
   181  			p.To.Type = obj.TYPE_REG
   182  			p.To.Reg = r
   183  		}
   184  	// 2-address opcode arithmetic
   185  	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
   186  		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
   187  		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
   188  		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
   189  		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
   190  		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
   191  		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
   192  		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
   193  		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
   194  		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
   195  		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
   196  		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
   197  		ssa.OpAMD64PXOR:
   198  		r := v.Reg()
   199  		if r != v.Args[0].Reg() {
   200  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   201  		}
   202  		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())
   203  
   204  	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
   205  		// Arg[0] (the dividend) is in AX.
   206  		// Arg[1] (the divisor) can be in any other register.
   207  		// Result[0] (the quotient) is in AX.
   208  		// Result[1] (the remainder) is in DX.
   209  		r := v.Args[1].Reg()
   210  
   211  		// Zero extend dividend.
   212  		c := s.Prog(x86.AXORL)
   213  		c.From.Type = obj.TYPE_REG
   214  		c.From.Reg = x86.REG_DX
   215  		c.To.Type = obj.TYPE_REG
   216  		c.To.Reg = x86.REG_DX
   217  
   218  		// Issue divide.
   219  		p := s.Prog(v.Op.Asm())
   220  		p.From.Type = obj.TYPE_REG
   221  		p.From.Reg = r
   222  
   223  	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
   224  		// Arg[0] (the dividend) is in AX.
   225  		// Arg[1] (the divisor) can be in any other register.
   226  		// Result[0] (the quotient) is in AX.
   227  		// Result[1] (the remainder) is in DX.
   228  		r := v.Args[1].Reg()
   229  
   230  		// CPU faults upon signed overflow, which occurs when the most
   231  		// negative int is divided by -1. Handle divide by -1 as a special case.
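        		// For DIVQ, for example, the emitted sequence is roughly:
        		//	CMPQ divisor, $-1
        		//	JEQ  fixup
        		//	CQO
        		//	IDIVQ divisor
        		//	JMP  done
        		// fixup:
        		//	NEGQ AX     // quotient = -dividend
        		//	XORL DX, DX // remainder = 0
        		// done: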
   232  		var c *obj.Prog
   233  		switch v.Op {
   234  		case ssa.OpAMD64DIVQ:
   235  			c = s.Prog(x86.ACMPQ)
   236  		case ssa.OpAMD64DIVL:
   237  			c = s.Prog(x86.ACMPL)
   238  		case ssa.OpAMD64DIVW:
   239  			c = s.Prog(x86.ACMPW)
   240  		}
   241  		c.From.Type = obj.TYPE_REG
   242  		c.From.Reg = r
   243  		c.To.Type = obj.TYPE_CONST
   244  		c.To.Offset = -1
   245  		j1 := s.Prog(x86.AJEQ)
   246  		j1.To.Type = obj.TYPE_BRANCH
   247  
   248  		// Sign extend dividend.
   249  		switch v.Op {
   250  		case ssa.OpAMD64DIVQ:
   251  			s.Prog(x86.ACQO)
   252  		case ssa.OpAMD64DIVL:
   253  			s.Prog(x86.ACDQ)
   254  		case ssa.OpAMD64DIVW:
   255  			s.Prog(x86.ACWD)
   256  		}
   257  
   258  		// Issue divide.
   259  		p := s.Prog(v.Op.Asm())
   260  		p.From.Type = obj.TYPE_REG
   261  		p.From.Reg = r
   262  
   263  		// Skip over -1 fixup code.
   264  		j2 := s.Prog(obj.AJMP)
   265  		j2.To.Type = obj.TYPE_BRANCH
   266  
   267  		// Issue -1 fixup code.
   268  		// n / -1 = -n
   269  		n1 := s.Prog(x86.ANEGQ)
   270  		n1.To.Type = obj.TYPE_REG
   271  		n1.To.Reg = x86.REG_AX
   272  
   273  		// n % -1 == 0
   274  		n2 := s.Prog(x86.AXORL)
   275  		n2.From.Type = obj.TYPE_REG
   276  		n2.From.Reg = x86.REG_DX
   277  		n2.To.Type = obj.TYPE_REG
   278  		n2.To.Reg = x86.REG_DX
   279  
   280  		// TODO(khr): issue only the -1 fixup code we need.
   281  		// For instance, if only the quotient is used, no point in zeroing the remainder.
   282  
   283  		j1.To.Val = n1
   284  		j2.To.Val = s.Pc()
   285  
   286  	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
   287  		// The frontend rewrites constant division by 8/16/32-bit integers into
   288  		// HMUL by a constant; the SSA rewrite rules generate the 64-bit versions.
   290  
   291  		// Arg[0] is already in AX as it's the only register we allow
   292  		// and DX is the only output we care about (the high bits)
   293  		p := s.Prog(v.Op.Asm())
   294  		p.From.Type = obj.TYPE_REG
   295  		p.From.Reg = v.Args[1].Reg()
   296  
   297  		// IMULB puts the high portion in AH instead of DL,
   298  		// so move it to DL for consistency
   299  		if v.Type.Size() == 1 {
   300  			m := s.Prog(x86.AMOVB)
   301  			m.From.Type = obj.TYPE_REG
   302  			m.From.Reg = x86.REG_AH
   303  			m.To.Type = obj.TYPE_REG
   304  			m.To.Reg = x86.REG_DX
   305  		}
   306  
   307  	case ssa.OpAMD64MULQU2:
   308  		// Arg[0] is already in AX as it's the only register we allow
   309  		// results hi in DX, lo in AX
   310  		p := s.Prog(v.Op.Asm())
   311  		p.From.Type = obj.TYPE_REG
   312  		p.From.Reg = v.Args[1].Reg()
   313  
   314  	case ssa.OpAMD64DIVQU2:
   315  		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow
   316  		// results q in AX, r in DX
   317  		p := s.Prog(v.Op.Asm())
   318  		p.From.Type = obj.TYPE_REG
   319  		p.From.Reg = v.Args[2].Reg()
   320  
   321  	case ssa.OpAMD64AVGQU:
   322  		// compute (x+y)/2 unsigned.
   323  		// Do a 64-bit add, the overflow goes into the carry.
   324  		// Shift right once and pull the carry back into the 63rd bit.
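        		// That is, ADDQ leaves bit 64 of the full sum in CF, and RCRQ $1
        		// rotates CF back in as the new top bit, so no precision is lost.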
   325  		r := v.Reg()
   326  		if r != v.Args[0].Reg() {
   327  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   328  		}
   329  		p := s.Prog(x86.AADDQ)
   330  		p.From.Type = obj.TYPE_REG
   331  		p.To.Type = obj.TYPE_REG
   332  		p.To.Reg = r
   333  		p.From.Reg = v.Args[1].Reg()
   334  		p = s.Prog(x86.ARCRQ)
   335  		p.From.Type = obj.TYPE_CONST
   336  		p.From.Offset = 1
   337  		p.To.Type = obj.TYPE_REG
   338  		p.To.Reg = r
   339  
   340  	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
   341  		r := v.Reg()
   342  		a := v.Args[0].Reg()
   343  		if r == a {
   344  			if v.AuxInt == 1 {
   345  				var asm obj.As
   346  				// The software optimization manual recommends add $1,reg.
   347  				// But inc/dec is 1 byte smaller. ICC always uses inc;
   348  				// Clang/GCC choose depending on flags, but prefer add.
   349  				// Experiments show that inc/dec is both a little faster
   350  				// and makes the binary a little smaller.
   351  				if v.Op == ssa.OpAMD64ADDQconst {
   352  					asm = x86.AINCQ
   353  				} else {
   354  					asm = x86.AINCL
   355  				}
   356  				p := s.Prog(asm)
   357  				p.To.Type = obj.TYPE_REG
   358  				p.To.Reg = r
   359  				return
   360  			}
   361  			if v.AuxInt == -1 {
   362  				var asm obj.As
   363  				if v.Op == ssa.OpAMD64ADDQconst {
   364  					asm = x86.ADECQ
   365  				} else {
   366  					asm = x86.ADECL
   367  				}
   368  				p := s.Prog(asm)
   369  				p.To.Type = obj.TYPE_REG
   370  				p.To.Reg = r
   371  				return
   372  			}
   373  			p := s.Prog(v.Op.Asm())
   374  			p.From.Type = obj.TYPE_CONST
   375  			p.From.Offset = v.AuxInt
   376  			p.To.Type = obj.TYPE_REG
   377  			p.To.Reg = r
   378  			return
   379  		}
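        		// r != a: use LEAQ/LEAL to add the constant into a different
        		// destination register without an extra MOV and without
        		// clobbering the flags.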
   380  		var asm obj.As
   381  		if v.Op == ssa.OpAMD64ADDQconst {
   382  			asm = x86.ALEAQ
   383  		} else {
   384  			asm = x86.ALEAL
   385  		}
   386  		p := s.Prog(asm)
   387  		p.From.Type = obj.TYPE_MEM
   388  		p.From.Reg = a
   389  		p.From.Offset = v.AuxInt
   390  		p.To.Type = obj.TYPE_REG
   391  		p.To.Reg = r
   392  
   393  	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
   394  		r := v.Reg()
   395  		if r != v.Args[0].Reg() {
   396  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   397  		}
   398  		p := s.Prog(v.Op.Asm())
   399  		p.From.Type = obj.TYPE_REG
   400  		p.From.Reg = v.Args[1].Reg()
   401  		p.To.Type = obj.TYPE_REG
   402  		p.To.Reg = r
   403  
   404  	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
   405  		r := v.Reg()
   406  		if r != v.Args[0].Reg() {
   407  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   408  		}
   409  		p := s.Prog(v.Op.Asm())
   410  		p.From.Type = obj.TYPE_CONST
   411  		p.From.Offset = v.AuxInt
   412  		p.To.Type = obj.TYPE_REG
   413  		p.To.Reg = r
   414  		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
   415  		// then we don't need to use resultInArg0 for these ops.
   416  		//p.From3 = new(obj.Addr)
   417  		//p.From3.Type = obj.TYPE_REG
   418  		//p.From3.Reg = v.Args[0].Reg()
   419  
   420  	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
   421  		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
   422  		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
   423  		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
   424  		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
   425  		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
   426  		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
   427  		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
   428  		r := v.Reg()
   429  		if r != v.Args[0].Reg() {
   430  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   431  		}
   432  		p := s.Prog(v.Op.Asm())
   433  		p.From.Type = obj.TYPE_CONST
   434  		p.From.Offset = v.AuxInt
   435  		p.To.Type = obj.TYPE_REG
   436  		p.To.Reg = r
   437  	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
   438  		r := v.Reg()
   439  		p := s.Prog(v.Op.Asm())
   440  		p.From.Type = obj.TYPE_REG
   441  		p.From.Reg = r
   442  		p.To.Type = obj.TYPE_REG
   443  		p.To.Reg = r
   444  	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
   445  		r := v.Args[0].Reg()
   446  		i := v.Args[1].Reg()
   447  		p := s.Prog(x86.ALEAQ)
   448  		switch v.Op {
   449  		case ssa.OpAMD64LEAQ1:
   450  			p.From.Scale = 1
   451  			if i == x86.REG_SP {
   452  				r, i = i, r
   453  			}
   454  		case ssa.OpAMD64LEAQ2:
   455  			p.From.Scale = 2
   456  		case ssa.OpAMD64LEAQ4:
   457  			p.From.Scale = 4
   458  		case ssa.OpAMD64LEAQ8:
   459  			p.From.Scale = 8
   460  		}
   461  		p.From.Type = obj.TYPE_MEM
   462  		p.From.Reg = r
   463  		p.From.Index = i
   464  		gc.AddAux(&p.From, v)
   465  		p.To.Type = obj.TYPE_REG
   466  		p.To.Reg = v.Reg()
   467  	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL:
   468  		p := s.Prog(v.Op.Asm())
   469  		p.From.Type = obj.TYPE_MEM
   470  		p.From.Reg = v.Args[0].Reg()
   471  		gc.AddAux(&p.From, v)
   472  		p.To.Type = obj.TYPE_REG
   473  		p.To.Reg = v.Reg()
   474  	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
   475  		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
   476  		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
   477  		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
   478  	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
   479  		// The Go assembler has swapped operands for UCOMISx relative to CMP;
   480  		// we must account for that right here.
   481  		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
   482  	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
   483  		p := s.Prog(v.Op.Asm())
   484  		p.From.Type = obj.TYPE_REG
   485  		p.From.Reg = v.Args[0].Reg()
   486  		p.To.Type = obj.TYPE_CONST
   487  		p.To.Offset = v.AuxInt
   488  	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
   489  		ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst:
   490  		p := s.Prog(v.Op.Asm())
   491  		p.From.Type = obj.TYPE_CONST
   492  		p.From.Offset = v.AuxInt
   493  		p.To.Type = obj.TYPE_REG
   494  		p.To.Reg = v.Args[0].Reg()
   495  	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
   496  		x := v.Reg()
   497  		asm := v.Op.Asm()
   498  		// Use MOVL to move a small constant into a register
   499  		// when the constant is positive and fits into 32 bits.
   500  		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
   501  			// The upper 32 bits are zeroed automatically when using MOVL.
   502  			asm = x86.AMOVL
   503  		}
   504  		p := s.Prog(asm)
   505  		p.From.Type = obj.TYPE_CONST
   506  		p.From.Offset = v.AuxInt
   507  		p.To.Type = obj.TYPE_REG
   508  		p.To.Reg = x
   509  		// If flags are live at this instruction, suppress the
   510  		// MOV $0,AX -> XOR AX,AX optimization.
   511  		if v.Aux != nil {
   512  			p.Mark |= x86.PRESERVEFLAGS
   513  		}
   514  	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
   515  		x := v.Reg()
   516  		p := s.Prog(v.Op.Asm())
   517  		p.From.Type = obj.TYPE_FCONST
   518  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   519  		p.To.Type = obj.TYPE_REG
   520  		p.To.Reg = x
   521  	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
   522  		p := s.Prog(v.Op.Asm())
   523  		p.From.Type = obj.TYPE_MEM
   524  		p.From.Reg = v.Args[0].Reg()
   525  		gc.AddAux(&p.From, v)
   526  		p.To.Type = obj.TYPE_REG
   527  		p.To.Reg = v.Reg()
   528  	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8:
   529  		p := s.Prog(v.Op.Asm())
   530  		p.From.Type = obj.TYPE_MEM
   531  		p.From.Reg = v.Args[0].Reg()
   532  		gc.AddAux(&p.From, v)
   533  		p.From.Scale = 8
   534  		p.From.Index = v.Args[1].Reg()
   535  		p.To.Type = obj.TYPE_REG
   536  		p.To.Reg = v.Reg()
   537  	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
   538  		p := s.Prog(v.Op.Asm())
   539  		p.From.Type = obj.TYPE_MEM
   540  		p.From.Reg = v.Args[0].Reg()
   541  		gc.AddAux(&p.From, v)
   542  		p.From.Scale = 4
   543  		p.From.Index = v.Args[1].Reg()
   544  		p.To.Type = obj.TYPE_REG
   545  		p.To.Reg = v.Reg()
   546  	case ssa.OpAMD64MOVWloadidx2:
   547  		p := s.Prog(v.Op.Asm())
   548  		p.From.Type = obj.TYPE_MEM
   549  		p.From.Reg = v.Args[0].Reg()
   550  		gc.AddAux(&p.From, v)
   551  		p.From.Scale = 2
   552  		p.From.Index = v.Args[1].Reg()
   553  		p.To.Type = obj.TYPE_REG
   554  		p.To.Reg = v.Reg()
   555  	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
   556  		r := v.Args[0].Reg()
   557  		i := v.Args[1].Reg()
   558  		if i == x86.REG_SP {
   559  			r, i = i, r
   560  		}
   561  		p := s.Prog(v.Op.Asm())
   562  		p.From.Type = obj.TYPE_MEM
   563  		p.From.Reg = r
   564  		p.From.Scale = 1
   565  		p.From.Index = i
   566  		gc.AddAux(&p.From, v)
   567  		p.To.Type = obj.TYPE_REG
   568  		p.To.Reg = v.Reg()
   569  	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
   570  		p := s.Prog(v.Op.Asm())
   571  		p.From.Type = obj.TYPE_REG
   572  		p.From.Reg = v.Args[1].Reg()
   573  		p.To.Type = obj.TYPE_MEM
   574  		p.To.Reg = v.Args[0].Reg()
   575  		gc.AddAux(&p.To, v)
   576  	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8:
   577  		p := s.Prog(v.Op.Asm())
   578  		p.From.Type = obj.TYPE_REG
   579  		p.From.Reg = v.Args[2].Reg()
   580  		p.To.Type = obj.TYPE_MEM
   581  		p.To.Reg = v.Args[0].Reg()
   582  		p.To.Scale = 8
   583  		p.To.Index = v.Args[1].Reg()
   584  		gc.AddAux(&p.To, v)
   585  	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
   586  		p := s.Prog(v.Op.Asm())
   587  		p.From.Type = obj.TYPE_REG
   588  		p.From.Reg = v.Args[2].Reg()
   589  		p.To.Type = obj.TYPE_MEM
   590  		p.To.Reg = v.Args[0].Reg()
   591  		p.To.Scale = 4
   592  		p.To.Index = v.Args[1].Reg()
   593  		gc.AddAux(&p.To, v)
   594  	case ssa.OpAMD64MOVWstoreidx2:
   595  		p := s.Prog(v.Op.Asm())
   596  		p.From.Type = obj.TYPE_REG
   597  		p.From.Reg = v.Args[2].Reg()
   598  		p.To.Type = obj.TYPE_MEM
   599  		p.To.Reg = v.Args[0].Reg()
   600  		p.To.Scale = 2
   601  		p.To.Index = v.Args[1].Reg()
   602  		gc.AddAux(&p.To, v)
   603  	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
   604  		r := v.Args[0].Reg()
   605  		i := v.Args[1].Reg()
   606  		if i == x86.REG_SP {
   607  			r, i = i, r
   608  		}
   609  		p := s.Prog(v.Op.Asm())
   610  		p.From.Type = obj.TYPE_REG
   611  		p.From.Reg = v.Args[2].Reg()
   612  		p.To.Type = obj.TYPE_MEM
   613  		p.To.Reg = r
   614  		p.To.Scale = 1
   615  		p.To.Index = i
   616  		gc.AddAux(&p.To, v)
   617  	case ssa.OpAMD64ADDQconstmem, ssa.OpAMD64ADDLconstmem:
   618  		sc := v.AuxValAndOff()
   619  		off := sc.Off()
   620  		val := sc.Val()
   621  		if val == 1 {
   622  			var asm obj.As
   623  			if v.Op == ssa.OpAMD64ADDQconstmem {
   624  				asm = x86.AINCQ
   625  			} else {
   626  				asm = x86.AINCL
   627  			}
   628  			p := s.Prog(asm)
   629  			p.To.Type = obj.TYPE_MEM
   630  			p.To.Reg = v.Args[0].Reg()
   631  			gc.AddAux2(&p.To, v, off)
   632  		} else {
   633  			p := s.Prog(v.Op.Asm())
   634  			p.From.Type = obj.TYPE_CONST
   635  			p.From.Offset = val
   636  			p.To.Type = obj.TYPE_MEM
   637  			p.To.Reg = v.Args[0].Reg()
   638  			gc.AddAux2(&p.To, v, off)
   639  		}
   640  	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
   641  		p := s.Prog(v.Op.Asm())
   642  		p.From.Type = obj.TYPE_CONST
   643  		sc := v.AuxValAndOff()
   644  		p.From.Offset = sc.Val()
   645  		p.To.Type = obj.TYPE_MEM
   646  		p.To.Reg = v.Args[0].Reg()
   647  		gc.AddAux2(&p.To, v, sc.Off())
   648  	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
   649  		p := s.Prog(v.Op.Asm())
   650  		p.From.Type = obj.TYPE_CONST
   651  		sc := v.AuxValAndOff()
   652  		p.From.Offset = sc.Val()
   653  		r := v.Args[0].Reg()
   654  		i := v.Args[1].Reg()
   655  		switch v.Op {
   656  		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
   657  			p.To.Scale = 1
   658  			if i == x86.REG_SP {
   659  				r, i = i, r
   660  			}
   661  		case ssa.OpAMD64MOVWstoreconstidx2:
   662  			p.To.Scale = 2
   663  		case ssa.OpAMD64MOVLstoreconstidx4:
   664  			p.To.Scale = 4
   665  		case ssa.OpAMD64MOVQstoreconstidx8:
   666  			p.To.Scale = 8
   667  		}
   668  		p.To.Type = obj.TYPE_MEM
   669  		p.To.Reg = r
   670  		p.To.Index = i
   671  		gc.AddAux2(&p.To, v, sc.Off())
   672  	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
   673  		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
   674  		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
   675  		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
   676  	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
   677  		r := v.Reg()
   678  		// Break false dependency on destination register.
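        		// (CVTSx2Sx only writes the low bits of the XMM destination, so
        		// without the XORPS the instruction would carry a dependency on
        		// the register's previous contents.)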
   679  		opregreg(s, x86.AXORPS, r, r)
   680  		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
   681  	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
   682  		p := s.Prog(x86.AMOVQ)
   683  		p.From.Type = obj.TYPE_REG
   684  		p.From.Reg = v.Args[0].Reg()
   685  		p.To.Type = obj.TYPE_REG
   686  		p.To.Reg = v.Reg()
   687  	case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
   688  		p := s.Prog(x86.AMOVL)
   689  		p.From.Type = obj.TYPE_REG
   690  		p.From.Reg = v.Args[0].Reg()
   691  		p.To.Type = obj.TYPE_REG
   692  		p.To.Reg = v.Reg()
   693  	case ssa.OpAMD64ADDQmem, ssa.OpAMD64ADDLmem, ssa.OpAMD64SUBQmem, ssa.OpAMD64SUBLmem,
   694  		ssa.OpAMD64ANDQmem, ssa.OpAMD64ANDLmem, ssa.OpAMD64ORQmem, ssa.OpAMD64ORLmem,
   695  		ssa.OpAMD64XORQmem, ssa.OpAMD64XORLmem, ssa.OpAMD64ADDSDmem, ssa.OpAMD64ADDSSmem,
   696  		ssa.OpAMD64SUBSDmem, ssa.OpAMD64SUBSSmem, ssa.OpAMD64MULSDmem, ssa.OpAMD64MULSSmem:
   697  		p := s.Prog(v.Op.Asm())
   698  		p.From.Type = obj.TYPE_MEM
   699  		p.From.Reg = v.Args[1].Reg()
   700  		gc.AddAux(&p.From, v)
   701  		p.To.Type = obj.TYPE_REG
   702  		p.To.Reg = v.Reg()
   703  		if v.Reg() != v.Args[0].Reg() {
   704  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   705  		}
   706  	case ssa.OpAMD64DUFFZERO:
   707  		off := duffStart(v.AuxInt)
   708  		adj := duffAdj(v.AuxInt)
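        		// duff() returns a negative adj when only part of a duffzero block
        		// is needed; the LEAQ below backs DI up so the partial block's stores
        		// start exactly at the original DI and the whole call clears v.AuxInt bytes.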
   709  		var p *obj.Prog
   710  		if adj != 0 {
   711  			p = s.Prog(x86.ALEAQ)
   712  			p.From.Type = obj.TYPE_MEM
   713  			p.From.Offset = adj
   714  			p.From.Reg = x86.REG_DI
   715  			p.To.Type = obj.TYPE_REG
   716  			p.To.Reg = x86.REG_DI
   717  		}
   718  		p = s.Prog(obj.ADUFFZERO)
   719  		p.To.Type = obj.TYPE_ADDR
   720  		p.To.Sym = gc.Duffzero
   721  		p.To.Offset = off
   722  	case ssa.OpAMD64MOVOconst:
   723  		if v.AuxInt != 0 {
   724  			v.Fatalf("MOVOconst can only do constant=0")
   725  		}
   726  		r := v.Reg()
   727  		opregreg(s, x86.AXORPS, r, r)
   728  	case ssa.OpAMD64DUFFCOPY:
   729  		p := s.Prog(obj.ADUFFCOPY)
   730  		p.To.Type = obj.TYPE_ADDR
   731  		p.To.Sym = gc.Duffcopy
   732  		p.To.Offset = v.AuxInt
   733  
   734  	case ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert:
   735  		if v.Args[0].Reg() != v.Reg() {
   736  			v.Fatalf("MOVXconvert should be a no-op")
   737  		}
   738  	case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
   739  		if v.Type.IsMemory() {
   740  			return
   741  		}
   742  		x := v.Args[0].Reg()
   743  		y := v.Reg()
   744  		if x != y {
   745  			opregreg(s, moveByType(v.Type), y, x)
   746  		}
   747  	case ssa.OpLoadReg:
   748  		if v.Type.IsFlags() {
   749  			v.Fatalf("load flags not implemented: %v", v.LongString())
   750  			return
   751  		}
   752  		p := s.Prog(loadByType(v.Type))
   753  		gc.AddrAuto(&p.From, v.Args[0])
   754  		p.To.Type = obj.TYPE_REG
   755  		p.To.Reg = v.Reg()
   756  
   757  	case ssa.OpStoreReg:
   758  		if v.Type.IsFlags() {
   759  			v.Fatalf("store flags not implemented: %v", v.LongString())
   760  			return
   761  		}
   762  		p := s.Prog(storeByType(v.Type))
   763  		p.From.Type = obj.TYPE_REG
   764  		p.From.Reg = v.Args[0].Reg()
   765  		gc.AddrAuto(&p.To, v)
   766  	case ssa.OpAMD64LoweredGetClosurePtr:
   767  		// Closure pointer is DX.
   768  		gc.CheckLoweredGetClosurePtr(v)
   769  	case ssa.OpAMD64LoweredGetG:
   770  		r := v.Reg()
   771  		// See the comments in cmd/internal/obj/x86/obj6.go
   772  		// near CanUse1InsnTLS for a detailed explanation of these instructions.
   773  		if x86.CanUse1InsnTLS(gc.Ctxt) {
   774  			// MOVQ (TLS), r
   775  			p := s.Prog(x86.AMOVQ)
   776  			p.From.Type = obj.TYPE_MEM
   777  			p.From.Reg = x86.REG_TLS
   778  			p.To.Type = obj.TYPE_REG
   779  			p.To.Reg = r
   780  		} else {
   781  			// MOVQ TLS, r
   782  			// MOVQ (r)(TLS*1), r
   783  			p := s.Prog(x86.AMOVQ)
   784  			p.From.Type = obj.TYPE_REG
   785  			p.From.Reg = x86.REG_TLS
   786  			p.To.Type = obj.TYPE_REG
   787  			p.To.Reg = r
   788  			q := s.Prog(x86.AMOVQ)
   789  			q.From.Type = obj.TYPE_MEM
   790  			q.From.Reg = r
   791  			q.From.Index = x86.REG_TLS
   792  			q.From.Scale = 1
   793  			q.To.Type = obj.TYPE_REG
   794  			q.To.Reg = r
   795  		}
   796  	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
   797  		s.Call(v)
   798  	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
   799  		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
   800  		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
   801  		r := v.Reg()
   802  		if r != v.Args[0].Reg() {
   803  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   804  		}
   805  		p := s.Prog(v.Op.Asm())
   806  		p.To.Type = obj.TYPE_REG
   807  		p.To.Reg = r
   808  	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL:
   809  		p := s.Prog(v.Op.Asm())
   810  		p.From.Type = obj.TYPE_REG
   811  		p.From.Reg = v.Args[0].Reg()
   812  		p.To.Type = obj.TYPE_REG
   813  		p.To.Reg = v.Reg0()
   814  	case ssa.OpAMD64SQRTSD:
   815  		p := s.Prog(v.Op.Asm())
   816  		p.From.Type = obj.TYPE_REG
   817  		p.From.Reg = v.Args[0].Reg()
   818  		p.To.Type = obj.TYPE_REG
   819  		p.To.Reg = v.Reg()
   820  	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL:
   821  		if v.Args[0].Reg() != v.Reg() {
   822  			// POPCNT on Intel has a false dependency on the destination register.
   823  			// Zero the destination to break the dependency.
   824  			p := s.Prog(x86.AMOVQ)
   825  			p.From.Type = obj.TYPE_CONST
   826  			p.From.Offset = 0
   827  			p.To.Type = obj.TYPE_REG
   828  			p.To.Reg = v.Reg()
   829  		}
   830  		p := s.Prog(v.Op.Asm())
   831  		p.From.Type = obj.TYPE_REG
   832  		p.From.Reg = v.Args[0].Reg()
   833  		p.To.Type = obj.TYPE_REG
   834  		p.To.Reg = v.Reg()
   835  	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
   836  		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
   837  		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
   838  		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
   839  		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
   840  		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
   841  		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
   842  		p := s.Prog(v.Op.Asm())
   843  		p.To.Type = obj.TYPE_REG
   844  		p.To.Reg = v.Reg()
   845  
   846  	case ssa.OpAMD64SETNEF:
   847  		p := s.Prog(v.Op.Asm())
   848  		p.To.Type = obj.TYPE_REG
   849  		p.To.Reg = v.Reg()
   850  		q := s.Prog(x86.ASETPS)
   851  		q.To.Type = obj.TYPE_REG
   852  		q.To.Reg = x86.REG_AX
   853  		// ORL avoids a partial register write and is smaller than the ORQ the old compiler used.
   854  		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)
   855  
   856  	case ssa.OpAMD64SETEQF:
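        		// Floating-point equality must also exclude the unordered (NaN)
        		// case, which sets PF: AND the SETEQ result with SETPC (parity
        		// clear) so the result is 1 only for an ordered, equal compare.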
   857  		p := s.Prog(v.Op.Asm())
   858  		p.To.Type = obj.TYPE_REG
   859  		p.To.Reg = v.Reg()
   860  		q := s.Prog(x86.ASETPC)
   861  		q.To.Type = obj.TYPE_REG
   862  		q.To.Reg = x86.REG_AX
   863  		// ANDL avoids a partial register write and is smaller than the ANDQ the old compiler used.
   864  		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)
   865  
   866  	case ssa.OpAMD64InvertFlags:
   867  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
   868  	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
   869  		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
   870  	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
   871  		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
   872  	case ssa.OpAMD64REPSTOSQ:
   873  		s.Prog(x86.AREP)
   874  		s.Prog(x86.ASTOSQ)
   875  	case ssa.OpAMD64REPMOVSQ:
   876  		s.Prog(x86.AREP)
   877  		s.Prog(x86.AMOVSQ)
   878  	case ssa.OpAMD64LoweredNilCheck:
   879  		// Issue a load which will fault if the input is nil.
   880  		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
   881  		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
   882  		// but it doesn't have false dependency on AX.
   883  		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
   884  		// That trades clobbering flags for clobbering a register.
   885  		p := s.Prog(x86.ATESTB)
   886  		p.From.Type = obj.TYPE_REG
   887  		p.From.Reg = x86.REG_AX
   888  		p.To.Type = obj.TYPE_MEM
   889  		p.To.Reg = v.Args[0].Reg()
   890  		gc.AddAux(&p.To, v)
   891  		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
   892  			gc.Warnl(v.Pos, "generated nil check")
   893  		}
   894  	case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
   895  		p := s.Prog(v.Op.Asm())
   896  		p.From.Type = obj.TYPE_MEM
   897  		p.From.Reg = v.Args[0].Reg()
   898  		gc.AddAux(&p.From, v)
   899  		p.To.Type = obj.TYPE_REG
   900  		p.To.Reg = v.Reg0()
   901  	case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
   902  		r := v.Reg0()
   903  		if r != v.Args[0].Reg() {
   904  			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
   905  		}
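        		// XCHG with a memory operand is implicitly locked, so unlike the
        		// XADD case below no explicit LOCK prefix is needed.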
   906  		p := s.Prog(v.Op.Asm())
   907  		p.From.Type = obj.TYPE_REG
   908  		p.From.Reg = r
   909  		p.To.Type = obj.TYPE_MEM
   910  		p.To.Reg = v.Args[1].Reg()
   911  		gc.AddAux(&p.To, v)
   912  	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
   913  		r := v.Reg0()
   914  		if r != v.Args[0].Reg() {
   915  			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
   916  		}
   917  		s.Prog(x86.ALOCK)
   918  		p := s.Prog(v.Op.Asm())
   919  		p.From.Type = obj.TYPE_REG
   920  		p.From.Reg = r
   921  		p.To.Type = obj.TYPE_MEM
   922  		p.To.Reg = v.Args[1].Reg()
   923  		gc.AddAux(&p.To, v)
   924  	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
   925  		if v.Args[1].Reg() != x86.REG_AX {
   926  			v.Fatalf("input[1] not in AX %s", v.LongString())
   927  		}
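        		// LOCK CMPXCHG compares AX with the memory operand and, on a match,
        		// stores the new value; either way ZF records the outcome, which the
        		// SETEQ below materializes into the boolean result.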
   928  		s.Prog(x86.ALOCK)
   929  		p := s.Prog(v.Op.Asm())
   930  		p.From.Type = obj.TYPE_REG
   931  		p.From.Reg = v.Args[2].Reg()
   932  		p.To.Type = obj.TYPE_MEM
   933  		p.To.Reg = v.Args[0].Reg()
   934  		gc.AddAux(&p.To, v)
   935  		p = s.Prog(x86.ASETEQ)
   936  		p.To.Type = obj.TYPE_REG
   937  		p.To.Reg = v.Reg0()
   938  	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
   939  		s.Prog(x86.ALOCK)
   940  		p := s.Prog(v.Op.Asm())
   941  		p.From.Type = obj.TYPE_REG
   942  		p.From.Reg = v.Args[1].Reg()
   943  		p.To.Type = obj.TYPE_MEM
   944  		p.To.Reg = v.Args[0].Reg()
   945  		gc.AddAux(&p.To, v)
   946  	case ssa.OpClobber:
   947  		p := s.Prog(x86.AMOVL)
   948  		p.From.Type = obj.TYPE_CONST
   949  		p.From.Offset = 0xdeaddead
   950  		p.To.Type = obj.TYPE_MEM
   951  		p.To.Reg = x86.REG_SP
   952  		gc.AddAux(&p.To, v)
   953  		p = s.Prog(x86.AMOVL)
   954  		p.From.Type = obj.TYPE_CONST
   955  		p.From.Offset = 0xdeaddead
   956  		p.To.Type = obj.TYPE_MEM
   957  		p.To.Reg = x86.REG_SP
   958  		gc.AddAux(&p.To, v)
   959  		p.To.Offset += 4
   960  	default:
   961  		v.Fatalf("genValue not implemented: %s", v.LongString())
   962  	}
   963  }
   964  
   965  var blockJump = [...]struct {
   966  	asm, invasm obj.As
   967  }{
   968  	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
   969  	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
   970  	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
   971  	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
   972  	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
   973  	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
   974  	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
   975  	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
   976  	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
   977  	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
   978  	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
   979  	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
   980  }
   981  
   982  var eqfJumps = [2][2]gc.FloatingEQNEJump{
   983  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
   984  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
   985  }
   986  var nefJumps = [2][2]gc.FloatingEQNEJump{
   987  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
   988  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
   989  }
   990  
   991  func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
   992  	switch b.Kind {
   993  	case ssa.BlockPlain:
   994  		if b.Succs[0].Block() != next {
   995  			p := s.Prog(obj.AJMP)
   996  			p.To.Type = obj.TYPE_BRANCH
   997  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
   998  		}
   999  	case ssa.BlockDefer:
  1000  		// defer returns in AX:
  1001  		// 0 if we should continue executing
  1002  		// 1 if we should jump to deferreturn call
  1003  		p := s.Prog(x86.ATESTL)
  1004  		p.From.Type = obj.TYPE_REG
  1005  		p.From.Reg = x86.REG_AX
  1006  		p.To.Type = obj.TYPE_REG
  1007  		p.To.Reg = x86.REG_AX
  1008  		p = s.Prog(x86.AJNE)
  1009  		p.To.Type = obj.TYPE_BRANCH
  1010  		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
  1011  		if b.Succs[0].Block() != next {
  1012  			p := s.Prog(obj.AJMP)
  1013  			p.To.Type = obj.TYPE_BRANCH
  1014  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1015  		}
  1016  	case ssa.BlockExit:
  1017  		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
  1018  	case ssa.BlockRet:
  1019  		s.Prog(obj.ARET)
  1020  	case ssa.BlockRetJmp:
  1021  		p := s.Prog(obj.AJMP)
  1022  		p.To.Type = obj.TYPE_MEM
  1023  		p.To.Name = obj.NAME_EXTERN
  1024  		p.To.Sym = b.Aux.(*obj.LSym)
  1025  
  1026  	case ssa.BlockAMD64EQF:
  1027  		s.FPJump(b, next, &eqfJumps)
  1028  
  1029  	case ssa.BlockAMD64NEF:
  1030  		s.FPJump(b, next, &nefJumps)
  1031  
  1032  	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
  1033  		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
  1034  		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
  1035  		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
  1036  		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
  1037  		jmp := blockJump[b.Kind]
  1038  		var p *obj.Prog
  1039  		switch next {
  1040  		case b.Succs[0].Block():
  1041  			p = s.Prog(jmp.invasm)
  1042  			p.To.Type = obj.TYPE_BRANCH
  1043  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
  1044  		case b.Succs[1].Block():
  1045  			p = s.Prog(jmp.asm)
  1046  			p.To.Type = obj.TYPE_BRANCH
  1047  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1048  		default:
  1049  			p = s.Prog(jmp.asm)
  1050  			p.To.Type = obj.TYPE_BRANCH
  1051  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1052  			q := s.Prog(obj.AJMP)
  1053  			q.To.Type = obj.TYPE_BRANCH
  1054  			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
  1055  		}
  1056  
  1057  	default:
  1058  		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
  1059  	}
  1060  }