github.com/riscv/riscv-go@v0.0.0-20200123204226-124ebd6fcc8e/src/cmd/compile/internal/amd64/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
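	// Walk the values backwards, tracking whether the flags are live.
	// Constant loads seen while the flags are live are marked so that the
	// MOV $0 -> XOR rewrite (which clobbers flags) is suppressed later.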
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t ssa.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t ssa.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t ssa.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with a 1-byte opcode,
		// so use movups, which has a 2-byte opcode.
		return x86.AMOVUPS
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS // int128s are in SSE registers
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//     dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(op obj.As, dest, src int16) *obj.Prog {
	p := gc.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD.
// See runtime/mkduff.go.
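// duffStart returns the offset from the start of duffzero
// at which to begin for a block of the given size.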
func duffStart(size int64) int64 {
	x, _ := duff(size)
	return x
}
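// duffAdj returns the pointer adjustment (in bytes) to apply to DI
// before entering duffzero for a block of the given size.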
func duffAdj(size int64) int64 {
	_, x := duff(size)
	return x
}

// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
func duff(size int64) (int64, int64) {
	if size < 32 || size > 1024 || size%dzClearStep != 0 {
		panic("bad duffzero size")
	}
	steps := size / dzClearStep
	blocks := steps / dzBlockLen
	steps %= dzBlockLen
	off := dzBlockSize * (dzBlocks - blocks)
	var adj int64
	if steps != 0 {
		off -= dzAddSize
		off -= dzMovSize * steps
		adj -= dzClearStep * (dzBlockLen - steps)
	}
	return off, adj
}

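// ssaGenValue emits the machine instructions for a single SSA value v.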
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	s.SetPos(v.Pos)
	switch v.Op {
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
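			// Neither input shares a register with the output, so
			// synthesize a three-operand add with LEA: r = r1 + r2.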
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := gc.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64PXOR:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// Zero extend dividend.
		c := gc.Prog(x86.AXORL)
		c.From.Type = obj.TYPE_REG
		c.From.Reg = x86.REG_DX
		c.To.Type = obj.TYPE_REG
		c.To.Reg = x86.REG_DX

		// Issue divide.
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
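		// The sequence emitted below is roughly:
		//	CMPx divisor, $-1
		//	JEQ  fixup
		//	sign-extend AX into DX (CQO/CDQ/CWD)
		//	IDIVx divisor
		//	JMP  done
		// fixup:
		//	NEGQ AX     (n / -1 = -n)
		//	XORL DX, DX (n % -1 = 0)
		// done: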
		var c *obj.Prog
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			c = gc.Prog(x86.ACMPQ)
		case ssa.OpAMD64DIVL:
			c = gc.Prog(x86.ACMPL)
		case ssa.OpAMD64DIVW:
			c = gc.Prog(x86.ACMPW)
		}
		c.From.Type = obj.TYPE_REG
		c.From.Reg = r
		c.To.Type = obj.TYPE_CONST
		c.To.Offset = -1
		j1 := gc.Prog(x86.AJEQ)
		j1.To.Type = obj.TYPE_BRANCH

		// Sign extend dividend.
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			gc.Prog(x86.ACQO)
		case ssa.OpAMD64DIVL:
			gc.Prog(x86.ACDQ)
		case ssa.OpAMD64DIVW:
			gc.Prog(x86.ACWD)
		}

		// Issue divide.
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

		// Skip over -1 fixup code.
		j2 := gc.Prog(obj.AJMP)
		j2.To.Type = obj.TYPE_BRANCH

		// Issue -1 fixup code.
		// n / -1 = -n
		n1 := gc.Prog(x86.ANEGQ)
		n1.To.Type = obj.TYPE_REG
		n1.To.Reg = x86.REG_AX

		// n % -1 == 0
		n2 := gc.Prog(x86.AXORL)
		n2.From.Type = obj.TYPE_REG
		n2.From.Reg = x86.REG_DX
		n2.To.Type = obj.TYPE_REG
		n2.To.Reg = x86.REG_DX

		// TODO(khr): issue only the -1 fixup code we need.
		// For instance, if only the quotient is used, no point in zeroing the remainder.

		j1.To.Val = n1
		j2.To.Val = s.Pc()

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB,
		ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU:
		// The frontend rewrites constant division by 8/16/32-bit integers into
		// HMUL by a constant.
		// SSA rewrites generate the 64-bit versions.

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := gc.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64MULQU2:
		// Arg[0] is already in AX as it's the only register we allow
		// results hi in DX, lo in AX
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64DIVQU2:
		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow
		// results q in AX, r in DX
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()

	case ssa.OpAMD64AVGQU:
		// compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = v.Args[1].Reg()
		p = gc.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				var asm obj.As
				// The software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc;
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is a little faster and
				// makes the binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := gc.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := gc.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := gc.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = v.Args[0].Reg()

	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := v.Reg()
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := gc.Prog(x86.ALEAQ)
		switch v.Op {
		case ssa.OpAMD64LEAQ1:
			p.From.Scale = 1
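			// SP cannot be encoded as an index register, so if the
			// index is SP, swap base and index (legal only because
			// the scale is 1).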
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64LEAQ2:
			p.From.Scale = 2
		case ssa.OpAMD64LEAQ4:
			p.From.Scale = 4
		case ssa.OpAMD64LEAQ8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB:
		opregreg(v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// The Go assembler has swapped operands for UCOMISx relative to CMP,
		// so we must account for that here.
		opregreg(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := v.Reg()
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := v.Reg()
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVWloadidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 8
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 4
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVWstoreidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 2
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.OpAMD64MOVLstoreconstidx4:
			p.To.Scale = 4
		case ssa.OpAMD64MOVQstoreconstidx8:
			p.To.Scale = 8
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
		r := v.Reg()
		// Break false dependency on destination register.
		opregreg(x86.AXORPS, r, r)
		opregreg(v.Op.Asm(), r, v.Args[0].Reg())
	case ssa.OpAMD64DUFFZERO:
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
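		// adj compensates for entering duffzero partway through a
		// 4-MOVUPS block: the first MOVUPS instructions executed write
		// at nonzero offsets from DI, so DI is moved back first so that
		// those stores land at the start of the region being zeroed.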
		var p *obj.Prog
		if adj != 0 {
			p = gc.Prog(x86.AADDQ)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = adj
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		}
		p = gc.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
		p.To.Offset = off
	case ssa.OpAMD64MOVOconst:
		if v.AuxInt != 0 {
			v.Fatalf("MOVOconst can only do constant=0")
		}
		r := v.Reg()
		opregreg(x86.AXORPS, r, r)
	case ssa.OpAMD64DUFFCOPY:
		p := gc.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
		p.To.Offset = v.AuxInt

	case ssa.OpCopy, ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.OpPhi:
		gc.CheckLoweredPhi(v)
	case ssa.OpInitMem:
		// memory arg needs no code
	case ssa.OpArg:
		// input args need no code
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.OpAMD64LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVQ (TLS), r
			p := gc.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVQ TLS, r
			// MOVQ (r)(TLS*1), r
			p := gc.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := gc.Prog(x86.AMOVQ)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.OpAMD64CALLstatic:
		if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym {
			// Deferred calls will appear to be returning to
			// the CALL deferreturn(SB) that we are about to emit.
			// However, the stack trace code will show the line
			// of the instruction byte before the return PC.
			// To avoid that being an unrelated instruction,
			// insert an actual hardware NOP that will have the right line number.
			// This is different from obj.ANOP, which is a virtual no-op
			// that doesn't make it into the instruction stream.
			ginsnop()
		}
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym))
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLclosure:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLdefer:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Deferproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLgo:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Newproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLinter:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64SQRTSD:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpSP, ssa.OpSB:
		// nothing to do
	case ssa.OpSelect0, ssa.OpSelect1:
		// nothing to do
	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETNEF:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := gc.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ORL avoids a partial register write and is smaller than the ORQ used by the old compiler
		opregreg(x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.OpAMD64SETEQF:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := gc.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ANDL avoids a partial register write and is smaller than the ANDQ used by the old compiler
		opregreg(x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		gc.Prog(x86.AREP)
		gc.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		gc.Prog(x86.AREP)
		gc.Prog(x86.AMOVSQ)
	case ssa.OpVarDef:
		gc.Gvardef(v.Aux.(*gc.Node))
	case ssa.OpVarKill:
		gc.Gvarkill(v.Aux.(*gc.Node))
	case ssa.OpVarLive:
		gc.Gvarlive(v.Aux.(*gc.Node))
	case ssa.OpKeepAlive:
		gc.KeepAlive(v)
	case ssa.OpAMD64LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead?  It is larger
		// but it doesn't have a false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := gc.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
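		// XCHG with a memory operand is implicitly locked,
		// so no LOCK prefix is needed here.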
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
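		// LOCK XADD atomically adds r to the memory operand and
		// leaves the operand's old value in r (result 0).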
		gc.Prog(x86.ALOCK)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
		if v.Args[1].Reg() != x86.REG_AX {
			v.Fatalf("input[1] not in AX %s", v.LongString())
		}
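		// LOCK CMPXCHG compares AX with the memory operand; on a match it
		// stores the new value and sets ZF, which the SETEQ below turns
		// into the boolean result.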
		gc.Prog(x86.ALOCK)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		p = gc.Prog(x86.ASETEQ)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
		gc.Prog(x86.ALOCK)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

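// blockJump maps each AMD64 conditional block kind to the jump
// instruction taken when the condition holds (asm) and the inverted
// jump used when the "true" successor is the fallthrough (invasm).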
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

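// eqfJumps and nefJumps drive gc.SSAGenFPJump for floating-point
// equality and inequality branches, where the parity flag distinguishes
// the unordered (NaN) case; the outer index selects which successor is
// the fallthrough block.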
var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

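// ssaGenBlock emits the control-flow instructions that end block b.
// next is the block that will be laid out immediately after b, so a
// jump to it can be omitted.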
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	s.SetPos(b.Pos)

	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := gc.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := gc.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = gc.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := gc.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		gc.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		gc.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := gc.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(b.Aux.(*gc.Sym))

	case ssa.BlockAMD64EQF:
		gc.SSAGenFPJump(s, b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		gc.SSAGenFPJump(s, b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		likely := b.Likely
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			p = gc.Prog(jmp.invasm)
			likely *= -1
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			p = gc.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			p = gc.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := gc.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

		// liblink reorders the instruction stream as it sees fit.
		// Pass along what we know so liblink can make use of it.
		// TODO: Once we've fully switched to SSA,
		// make liblink leave our output alone.
		switch likely {
		case ssa.BranchUnlikely:
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 0
		case ssa.BranchLikely:
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 1
		}

	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}