github.com/tidwall/go@v0.0.0-20170415222209-6694a6888b7d/src/cmd/compile/internal/amd64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package amd64
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  
    11  	"cmd/compile/internal/gc"
    12  	"cmd/compile/internal/ssa"
    13  	"cmd/internal/obj"
    14  	"cmd/internal/obj/x86"
    15  )
    16  
     17  // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
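         // A marked constant is later emitted with PRESERVEFLAGS set (see the
         // MOVQconst/MOVLconst case in ssaGenValue), which keeps the assembler
         // from rewriting MOV $0, reg into the flag-clobbering XOR reg, reg.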
    18  func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
    19  	flive := b.FlagsLiveAtEnd
    20  	if b.Control != nil && b.Control.Type.IsFlags() {
    21  		flive = true
    22  	}
    23  	for i := len(b.Values) - 1; i >= 0; i-- {
    24  		v := b.Values[i]
    25  		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
    26  			// The "mark" is any non-nil Aux value.
    27  			v.Aux = v
    28  		}
    29  		if v.Type.IsFlags() {
    30  			flive = false
    31  		}
    32  		for _, a := range v.Args {
    33  			if a.Type.IsFlags() {
    34  				flive = true
    35  			}
    36  		}
    37  	}
    38  }
    39  
    40  // loadByType returns the load instruction of the given type.
    41  func loadByType(t ssa.Type) obj.As {
    42  	// Avoid partial register write
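         	// Byte and word loads are widened to zero-extending 32-bit loads
         	// (MOVBLZX/MOVWLZX) so the destination register is written in full.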
    43  	if !t.IsFloat() && t.Size() <= 2 {
    44  		if t.Size() == 1 {
    45  			return x86.AMOVBLZX
    46  		} else {
    47  			return x86.AMOVWLZX
    48  		}
    49  	}
    50  	// Otherwise, there's no difference between load and store opcodes.
    51  	return storeByType(t)
    52  }
    53  
    54  // storeByType returns the store instruction of the given type.
    55  func storeByType(t ssa.Type) obj.As {
    56  	width := t.Size()
    57  	if t.IsFloat() {
    58  		switch width {
    59  		case 4:
    60  			return x86.AMOVSS
    61  		case 8:
    62  			return x86.AMOVSD
    63  		}
    64  	} else {
    65  		switch width {
    66  		case 1:
    67  			return x86.AMOVB
    68  		case 2:
    69  			return x86.AMOVW
    70  		case 4:
    71  			return x86.AMOVL
    72  		case 8:
    73  			return x86.AMOVQ
    74  		}
    75  	}
    76  	panic("bad store type")
    77  }
    78  
    79  // moveByType returns the reg->reg move instruction of the given type.
    80  func moveByType(t ssa.Type) obj.As {
    81  	if t.IsFloat() {
     82  		// Moving the whole SSE2 register is faster
     83  		// than moving just the correct low portion of it.
     84  		// There is no xmm->xmm move with a 1-byte opcode,
     85  		// so use MOVUPS, which has a 2-byte opcode.
    86  		return x86.AMOVUPS
    87  	} else {
    88  		switch t.Size() {
    89  		case 1:
    90  			// Avoids partial register write
    91  			return x86.AMOVL
    92  		case 2:
    93  			return x86.AMOVL
    94  		case 4:
    95  			return x86.AMOVL
    96  		case 8:
    97  			return x86.AMOVQ
    98  		case 16:
    99  			return x86.AMOVUPS // int128s are in SSE registers
   100  		default:
   101  			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
   102  		}
   103  	}
   104  }
   105  
   106  // opregreg emits instructions for
   107  //     dest := dest(To) op src(From)
   108  // and also returns the created obj.Prog so it
   109  // may be further adjusted (offset, scale, etc).
   110  func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
   111  	p := s.Prog(op)
   112  	p.From.Type = obj.TYPE_REG
   113  	p.To.Type = obj.TYPE_REG
   114  	p.To.Reg = dest
   115  	p.From.Reg = src
   116  	return p
   117  }
   118  
    119  // DUFFZERO consists of repeated blocks of 4 MOVUPS instructions followed by an ADD;
    120  // see runtime/mkduff.go.
   121  func duffStart(size int64) int64 {
   122  	x, _ := duff(size)
   123  	return x
   124  }
   125  func duffAdj(size int64) int64 {
   126  	_, x := duff(size)
   127  	return x
   128  }
   129  
    130  // duff returns the offset (from duffzero, in bytes) and pointer adjustment (in bytes)
   131  // required to use the duffzero mechanism for a block of the given size.
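         // For example, assuming dzClearStep = 16 and dzBlockLen = 4 (matching the
         // 4-MOVUPS blocks noted above), a 64-byte clear is exactly one block:
         // steps = 4, blocks = 1, and steps%dzBlockLen = 0, so the entry point skips
         // all but the last block of duffzero and no pointer adjustment is needed.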
   132  func duff(size int64) (int64, int64) {
   133  	if size < 32 || size > 1024 || size%dzClearStep != 0 {
   134  		panic("bad duffzero size")
   135  	}
   136  	steps := size / dzClearStep
   137  	blocks := steps / dzBlockLen
   138  	steps %= dzBlockLen
   139  	off := dzBlockSize * (dzBlocks - blocks)
   140  	var adj int64
   141  	if steps != 0 {
   142  		off -= dzAddSize
   143  		off -= dzMovSize * steps
   144  		adj -= dzClearStep * (dzBlockLen - steps)
   145  	}
   146  	return off, adj
   147  }
   148  
   149  func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
   150  	switch v.Op {
   151  	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
   152  		r := v.Reg()
   153  		r1 := v.Args[0].Reg()
   154  		r2 := v.Args[1].Reg()
   155  		switch {
   156  		case r == r1:
   157  			p := s.Prog(v.Op.Asm())
   158  			p.From.Type = obj.TYPE_REG
   159  			p.From.Reg = r2
   160  			p.To.Type = obj.TYPE_REG
   161  			p.To.Reg = r
   162  		case r == r2:
   163  			p := s.Prog(v.Op.Asm())
   164  			p.From.Type = obj.TYPE_REG
   165  			p.From.Reg = r1
   166  			p.To.Type = obj.TYPE_REG
   167  			p.To.Reg = r
   168  		default:
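         			// Neither input is in the output register, so synthesize a
         			// three-address add with LEA: r = r1 + 1*r2.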
   169  			var asm obj.As
   170  			if v.Op == ssa.OpAMD64ADDQ {
   171  				asm = x86.ALEAQ
   172  			} else {
   173  				asm = x86.ALEAL
   174  			}
   175  			p := s.Prog(asm)
   176  			p.From.Type = obj.TYPE_MEM
   177  			p.From.Reg = r1
   178  			p.From.Scale = 1
   179  			p.From.Index = r2
   180  			p.To.Type = obj.TYPE_REG
   181  			p.To.Reg = r
   182  		}
   183  	// 2-address opcode arithmetic
   184  	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
   185  		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
   186  		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
   187  		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
   188  		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
   189  		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
   190  		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
   191  		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
   192  		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
   193  		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
   194  		ssa.OpAMD64PXOR:
   195  		r := v.Reg()
   196  		if r != v.Args[0].Reg() {
   197  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   198  		}
   199  		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())
   200  
   201  	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
   202  		// Arg[0] (the dividend) is in AX.
   203  		// Arg[1] (the divisor) can be in any other register.
   204  		// Result[0] (the quotient) is in AX.
   205  		// Result[1] (the remainder) is in DX.
   206  		r := v.Args[1].Reg()
   207  
   208  		// Zero extend dividend.
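         		// The unsigned divide instruction treats DX:AX as a double-width
         		// dividend, so DX is zeroed to extend the value in AX.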
   209  		c := s.Prog(x86.AXORL)
   210  		c.From.Type = obj.TYPE_REG
   211  		c.From.Reg = x86.REG_DX
   212  		c.To.Type = obj.TYPE_REG
   213  		c.To.Reg = x86.REG_DX
   214  
   215  		// Issue divide.
   216  		p := s.Prog(v.Op.Asm())
   217  		p.From.Type = obj.TYPE_REG
   218  		p.From.Reg = r
   219  
   220  	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
   221  		// Arg[0] (the dividend) is in AX.
   222  		// Arg[1] (the divisor) can be in any other register.
   223  		// Result[0] (the quotient) is in AX.
   224  		// Result[1] (the remainder) is in DX.
   225  		r := v.Args[1].Reg()
   226  
   227  		// CPU faults upon signed overflow, which occurs when the most
   228  		// negative int is divided by -1. Handle divide by -1 as a special case.
   229  		var c *obj.Prog
   230  		switch v.Op {
   231  		case ssa.OpAMD64DIVQ:
   232  			c = s.Prog(x86.ACMPQ)
   233  		case ssa.OpAMD64DIVL:
   234  			c = s.Prog(x86.ACMPL)
   235  		case ssa.OpAMD64DIVW:
   236  			c = s.Prog(x86.ACMPW)
   237  		}
   238  		c.From.Type = obj.TYPE_REG
   239  		c.From.Reg = r
   240  		c.To.Type = obj.TYPE_CONST
   241  		c.To.Offset = -1
   242  		j1 := s.Prog(x86.AJEQ)
   243  		j1.To.Type = obj.TYPE_BRANCH
   244  
   245  		// Sign extend dividend.
   246  		switch v.Op {
   247  		case ssa.OpAMD64DIVQ:
   248  			s.Prog(x86.ACQO)
   249  		case ssa.OpAMD64DIVL:
   250  			s.Prog(x86.ACDQ)
   251  		case ssa.OpAMD64DIVW:
   252  			s.Prog(x86.ACWD)
   253  		}
   254  
   255  		// Issue divide.
   256  		p := s.Prog(v.Op.Asm())
   257  		p.From.Type = obj.TYPE_REG
   258  		p.From.Reg = r
   259  
   260  		// Skip over -1 fixup code.
   261  		j2 := s.Prog(obj.AJMP)
   262  		j2.To.Type = obj.TYPE_BRANCH
   263  
   264  		// Issue -1 fixup code.
   265  		// n / -1 = -n
   266  		n1 := s.Prog(x86.ANEGQ)
   267  		n1.To.Type = obj.TYPE_REG
   268  		n1.To.Reg = x86.REG_AX
   269  
   270  		// n % -1 == 0
   271  		n2 := s.Prog(x86.AXORL)
   272  		n2.From.Type = obj.TYPE_REG
   273  		n2.From.Reg = x86.REG_DX
   274  		n2.To.Type = obj.TYPE_REG
   275  		n2.To.Reg = x86.REG_DX
   276  
   277  		// TODO(khr): issue only the -1 fixup code we need.
   278  		// For instance, if only the quotient is used, no point in zeroing the remainder.
   279  
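         		// Patch the branch targets: the JEQ taken on a -1 divisor jumps to
         		// the fixup code at n1, and the JMP after the divide skips past the
         		// fixup to the next instruction.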
   280  		j1.To.Val = n1
   281  		j2.To.Val = s.Pc()
   282  
   283  	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
    284  		// The frontend rewrites constant division by 8/16/32-bit integers
    285  		// into HMUL by a constant; SSA rewrites generate the 64-bit
    286  		// versions.
   287  
    288  		// Arg[0] is already in AX, as it's the only register we allow,
    289  		// and DX is the only output we care about (the high bits).
   290  		p := s.Prog(v.Op.Asm())
   291  		p.From.Type = obj.TYPE_REG
   292  		p.From.Reg = v.Args[1].Reg()
   293  
   294  		// IMULB puts the high portion in AH instead of DL,
   295  		// so move it to DL for consistency
   296  		if v.Type.Size() == 1 {
   297  			m := s.Prog(x86.AMOVB)
   298  			m.From.Type = obj.TYPE_REG
   299  			m.From.Reg = x86.REG_AH
   300  			m.To.Type = obj.TYPE_REG
   301  			m.To.Reg = x86.REG_DX
   302  		}
   303  
   304  	case ssa.OpAMD64MULQU2:
    305  		// Arg[0] is already in AX, as it's the only register we allow.
    306  		// Results: hi in DX, lo in AX.
   307  		p := s.Prog(v.Op.Asm())
   308  		p.From.Type = obj.TYPE_REG
   309  		p.From.Reg = v.Args[1].Reg()
   310  
   311  	case ssa.OpAMD64DIVQU2:
    312  		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow.
    313  		// Results: quotient in AX, remainder in DX.
   314  		p := s.Prog(v.Op.Asm())
   315  		p.From.Type = obj.TYPE_REG
   316  		p.From.Reg = v.Args[2].Reg()
   317  
   318  	case ssa.OpAMD64AVGQU:
    319  		// Compute (x+y)/2 unsigned.
    320  		// Do a 64-bit add; the overflow goes into the carry bit.
    321  		// Shift right once and pull the carry back into the 63rd bit.
   322  		r := v.Reg()
   323  		if r != v.Args[0].Reg() {
   324  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   325  		}
   326  		p := s.Prog(x86.AADDQ)
   327  		p.From.Type = obj.TYPE_REG
   328  		p.To.Type = obj.TYPE_REG
   329  		p.To.Reg = r
   330  		p.From.Reg = v.Args[1].Reg()
   331  		p = s.Prog(x86.ARCRQ)
   332  		p.From.Type = obj.TYPE_CONST
   333  		p.From.Offset = 1
   334  		p.To.Type = obj.TYPE_REG
   335  		p.To.Reg = r
   336  
   337  	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
   338  		r := v.Reg()
   339  		a := v.Args[0].Reg()
   340  		if r == a {
   341  			if v.AuxInt == 1 {
   342  				var asm obj.As
    343  				// The software optimization manual recommends add $1,reg.
    344  				// But inc/dec is 1 byte smaller. ICC always uses inc;
    345  				// Clang/GCC choose depending on flags, but prefer add.
    346  				// Experiments show that inc/dec is both a little faster
    347  				// and makes the binary a little smaller.
   348  				if v.Op == ssa.OpAMD64ADDQconst {
   349  					asm = x86.AINCQ
   350  				} else {
   351  					asm = x86.AINCL
   352  				}
   353  				p := s.Prog(asm)
   354  				p.To.Type = obj.TYPE_REG
   355  				p.To.Reg = r
   356  				return
   357  			}
   358  			if v.AuxInt == -1 {
   359  				var asm obj.As
   360  				if v.Op == ssa.OpAMD64ADDQconst {
   361  					asm = x86.ADECQ
   362  				} else {
   363  					asm = x86.ADECL
   364  				}
   365  				p := s.Prog(asm)
   366  				p.To.Type = obj.TYPE_REG
   367  				p.To.Reg = r
   368  				return
   369  			}
   370  			p := s.Prog(v.Op.Asm())
   371  			p.From.Type = obj.TYPE_CONST
   372  			p.From.Offset = v.AuxInt
   373  			p.To.Type = obj.TYPE_REG
   374  			p.To.Reg = r
   375  			return
   376  		}
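         		// The output register differs from the input, so use LEA to add the
         		// constant without clobbering the source register.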
   377  		var asm obj.As
   378  		if v.Op == ssa.OpAMD64ADDQconst {
   379  			asm = x86.ALEAQ
   380  		} else {
   381  			asm = x86.ALEAL
   382  		}
   383  		p := s.Prog(asm)
   384  		p.From.Type = obj.TYPE_MEM
   385  		p.From.Reg = a
   386  		p.From.Offset = v.AuxInt
   387  		p.To.Type = obj.TYPE_REG
   388  		p.To.Reg = r
   389  
   390  	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
   391  		r := v.Reg()
   392  		if r != v.Args[0].Reg() {
   393  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   394  		}
   395  		p := s.Prog(v.Op.Asm())
   396  		p.From.Type = obj.TYPE_REG
   397  		p.From.Reg = v.Args[1].Reg()
   398  		p.To.Type = obj.TYPE_REG
   399  		p.To.Reg = r
   400  
   401  	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
   402  		r := v.Reg()
   403  		if r != v.Args[0].Reg() {
   404  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   405  		}
   406  		p := s.Prog(v.Op.Asm())
   407  		p.From.Type = obj.TYPE_CONST
   408  		p.From.Offset = v.AuxInt
   409  		p.To.Type = obj.TYPE_REG
   410  		p.To.Reg = r
   411  		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
   412  		// then we don't need to use resultInArg0 for these ops.
   413  		//p.From3 = new(obj.Addr)
   414  		//p.From3.Type = obj.TYPE_REG
   415  		//p.From3.Reg = v.Args[0].Reg()
   416  
   417  	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
   418  		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
   419  		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
   420  		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
   421  		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
   422  		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
   423  		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
   424  		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
   425  		r := v.Reg()
   426  		if r != v.Args[0].Reg() {
   427  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   428  		}
   429  		p := s.Prog(v.Op.Asm())
   430  		p.From.Type = obj.TYPE_CONST
   431  		p.From.Offset = v.AuxInt
   432  		p.To.Type = obj.TYPE_REG
   433  		p.To.Reg = r
   434  	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
   435  		r := v.Reg()
   436  		p := s.Prog(v.Op.Asm())
   437  		p.From.Type = obj.TYPE_REG
   438  		p.From.Reg = r
   439  		p.To.Type = obj.TYPE_REG
   440  		p.To.Reg = r
   441  	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
   442  		r := v.Args[0].Reg()
   443  		i := v.Args[1].Reg()
   444  		p := s.Prog(x86.ALEAQ)
   445  		switch v.Op {
   446  		case ssa.OpAMD64LEAQ1:
   447  			p.From.Scale = 1
   448  			if i == x86.REG_SP {
   449  				r, i = i, r
   450  			}
   451  		case ssa.OpAMD64LEAQ2:
   452  			p.From.Scale = 2
   453  		case ssa.OpAMD64LEAQ4:
   454  			p.From.Scale = 4
   455  		case ssa.OpAMD64LEAQ8:
   456  			p.From.Scale = 8
   457  		}
   458  		p.From.Type = obj.TYPE_MEM
   459  		p.From.Reg = r
   460  		p.From.Index = i
   461  		gc.AddAux(&p.From, v)
   462  		p.To.Type = obj.TYPE_REG
   463  		p.To.Reg = v.Reg()
   464  	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL:
   465  		p := s.Prog(v.Op.Asm())
   466  		p.From.Type = obj.TYPE_MEM
   467  		p.From.Reg = v.Args[0].Reg()
   468  		gc.AddAux(&p.From, v)
   469  		p.To.Type = obj.TYPE_REG
   470  		p.To.Reg = v.Reg()
   471  	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
   472  		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
   473  		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
   474  		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
   475  	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
    476  		// The Go assembler has swapped operands for UCOMISx relative to CMP,
    477  		// so we must account for that right here.
   478  		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
   479  	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
   480  		p := s.Prog(v.Op.Asm())
   481  		p.From.Type = obj.TYPE_REG
   482  		p.From.Reg = v.Args[0].Reg()
   483  		p.To.Type = obj.TYPE_CONST
   484  		p.To.Offset = v.AuxInt
   485  	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
   486  		ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst:
   487  		p := s.Prog(v.Op.Asm())
   488  		p.From.Type = obj.TYPE_CONST
   489  		p.From.Offset = v.AuxInt
   490  		p.To.Type = obj.TYPE_REG
   491  		p.To.Reg = v.Args[0].Reg()
   492  	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
   493  		x := v.Reg()
   494  		p := s.Prog(v.Op.Asm())
   495  		p.From.Type = obj.TYPE_CONST
   496  		p.From.Offset = v.AuxInt
   497  		p.To.Type = obj.TYPE_REG
   498  		p.To.Reg = x
   499  		// If flags are live at this instruction, suppress the
   500  		// MOV $0,AX -> XOR AX,AX optimization.
   501  		if v.Aux != nil {
   502  			p.Mark |= x86.PRESERVEFLAGS
   503  		}
   504  	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
   505  		x := v.Reg()
   506  		p := s.Prog(v.Op.Asm())
   507  		p.From.Type = obj.TYPE_FCONST
   508  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   509  		p.To.Type = obj.TYPE_REG
   510  		p.To.Reg = x
   511  	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
   512  		p := s.Prog(v.Op.Asm())
   513  		p.From.Type = obj.TYPE_MEM
   514  		p.From.Reg = v.Args[0].Reg()
   515  		gc.AddAux(&p.From, v)
   516  		p.To.Type = obj.TYPE_REG
   517  		p.To.Reg = v.Reg()
   518  	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8:
   519  		p := s.Prog(v.Op.Asm())
   520  		p.From.Type = obj.TYPE_MEM
   521  		p.From.Reg = v.Args[0].Reg()
   522  		gc.AddAux(&p.From, v)
   523  		p.From.Scale = 8
   524  		p.From.Index = v.Args[1].Reg()
   525  		p.To.Type = obj.TYPE_REG
   526  		p.To.Reg = v.Reg()
   527  	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
   528  		p := s.Prog(v.Op.Asm())
   529  		p.From.Type = obj.TYPE_MEM
   530  		p.From.Reg = v.Args[0].Reg()
   531  		gc.AddAux(&p.From, v)
   532  		p.From.Scale = 4
   533  		p.From.Index = v.Args[1].Reg()
   534  		p.To.Type = obj.TYPE_REG
   535  		p.To.Reg = v.Reg()
   536  	case ssa.OpAMD64MOVWloadidx2:
   537  		p := s.Prog(v.Op.Asm())
   538  		p.From.Type = obj.TYPE_MEM
   539  		p.From.Reg = v.Args[0].Reg()
   540  		gc.AddAux(&p.From, v)
   541  		p.From.Scale = 2
   542  		p.From.Index = v.Args[1].Reg()
   543  		p.To.Type = obj.TYPE_REG
   544  		p.To.Reg = v.Reg()
   545  	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
   546  		r := v.Args[0].Reg()
   547  		i := v.Args[1].Reg()
   548  		if i == x86.REG_SP {
   549  			r, i = i, r
   550  		}
   551  		p := s.Prog(v.Op.Asm())
   552  		p.From.Type = obj.TYPE_MEM
   553  		p.From.Reg = r
   554  		p.From.Scale = 1
   555  		p.From.Index = i
   556  		gc.AddAux(&p.From, v)
   557  		p.To.Type = obj.TYPE_REG
   558  		p.To.Reg = v.Reg()
   559  	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
   560  		p := s.Prog(v.Op.Asm())
   561  		p.From.Type = obj.TYPE_REG
   562  		p.From.Reg = v.Args[1].Reg()
   563  		p.To.Type = obj.TYPE_MEM
   564  		p.To.Reg = v.Args[0].Reg()
   565  		gc.AddAux(&p.To, v)
   566  	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8:
   567  		p := s.Prog(v.Op.Asm())
   568  		p.From.Type = obj.TYPE_REG
   569  		p.From.Reg = v.Args[2].Reg()
   570  		p.To.Type = obj.TYPE_MEM
   571  		p.To.Reg = v.Args[0].Reg()
   572  		p.To.Scale = 8
   573  		p.To.Index = v.Args[1].Reg()
   574  		gc.AddAux(&p.To, v)
   575  	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
   576  		p := s.Prog(v.Op.Asm())
   577  		p.From.Type = obj.TYPE_REG
   578  		p.From.Reg = v.Args[2].Reg()
   579  		p.To.Type = obj.TYPE_MEM
   580  		p.To.Reg = v.Args[0].Reg()
   581  		p.To.Scale = 4
   582  		p.To.Index = v.Args[1].Reg()
   583  		gc.AddAux(&p.To, v)
   584  	case ssa.OpAMD64MOVWstoreidx2:
   585  		p := s.Prog(v.Op.Asm())
   586  		p.From.Type = obj.TYPE_REG
   587  		p.From.Reg = v.Args[2].Reg()
   588  		p.To.Type = obj.TYPE_MEM
   589  		p.To.Reg = v.Args[0].Reg()
   590  		p.To.Scale = 2
   591  		p.To.Index = v.Args[1].Reg()
   592  		gc.AddAux(&p.To, v)
   593  	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
   594  		r := v.Args[0].Reg()
   595  		i := v.Args[1].Reg()
   596  		if i == x86.REG_SP {
   597  			r, i = i, r
   598  		}
   599  		p := s.Prog(v.Op.Asm())
   600  		p.From.Type = obj.TYPE_REG
   601  		p.From.Reg = v.Args[2].Reg()
   602  		p.To.Type = obj.TYPE_MEM
   603  		p.To.Reg = r
   604  		p.To.Scale = 1
   605  		p.To.Index = i
   606  		gc.AddAux(&p.To, v)
   607  	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
   608  		p := s.Prog(v.Op.Asm())
   609  		p.From.Type = obj.TYPE_CONST
   610  		sc := v.AuxValAndOff()
   611  		p.From.Offset = sc.Val()
   612  		p.To.Type = obj.TYPE_MEM
   613  		p.To.Reg = v.Args[0].Reg()
   614  		gc.AddAux2(&p.To, v, sc.Off())
   615  	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
   616  		p := s.Prog(v.Op.Asm())
   617  		p.From.Type = obj.TYPE_CONST
   618  		sc := v.AuxValAndOff()
   619  		p.From.Offset = sc.Val()
   620  		r := v.Args[0].Reg()
   621  		i := v.Args[1].Reg()
   622  		switch v.Op {
   623  		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
   624  			p.To.Scale = 1
   625  			if i == x86.REG_SP {
   626  				r, i = i, r
   627  			}
   628  		case ssa.OpAMD64MOVWstoreconstidx2:
   629  			p.To.Scale = 2
   630  		case ssa.OpAMD64MOVLstoreconstidx4:
   631  			p.To.Scale = 4
   632  		case ssa.OpAMD64MOVQstoreconstidx8:
   633  			p.To.Scale = 8
   634  		}
   635  		p.To.Type = obj.TYPE_MEM
   636  		p.To.Reg = r
   637  		p.To.Index = i
   638  		gc.AddAux2(&p.To, v, sc.Off())
   639  	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
   640  		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
   641  		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
   642  		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
   643  	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
   644  		r := v.Reg()
   645  		// Break false dependency on destination register.
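         		// CVTSI2Sx writes only the low part of the XMM destination, so
         		// clearing it with XORPS first keeps this instruction from depending
         		// on the register's previous contents.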
   646  		opregreg(s, x86.AXORPS, r, r)
   647  		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
   648  	case ssa.OpAMD64ADDQmem, ssa.OpAMD64ADDLmem, ssa.OpAMD64SUBQmem, ssa.OpAMD64SUBLmem,
   649  		ssa.OpAMD64ANDQmem, ssa.OpAMD64ANDLmem, ssa.OpAMD64ORQmem, ssa.OpAMD64ORLmem,
   650  		ssa.OpAMD64XORQmem, ssa.OpAMD64XORLmem, ssa.OpAMD64ADDSDmem, ssa.OpAMD64ADDSSmem,
   651  		ssa.OpAMD64SUBSDmem, ssa.OpAMD64SUBSSmem, ssa.OpAMD64MULSDmem, ssa.OpAMD64MULSSmem:
   652  		p := s.Prog(v.Op.Asm())
   653  		p.From.Type = obj.TYPE_MEM
   654  		p.From.Reg = v.Args[1].Reg()
   655  		gc.AddAux(&p.From, v)
   656  		p.To.Type = obj.TYPE_REG
   657  		p.To.Reg = v.Reg()
   658  		if v.Reg() != v.Args[0].Reg() {
   659  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   660  		}
   661  	case ssa.OpAMD64DUFFZERO:
   662  		off := duffStart(v.AuxInt)
   663  		adj := duffAdj(v.AuxInt)
   664  		var p *obj.Prog
   665  		if adj != 0 {
   666  			p = s.Prog(x86.AADDQ)
   667  			p.From.Type = obj.TYPE_CONST
   668  			p.From.Offset = adj
   669  			p.To.Type = obj.TYPE_REG
   670  			p.To.Reg = x86.REG_DI
   671  		}
   672  		p = s.Prog(obj.ADUFFZERO)
   673  		p.To.Type = obj.TYPE_ADDR
   674  		p.To.Sym = gc.Duffzero
   675  		p.To.Offset = off
   676  	case ssa.OpAMD64MOVOconst:
   677  		if v.AuxInt != 0 {
   678  			v.Fatalf("MOVOconst can only do constant=0")
   679  		}
   680  		r := v.Reg()
   681  		opregreg(s, x86.AXORPS, r, r)
   682  	case ssa.OpAMD64DUFFCOPY:
   683  		p := s.Prog(obj.ADUFFCOPY)
   684  		p.To.Type = obj.TYPE_ADDR
   685  		p.To.Sym = gc.Duffcopy
   686  		p.To.Offset = v.AuxInt
   687  
   688  	case ssa.OpCopy, ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
   689  		if v.Type.IsMemory() {
   690  			return
   691  		}
   692  		x := v.Args[0].Reg()
   693  		y := v.Reg()
   694  		if x != y {
   695  			opregreg(s, moveByType(v.Type), y, x)
   696  		}
   697  	case ssa.OpLoadReg:
   698  		if v.Type.IsFlags() {
   699  			v.Fatalf("load flags not implemented: %v", v.LongString())
   700  			return
   701  		}
   702  		p := s.Prog(loadByType(v.Type))
   703  		gc.AddrAuto(&p.From, v.Args[0])
   704  		p.To.Type = obj.TYPE_REG
   705  		p.To.Reg = v.Reg()
   706  
   707  	case ssa.OpStoreReg:
   708  		if v.Type.IsFlags() {
   709  			v.Fatalf("store flags not implemented: %v", v.LongString())
   710  			return
   711  		}
   712  		p := s.Prog(storeByType(v.Type))
   713  		p.From.Type = obj.TYPE_REG
   714  		p.From.Reg = v.Args[0].Reg()
   715  		gc.AddrAuto(&p.To, v)
   716  	case ssa.OpAMD64LoweredGetClosurePtr:
   717  		// Closure pointer is DX.
   718  		gc.CheckLoweredGetClosurePtr(v)
   719  	case ssa.OpAMD64LoweredGetG:
   720  		r := v.Reg()
   721  		// See the comments in cmd/internal/obj/x86/obj6.go
   722  		// near CanUse1InsnTLS for a detailed explanation of these instructions.
   723  		if x86.CanUse1InsnTLS(gc.Ctxt) {
   724  			// MOVQ (TLS), r
   725  			p := s.Prog(x86.AMOVQ)
   726  			p.From.Type = obj.TYPE_MEM
   727  			p.From.Reg = x86.REG_TLS
   728  			p.To.Type = obj.TYPE_REG
   729  			p.To.Reg = r
   730  		} else {
   731  			// MOVQ TLS, r
   732  			// MOVQ (r)(TLS*1), r
   733  			p := s.Prog(x86.AMOVQ)
   734  			p.From.Type = obj.TYPE_REG
   735  			p.From.Reg = x86.REG_TLS
   736  			p.To.Type = obj.TYPE_REG
   737  			p.To.Reg = r
   738  			q := s.Prog(x86.AMOVQ)
   739  			q.From.Type = obj.TYPE_MEM
   740  			q.From.Reg = r
   741  			q.From.Index = x86.REG_TLS
   742  			q.From.Scale = 1
   743  			q.To.Type = obj.TYPE_REG
   744  			q.To.Reg = r
   745  		}
   746  	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
   747  		s.Call(v)
   748  	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
   749  		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
   750  		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
   751  		r := v.Reg()
   752  		if r != v.Args[0].Reg() {
   753  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   754  		}
   755  		p := s.Prog(v.Op.Asm())
   756  		p.To.Type = obj.TYPE_REG
   757  		p.To.Reg = r
   758  	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL:
   759  		p := s.Prog(v.Op.Asm())
   760  		p.From.Type = obj.TYPE_REG
   761  		p.From.Reg = v.Args[0].Reg()
   762  		p.To.Type = obj.TYPE_REG
   763  		p.To.Reg = v.Reg0()
   764  	case ssa.OpAMD64SQRTSD:
   765  		p := s.Prog(v.Op.Asm())
   766  		p.From.Type = obj.TYPE_REG
   767  		p.From.Reg = v.Args[0].Reg()
   768  		p.To.Type = obj.TYPE_REG
   769  		p.To.Reg = v.Reg()
   770  	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL:
   771  		if v.Args[0].Reg() != v.Reg() {
   772  			// POPCNT on Intel has a false dependency on the destination register.
   773  			// Zero the destination to break the dependency.
   774  			p := s.Prog(x86.AMOVQ)
   775  			p.From.Type = obj.TYPE_CONST
   776  			p.From.Offset = 0
   777  			p.To.Type = obj.TYPE_REG
   778  			p.To.Reg = v.Reg()
   779  		}
   780  		p := s.Prog(v.Op.Asm())
   781  		p.From.Type = obj.TYPE_REG
   782  		p.From.Reg = v.Args[0].Reg()
   783  		p.To.Type = obj.TYPE_REG
   784  		p.To.Reg = v.Reg()
   785  	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
   786  		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
   787  		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
   788  		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
   789  		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
   790  		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
   791  		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
   792  		p := s.Prog(v.Op.Asm())
   793  		p.To.Type = obj.TYPE_REG
   794  		p.To.Reg = v.Reg()
   795  
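         	// For floating-point compares, x != y is also true when the operands
         	// are unordered (NaN), so SETNE is combined with SETPS (parity set)
         	// using OR. The SETEQF case below is the dual: SETEQ ANDed with SETPC.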
   796  	case ssa.OpAMD64SETNEF:
   797  		p := s.Prog(v.Op.Asm())
   798  		p.To.Type = obj.TYPE_REG
   799  		p.To.Reg = v.Reg()
   800  		q := s.Prog(x86.ASETPS)
   801  		q.To.Type = obj.TYPE_REG
   802  		q.To.Reg = x86.REG_AX
    803  		// ORL avoids a partial register write and is smaller than ORQ, which the old compiler used.
   804  		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)
   805  
   806  	case ssa.OpAMD64SETEQF:
   807  		p := s.Prog(v.Op.Asm())
   808  		p.To.Type = obj.TYPE_REG
   809  		p.To.Reg = v.Reg()
   810  		q := s.Prog(x86.ASETPC)
   811  		q.To.Type = obj.TYPE_REG
   812  		q.To.Reg = x86.REG_AX
    813  		// ANDL avoids a partial register write and is smaller than ANDQ, which the old compiler used.
   814  		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)
   815  
   816  	case ssa.OpAMD64InvertFlags:
   817  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
   818  	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
   819  		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
   820  	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
   821  		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
   822  	case ssa.OpAMD64REPSTOSQ:
   823  		s.Prog(x86.AREP)
   824  		s.Prog(x86.ASTOSQ)
   825  	case ssa.OpAMD64REPMOVSQ:
   826  		s.Prog(x86.AREP)
   827  		s.Prog(x86.AMOVSQ)
   828  	case ssa.OpAMD64LoweredNilCheck:
   829  		// Issue a load which will fault if the input is nil.
    830  		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
    831  		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
    832  		// but doesn't have a false dependency on AX.
    833  		// Or maybe allocate an output register and use MOVL (reg),reg2?
   834  		// That trades clobbering flags for clobbering a register.
   835  		p := s.Prog(x86.ATESTB)
   836  		p.From.Type = obj.TYPE_REG
   837  		p.From.Reg = x86.REG_AX
   838  		p.To.Type = obj.TYPE_MEM
   839  		p.To.Reg = v.Args[0].Reg()
   840  		gc.AddAux(&p.To, v)
   841  		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
   842  			gc.Warnl(v.Pos, "generated nil check")
   843  		}
   844  	case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
   845  		p := s.Prog(v.Op.Asm())
   846  		p.From.Type = obj.TYPE_MEM
   847  		p.From.Reg = v.Args[0].Reg()
   848  		gc.AddAux(&p.From, v)
   849  		p.To.Type = obj.TYPE_REG
   850  		p.To.Reg = v.Reg0()
   851  	case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
   852  		r := v.Reg0()
   853  		if r != v.Args[0].Reg() {
   854  			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
   855  		}
   856  		p := s.Prog(v.Op.Asm())
   857  		p.From.Type = obj.TYPE_REG
   858  		p.From.Reg = r
   859  		p.To.Type = obj.TYPE_MEM
   860  		p.To.Reg = v.Args[1].Reg()
   861  		gc.AddAux(&p.To, v)
   862  	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
   863  		r := v.Reg0()
   864  		if r != v.Args[0].Reg() {
   865  			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
   866  		}
   867  		s.Prog(x86.ALOCK)
   868  		p := s.Prog(v.Op.Asm())
   869  		p.From.Type = obj.TYPE_REG
   870  		p.From.Reg = r
   871  		p.To.Type = obj.TYPE_MEM
   872  		p.To.Reg = v.Args[1].Reg()
   873  		gc.AddAux(&p.To, v)
   874  	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
   875  		if v.Args[1].Reg() != x86.REG_AX {
   876  			v.Fatalf("input[1] not in AX %s", v.LongString())
   877  		}
   878  		s.Prog(x86.ALOCK)
   879  		p := s.Prog(v.Op.Asm())
   880  		p.From.Type = obj.TYPE_REG
   881  		p.From.Reg = v.Args[2].Reg()
   882  		p.To.Type = obj.TYPE_MEM
   883  		p.To.Reg = v.Args[0].Reg()
   884  		gc.AddAux(&p.To, v)
   885  		p = s.Prog(x86.ASETEQ)
   886  		p.To.Type = obj.TYPE_REG
   887  		p.To.Reg = v.Reg0()
   888  	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
   889  		s.Prog(x86.ALOCK)
   890  		p := s.Prog(v.Op.Asm())
   891  		p.From.Type = obj.TYPE_REG
   892  		p.From.Reg = v.Args[1].Reg()
   893  		p.To.Type = obj.TYPE_MEM
   894  		p.To.Reg = v.Args[0].Reg()
   895  		gc.AddAux(&p.To, v)
   896  	default:
   897  		v.Fatalf("genValue not implemented: %s", v.LongString())
   898  	}
   899  }
   900  
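         // blockJump gives, for each conditional block kind, the jump taken when the
         // condition holds and the inverted jump used when the condition's target is
         // the fall-through successor.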
   901  var blockJump = [...]struct {
   902  	asm, invasm obj.As
   903  }{
   904  	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
   905  	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
   906  	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
   907  	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
   908  	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
   909  	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
   910  	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
   911  	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
   912  	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
   913  	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
   914  	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
   915  	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
   916  }
   917  
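         // EQF/NEF blocks lower to two conditional jumps because floating-point
         // equality must also consult the parity flag (set on unordered compares);
         // the row of the table is selected by which successor is the fall-through.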
   918  var eqfJumps = [2][2]gc.FloatingEQNEJump{
   919  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
   920  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
   921  }
   922  var nefJumps = [2][2]gc.FloatingEQNEJump{
   923  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
   924  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
   925  }
   926  
   927  func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
   928  	switch b.Kind {
   929  	case ssa.BlockPlain:
   930  		if b.Succs[0].Block() != next {
   931  			p := s.Prog(obj.AJMP)
   932  			p.To.Type = obj.TYPE_BRANCH
   933  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
   934  		}
   935  	case ssa.BlockDefer:
    936  		// defer returns in AX:
    937  		// 0 if we should continue executing,
    938  		// 1 if we should jump to the deferreturn call.
   939  		p := s.Prog(x86.ATESTL)
   940  		p.From.Type = obj.TYPE_REG
   941  		p.From.Reg = x86.REG_AX
   942  		p.To.Type = obj.TYPE_REG
   943  		p.To.Reg = x86.REG_AX
   944  		p = s.Prog(x86.AJNE)
   945  		p.To.Type = obj.TYPE_BRANCH
   946  		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
   947  		if b.Succs[0].Block() != next {
   948  			p := s.Prog(obj.AJMP)
   949  			p.To.Type = obj.TYPE_BRANCH
   950  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
   951  		}
   952  	case ssa.BlockExit:
   953  		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
   954  	case ssa.BlockRet:
   955  		s.Prog(obj.ARET)
   956  	case ssa.BlockRetJmp:
   957  		p := s.Prog(obj.AJMP)
   958  		p.To.Type = obj.TYPE_MEM
   959  		p.To.Name = obj.NAME_EXTERN
   960  		p.To.Sym = b.Aux.(*obj.LSym)
   961  
   962  	case ssa.BlockAMD64EQF:
   963  		s.FPJump(b, next, &eqfJumps)
   964  
   965  	case ssa.BlockAMD64NEF:
   966  		s.FPJump(b, next, &nefJumps)
   967  
   968  	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
   969  		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
   970  		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
   971  		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
   972  		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
   973  		jmp := blockJump[b.Kind]
   974  		var p *obj.Prog
   975  		switch next {
   976  		case b.Succs[0].Block():
   977  			p = s.Prog(jmp.invasm)
   978  			p.To.Type = obj.TYPE_BRANCH
   979  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
   980  		case b.Succs[1].Block():
   981  			p = s.Prog(jmp.asm)
   982  			p.To.Type = obj.TYPE_BRANCH
   983  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
   984  		default:
   985  			p = s.Prog(jmp.asm)
   986  			p.To.Type = obj.TYPE_BRANCH
   987  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
   988  			q := s.Prog(obj.AJMP)
   989  			q.To.Type = obj.TYPE_BRANCH
   990  			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
   991  		}
   992  
   993  	default:
   994  		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
   995  	}
   996  }