github.com/karrick/go@v0.0.0-20170817181416-d5b0ec858b37/src/cmd/compile/internal/amd64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package amd64
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  
    11  	"cmd/compile/internal/gc"
    12  	"cmd/compile/internal/ssa"
    13  	"cmd/compile/internal/types"
    14  	"cmd/internal/obj"
    15  	"cmd/internal/obj/x86"
    16  )
    17  
    18  // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
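         // A marked value (non-nil Aux) is later given the x86.PRESERVEFLAGS
         // mark in ssaGenValue, which suppresses the MOV $0,reg -> XOR reg,reg
         // rewrite that would otherwise clobber the live condition codes.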
    19  func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
    20  	flive := b.FlagsLiveAtEnd
    21  	if b.Control != nil && b.Control.Type.IsFlags() {
    22  		flive = true
    23  	}
    24  	for i := len(b.Values) - 1; i >= 0; i-- {
    25  		v := b.Values[i]
    26  		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
    27  			// The "mark" is any non-nil Aux value.
    28  			v.Aux = v
    29  		}
    30  		if v.Type.IsFlags() {
    31  			flive = false
    32  		}
    33  		for _, a := range v.Args {
    34  			if a.Type.IsFlags() {
    35  				flive = true
    36  			}
    37  		}
    38  	}
    39  }
    40  
    41  // loadByType returns the load instruction of the given type.
    42  func loadByType(t *types.Type) obj.As {
    43  	// Avoid partial register write
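         	// A plain MOVB/MOVW load writes only part of the destination register;
         	// the zero-extending forms below write the full 32-bit register and so
         	// avoid a partial-register stall.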
    44  	if !t.IsFloat() && t.Size() <= 2 {
    45  		if t.Size() == 1 {
    46  			return x86.AMOVBLZX
    47  		} else {
    48  			return x86.AMOVWLZX
    49  		}
    50  	}
    51  	// Otherwise, there's no difference between load and store opcodes.
    52  	return storeByType(t)
    53  }
    54  
    55  // storeByType returns the store instruction of the given type.
    56  func storeByType(t *types.Type) obj.As {
    57  	width := t.Size()
    58  	if t.IsFloat() {
    59  		switch width {
    60  		case 4:
    61  			return x86.AMOVSS
    62  		case 8:
    63  			return x86.AMOVSD
    64  		}
    65  	} else {
    66  		switch width {
    67  		case 1:
    68  			return x86.AMOVB
    69  		case 2:
    70  			return x86.AMOVW
    71  		case 4:
    72  			return x86.AMOVL
    73  		case 8:
    74  			return x86.AMOVQ
    75  		}
    76  	}
    77  	panic("bad store type")
    78  }
    79  
    80  // moveByType returns the reg->reg move instruction of the given type.
    81  func moveByType(t *types.Type) obj.As {
    82  	if t.IsFloat() {
    83  		// Moving the whole sse2 register is faster
    84  		// than moving just the correct low portion of it.
    85  		// There is no xmm->xmm move with 1 byte opcode,
    86  		// so use movups, which has 2 byte opcode.
    87  		return x86.AMOVUPS
    88  	} else {
    89  		switch t.Size() {
    90  		case 1:
    91  			// Avoids partial register write
    92  			return x86.AMOVL
    93  		case 2:
    94  			return x86.AMOVL
    95  		case 4:
    96  			return x86.AMOVL
    97  		case 8:
    98  			return x86.AMOVQ
    99  		case 16:
   100  			return x86.AMOVUPS // int128s are in SSE registers
   101  		default:
   102  			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
   103  		}
   104  	}
   105  }
   106  
   107  // opregreg emits instructions for
   108  //     dest := dest(To) op src(From)
   109  // and also returns the created obj.Prog so it
   110  // may be further adjusted (offset, scale, etc).
   111  func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
   112  	p := s.Prog(op)
   113  	p.From.Type = obj.TYPE_REG
   114  	p.To.Type = obj.TYPE_REG
   115  	p.To.Reg = dest
   116  	p.From.Reg = src
   117  	return p
   118  }
   119  
    120  // DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ.
   121  // See runtime/mkduff.go.
   122  func duffStart(size int64) int64 {
   123  	x, _ := duff(size)
   124  	return x
   125  }
   126  func duffAdj(size int64) int64 {
   127  	_, x := duff(size)
   128  	return x
   129  }
   130  
   131  // duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
   132  // required to use the duffzero mechanism for a block of the given size.
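         //
         // For example, size = 96 is 6 MOVUPS clears (each MOVUPS zeroes
         // dzClearStep = 16 bytes): one full block of dzBlockLen = 4 plus 2
         // leftover steps. The returned offset backs up into the tail of the
         // preceding block by one LEAQ and two MOVs, and the pointer adjust is
         // -2*16 = -32 so that those two extra MOVUPSs land on the start of
         // the buffer.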
   133  func duff(size int64) (int64, int64) {
   134  	if size < 32 || size > 1024 || size%dzClearStep != 0 {
   135  		panic("bad duffzero size")
   136  	}
   137  	steps := size / dzClearStep
   138  	blocks := steps / dzBlockLen
   139  	steps %= dzBlockLen
   140  	off := dzBlockSize * (dzBlocks - blocks)
   141  	var adj int64
   142  	if steps != 0 {
   143  		off -= dzLeaqSize
   144  		off -= dzMovSize * steps
   145  		adj -= dzClearStep * (dzBlockLen - steps)
   146  	}
   147  	return off, adj
   148  }
   149  
   150  func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
   151  	switch v.Op {
   152  	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
   153  		r := v.Reg()
   154  		r1 := v.Args[0].Reg()
   155  		r2 := v.Args[1].Reg()
   156  		switch {
   157  		case r == r1:
   158  			p := s.Prog(v.Op.Asm())
   159  			p.From.Type = obj.TYPE_REG
   160  			p.From.Reg = r2
   161  			p.To.Type = obj.TYPE_REG
   162  			p.To.Reg = r
   163  		case r == r2:
   164  			p := s.Prog(v.Op.Asm())
   165  			p.From.Type = obj.TYPE_REG
   166  			p.From.Reg = r1
   167  			p.To.Type = obj.TYPE_REG
   168  			p.To.Reg = r
   169  		default:
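         			// Neither input shares the output register, so use LEA as a
         			// non-destructive three-operand add: r = r1 + r2*1.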
   170  			var asm obj.As
   171  			if v.Op == ssa.OpAMD64ADDQ {
   172  				asm = x86.ALEAQ
   173  			} else {
   174  				asm = x86.ALEAL
   175  			}
   176  			p := s.Prog(asm)
   177  			p.From.Type = obj.TYPE_MEM
   178  			p.From.Reg = r1
   179  			p.From.Scale = 1
   180  			p.From.Index = r2
   181  			p.To.Type = obj.TYPE_REG
   182  			p.To.Reg = r
   183  		}
   184  	// 2-address opcode arithmetic
   185  	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
   186  		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
   187  		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
   188  		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
   189  		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
   190  		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
   191  		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
   192  		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
   193  		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
   194  		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
   195  		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
   196  		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
   197  		ssa.OpAMD64PXOR:
   198  		r := v.Reg()
   199  		if r != v.Args[0].Reg() {
   200  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   201  		}
   202  		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())
   203  
   204  	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
   205  		// Arg[0] (the dividend) is in AX.
   206  		// Arg[1] (the divisor) can be in any other register.
   207  		// Result[0] (the quotient) is in AX.
   208  		// Result[1] (the remainder) is in DX.
   209  		r := v.Args[1].Reg()
   210  
   211  		// Zero extend dividend.
   212  		c := s.Prog(x86.AXORL)
   213  		c.From.Type = obj.TYPE_REG
   214  		c.From.Reg = x86.REG_DX
   215  		c.To.Type = obj.TYPE_REG
   216  		c.To.Reg = x86.REG_DX
   217  
   218  		// Issue divide.
   219  		p := s.Prog(v.Op.Asm())
   220  		p.From.Type = obj.TYPE_REG
   221  		p.From.Reg = r
   222  
   223  	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
   224  		// Arg[0] (the dividend) is in AX.
   225  		// Arg[1] (the divisor) can be in any other register.
   226  		// Result[0] (the quotient) is in AX.
   227  		// Result[1] (the remainder) is in DX.
   228  		r := v.Args[1].Reg()
   229  
   230  		// CPU faults upon signed overflow, which occurs when the most
   231  		// negative int is divided by -1. Handle divide by -1 as a special case.
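         		// The emitted sequence is roughly (Q variant shown):
         		//	CMPQ	divisor, $-1
         		//	JEQ	fixup
         		//	CQO	(sign-extend AX into DX:AX)
         		//	IDIVQ	divisor
         		//	JMP	done
         		// fixup:
         		//	NEGQ	AX	(n / -1 == -n)
         		//	XORL	DX, DX	(n % -1 == 0)
         		// done: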
   232  		var c *obj.Prog
   233  		switch v.Op {
   234  		case ssa.OpAMD64DIVQ:
   235  			c = s.Prog(x86.ACMPQ)
   236  		case ssa.OpAMD64DIVL:
   237  			c = s.Prog(x86.ACMPL)
   238  		case ssa.OpAMD64DIVW:
   239  			c = s.Prog(x86.ACMPW)
   240  		}
   241  		c.From.Type = obj.TYPE_REG
   242  		c.From.Reg = r
   243  		c.To.Type = obj.TYPE_CONST
   244  		c.To.Offset = -1
   245  		j1 := s.Prog(x86.AJEQ)
   246  		j1.To.Type = obj.TYPE_BRANCH
   247  
   248  		// Sign extend dividend.
   249  		switch v.Op {
   250  		case ssa.OpAMD64DIVQ:
   251  			s.Prog(x86.ACQO)
   252  		case ssa.OpAMD64DIVL:
   253  			s.Prog(x86.ACDQ)
   254  		case ssa.OpAMD64DIVW:
   255  			s.Prog(x86.ACWD)
   256  		}
   257  
   258  		// Issue divide.
   259  		p := s.Prog(v.Op.Asm())
   260  		p.From.Type = obj.TYPE_REG
   261  		p.From.Reg = r
   262  
   263  		// Skip over -1 fixup code.
   264  		j2 := s.Prog(obj.AJMP)
   265  		j2.To.Type = obj.TYPE_BRANCH
   266  
   267  		// Issue -1 fixup code.
   268  		// n / -1 = -n
   269  		n1 := s.Prog(x86.ANEGQ)
   270  		n1.To.Type = obj.TYPE_REG
   271  		n1.To.Reg = x86.REG_AX
   272  
   273  		// n % -1 == 0
   274  		n2 := s.Prog(x86.AXORL)
   275  		n2.From.Type = obj.TYPE_REG
   276  		n2.From.Reg = x86.REG_DX
   277  		n2.To.Type = obj.TYPE_REG
   278  		n2.To.Reg = x86.REG_DX
   279  
   280  		// TODO(khr): issue only the -1 fixup code we need.
   281  		// For instance, if only the quotient is used, no point in zeroing the remainder.
   282  
   283  		j1.To.Val = n1
   284  		j2.To.Val = s.Pc()
   285  
   286  	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
    287  		// The frontend rewrites constant division by 8/16/32 bit integers into
    288  		// HMUL by a constant.
    289  		// SSA rewrites generate the 64 bit versions.
   290  
   291  		// Arg[0] is already in AX as it's the only register we allow
   292  		// and DX is the only output we care about (the high bits)
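         		// One-operand (I)MUL multiplies AX by the operand and leaves the
         		// double-width product in DX:AX, so DX holds the high half we need.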
   293  		p := s.Prog(v.Op.Asm())
   294  		p.From.Type = obj.TYPE_REG
   295  		p.From.Reg = v.Args[1].Reg()
   296  
   297  		// IMULB puts the high portion in AH instead of DL,
   298  		// so move it to DL for consistency
   299  		if v.Type.Size() == 1 {
   300  			m := s.Prog(x86.AMOVB)
   301  			m.From.Type = obj.TYPE_REG
   302  			m.From.Reg = x86.REG_AH
   303  			m.To.Type = obj.TYPE_REG
   304  			m.To.Reg = x86.REG_DX
   305  		}
   306  
   307  	case ssa.OpAMD64MULQU2:
   308  		// Arg[0] is already in AX as it's the only register we allow
   309  		// results hi in DX, lo in AX
   310  		p := s.Prog(v.Op.Asm())
   311  		p.From.Type = obj.TYPE_REG
   312  		p.From.Reg = v.Args[1].Reg()
   313  
   314  	case ssa.OpAMD64DIVQU2:
    315  		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow
   316  		// results q in AX, r in DX
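         		// The unsigned divide consumes the full 128-bit dividend in DX:AX.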
   317  		p := s.Prog(v.Op.Asm())
   318  		p.From.Type = obj.TYPE_REG
   319  		p.From.Reg = v.Args[2].Reg()
   320  
   321  	case ssa.OpAMD64AVGQU:
   322  		// compute (x+y)/2 unsigned.
   323  		// Do a 64-bit add, the overflow goes into the carry.
   324  		// Shift right once and pull the carry back into the 63rd bit.
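         		// ADDQ sets the carry flag on unsigned overflow, and RCRQ $1 rotates
         		// that carry back in as the new top bit, so the 65-bit sum is halved
         		// without losing its high bit.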
   325  		r := v.Reg()
   326  		if r != v.Args[0].Reg() {
   327  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   328  		}
   329  		p := s.Prog(x86.AADDQ)
   330  		p.From.Type = obj.TYPE_REG
   331  		p.To.Type = obj.TYPE_REG
   332  		p.To.Reg = r
   333  		p.From.Reg = v.Args[1].Reg()
   334  		p = s.Prog(x86.ARCRQ)
   335  		p.From.Type = obj.TYPE_CONST
   336  		p.From.Offset = 1
   337  		p.To.Type = obj.TYPE_REG
   338  		p.To.Reg = r
   339  
   340  	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
   341  		r := v.Reg()
   342  		a := v.Args[0].Reg()
   343  		if r == a {
   344  			if v.AuxInt == 1 {
   345  				var asm obj.As
   346  				// Software optimization manual recommends add $1,reg.
    347  				// But inc/dec is 1 byte smaller. ICC always uses inc;
    348  				// Clang/GCC choose depending on flags, but prefer add.
    349  				// Experiments show that inc/dec is both a little faster
    350  				// and makes the binary a little smaller.
   351  				if v.Op == ssa.OpAMD64ADDQconst {
   352  					asm = x86.AINCQ
   353  				} else {
   354  					asm = x86.AINCL
   355  				}
   356  				p := s.Prog(asm)
   357  				p.To.Type = obj.TYPE_REG
   358  				p.To.Reg = r
   359  				return
   360  			}
   361  			if v.AuxInt == -1 {
   362  				var asm obj.As
   363  				if v.Op == ssa.OpAMD64ADDQconst {
   364  					asm = x86.ADECQ
   365  				} else {
   366  					asm = x86.ADECL
   367  				}
   368  				p := s.Prog(asm)
   369  				p.To.Type = obj.TYPE_REG
   370  				p.To.Reg = r
   371  				return
   372  			}
   373  			p := s.Prog(v.Op.Asm())
   374  			p.From.Type = obj.TYPE_CONST
   375  			p.From.Offset = v.AuxInt
   376  			p.To.Type = obj.TYPE_REG
   377  			p.To.Reg = r
   378  			return
   379  		}
   380  		var asm obj.As
   381  		if v.Op == ssa.OpAMD64ADDQconst {
   382  			asm = x86.ALEAQ
   383  		} else {
   384  			asm = x86.ALEAL
   385  		}
   386  		p := s.Prog(asm)
   387  		p.From.Type = obj.TYPE_MEM
   388  		p.From.Reg = a
   389  		p.From.Offset = v.AuxInt
   390  		p.To.Type = obj.TYPE_REG
   391  		p.To.Reg = r
   392  
   393  	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
   394  		r := v.Reg()
   395  		if r != v.Args[0].Reg() {
   396  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   397  		}
   398  		p := s.Prog(v.Op.Asm())
   399  		p.From.Type = obj.TYPE_REG
   400  		p.From.Reg = v.Args[1].Reg()
   401  		p.To.Type = obj.TYPE_REG
   402  		p.To.Reg = r
   403  
   404  	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
   405  		r := v.Reg()
   406  		if r != v.Args[0].Reg() {
   407  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   408  		}
   409  		p := s.Prog(v.Op.Asm())
   410  		p.From.Type = obj.TYPE_CONST
   411  		p.From.Offset = v.AuxInt
   412  		p.To.Type = obj.TYPE_REG
   413  		p.To.Reg = r
   414  		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
   415  		// then we don't need to use resultInArg0 for these ops.
   416  		//p.From3 = new(obj.Addr)
   417  		//p.From3.Type = obj.TYPE_REG
   418  		//p.From3.Reg = v.Args[0].Reg()
   419  
   420  	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
   421  		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
   422  		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
   423  		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
   424  		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
   425  		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
   426  		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
   427  		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
   428  		r := v.Reg()
   429  		if r != v.Args[0].Reg() {
   430  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   431  		}
   432  		p := s.Prog(v.Op.Asm())
   433  		p.From.Type = obj.TYPE_CONST
   434  		p.From.Offset = v.AuxInt
   435  		p.To.Type = obj.TYPE_REG
   436  		p.To.Reg = r
   437  	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
   438  		r := v.Reg()
   439  		p := s.Prog(v.Op.Asm())
   440  		p.From.Type = obj.TYPE_REG
   441  		p.From.Reg = r
   442  		p.To.Type = obj.TYPE_REG
   443  		p.To.Reg = r
   444  	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
   445  		r := v.Args[0].Reg()
   446  		i := v.Args[1].Reg()
   447  		p := s.Prog(x86.ALEAQ)
   448  		switch v.Op {
   449  		case ssa.OpAMD64LEAQ1:
   450  			p.From.Scale = 1
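         			// SP cannot be used as an index register, so if the index is SP,
         			// swap it into the base slot (harmless, since the scale is 1).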
   451  			if i == x86.REG_SP {
   452  				r, i = i, r
   453  			}
   454  		case ssa.OpAMD64LEAQ2:
   455  			p.From.Scale = 2
   456  		case ssa.OpAMD64LEAQ4:
   457  			p.From.Scale = 4
   458  		case ssa.OpAMD64LEAQ8:
   459  			p.From.Scale = 8
   460  		}
   461  		p.From.Type = obj.TYPE_MEM
   462  		p.From.Reg = r
   463  		p.From.Index = i
   464  		gc.AddAux(&p.From, v)
   465  		p.To.Type = obj.TYPE_REG
   466  		p.To.Reg = v.Reg()
   467  	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL:
   468  		p := s.Prog(v.Op.Asm())
   469  		p.From.Type = obj.TYPE_MEM
   470  		p.From.Reg = v.Args[0].Reg()
   471  		gc.AddAux(&p.From, v)
   472  		p.To.Type = obj.TYPE_REG
   473  		p.To.Reg = v.Reg()
   474  	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
   475  		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
   476  		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
   477  		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
   478  	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
   479  		// Go assembler has swapped operands for UCOMISx relative to CMP,
   480  		// must account for that right here.
   481  		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
   482  	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
   483  		p := s.Prog(v.Op.Asm())
   484  		p.From.Type = obj.TYPE_REG
   485  		p.From.Reg = v.Args[0].Reg()
   486  		p.To.Type = obj.TYPE_CONST
   487  		p.To.Offset = v.AuxInt
   488  	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
   489  		ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst:
   490  		p := s.Prog(v.Op.Asm())
   491  		p.From.Type = obj.TYPE_CONST
   492  		p.From.Offset = v.AuxInt
   493  		p.To.Type = obj.TYPE_REG
   494  		p.To.Reg = v.Args[0].Reg()
   495  	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
   496  		x := v.Reg()
   497  		asm := v.Op.Asm()
   498  		// Use MOVL to move a small constant into a register
   499  		// when the constant is positive and fits into 32 bits.
   500  		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
    501  			// The upper 32 bits are zeroed automatically when using MOVL.
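         			// MOVL with a 32-bit immediate also has a shorter encoding than
         			// MOVQ with a 64-bit immediate.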
   502  			asm = x86.AMOVL
   503  		}
   504  		p := s.Prog(asm)
   505  		p.From.Type = obj.TYPE_CONST
   506  		p.From.Offset = v.AuxInt
   507  		p.To.Type = obj.TYPE_REG
   508  		p.To.Reg = x
   509  		// If flags are live at this instruction, suppress the
   510  		// MOV $0,AX -> XOR AX,AX optimization.
   511  		if v.Aux != nil {
   512  			p.Mark |= x86.PRESERVEFLAGS
   513  		}
   514  	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
   515  		x := v.Reg()
   516  		p := s.Prog(v.Op.Asm())
   517  		p.From.Type = obj.TYPE_FCONST
   518  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   519  		p.To.Type = obj.TYPE_REG
   520  		p.To.Reg = x
   521  	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
   522  		p := s.Prog(v.Op.Asm())
   523  		p.From.Type = obj.TYPE_MEM
   524  		p.From.Reg = v.Args[0].Reg()
   525  		gc.AddAux(&p.From, v)
   526  		p.To.Type = obj.TYPE_REG
   527  		p.To.Reg = v.Reg()
   528  	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8:
   529  		p := s.Prog(v.Op.Asm())
   530  		p.From.Type = obj.TYPE_MEM
   531  		p.From.Reg = v.Args[0].Reg()
   532  		gc.AddAux(&p.From, v)
   533  		p.From.Scale = 8
   534  		p.From.Index = v.Args[1].Reg()
   535  		p.To.Type = obj.TYPE_REG
   536  		p.To.Reg = v.Reg()
   537  	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
   538  		p := s.Prog(v.Op.Asm())
   539  		p.From.Type = obj.TYPE_MEM
   540  		p.From.Reg = v.Args[0].Reg()
   541  		gc.AddAux(&p.From, v)
   542  		p.From.Scale = 4
   543  		p.From.Index = v.Args[1].Reg()
   544  		p.To.Type = obj.TYPE_REG
   545  		p.To.Reg = v.Reg()
   546  	case ssa.OpAMD64MOVWloadidx2:
   547  		p := s.Prog(v.Op.Asm())
   548  		p.From.Type = obj.TYPE_MEM
   549  		p.From.Reg = v.Args[0].Reg()
   550  		gc.AddAux(&p.From, v)
   551  		p.From.Scale = 2
   552  		p.From.Index = v.Args[1].Reg()
   553  		p.To.Type = obj.TYPE_REG
   554  		p.To.Reg = v.Reg()
   555  	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
   556  		r := v.Args[0].Reg()
   557  		i := v.Args[1].Reg()
   558  		if i == x86.REG_SP {
   559  			r, i = i, r
   560  		}
   561  		p := s.Prog(v.Op.Asm())
   562  		p.From.Type = obj.TYPE_MEM
   563  		p.From.Reg = r
   564  		p.From.Scale = 1
   565  		p.From.Index = i
   566  		gc.AddAux(&p.From, v)
   567  		p.To.Type = obj.TYPE_REG
   568  		p.To.Reg = v.Reg()
   569  	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
   570  		p := s.Prog(v.Op.Asm())
   571  		p.From.Type = obj.TYPE_REG
   572  		p.From.Reg = v.Args[1].Reg()
   573  		p.To.Type = obj.TYPE_MEM
   574  		p.To.Reg = v.Args[0].Reg()
   575  		gc.AddAux(&p.To, v)
   576  	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8:
   577  		p := s.Prog(v.Op.Asm())
   578  		p.From.Type = obj.TYPE_REG
   579  		p.From.Reg = v.Args[2].Reg()
   580  		p.To.Type = obj.TYPE_MEM
   581  		p.To.Reg = v.Args[0].Reg()
   582  		p.To.Scale = 8
   583  		p.To.Index = v.Args[1].Reg()
   584  		gc.AddAux(&p.To, v)
   585  	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
   586  		p := s.Prog(v.Op.Asm())
   587  		p.From.Type = obj.TYPE_REG
   588  		p.From.Reg = v.Args[2].Reg()
   589  		p.To.Type = obj.TYPE_MEM
   590  		p.To.Reg = v.Args[0].Reg()
   591  		p.To.Scale = 4
   592  		p.To.Index = v.Args[1].Reg()
   593  		gc.AddAux(&p.To, v)
   594  	case ssa.OpAMD64MOVWstoreidx2:
   595  		p := s.Prog(v.Op.Asm())
   596  		p.From.Type = obj.TYPE_REG
   597  		p.From.Reg = v.Args[2].Reg()
   598  		p.To.Type = obj.TYPE_MEM
   599  		p.To.Reg = v.Args[0].Reg()
   600  		p.To.Scale = 2
   601  		p.To.Index = v.Args[1].Reg()
   602  		gc.AddAux(&p.To, v)
   603  	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
   604  		r := v.Args[0].Reg()
   605  		i := v.Args[1].Reg()
   606  		if i == x86.REG_SP {
   607  			r, i = i, r
   608  		}
   609  		p := s.Prog(v.Op.Asm())
   610  		p.From.Type = obj.TYPE_REG
   611  		p.From.Reg = v.Args[2].Reg()
   612  		p.To.Type = obj.TYPE_MEM
   613  		p.To.Reg = r
   614  		p.To.Scale = 1
   615  		p.To.Index = i
   616  		gc.AddAux(&p.To, v)
   617  	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
   618  		p := s.Prog(v.Op.Asm())
   619  		p.From.Type = obj.TYPE_CONST
   620  		sc := v.AuxValAndOff()
   621  		p.From.Offset = sc.Val()
   622  		p.To.Type = obj.TYPE_MEM
   623  		p.To.Reg = v.Args[0].Reg()
   624  		gc.AddAux2(&p.To, v, sc.Off())
   625  	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
   626  		p := s.Prog(v.Op.Asm())
   627  		p.From.Type = obj.TYPE_CONST
   628  		sc := v.AuxValAndOff()
   629  		p.From.Offset = sc.Val()
   630  		r := v.Args[0].Reg()
   631  		i := v.Args[1].Reg()
   632  		switch v.Op {
   633  		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
   634  			p.To.Scale = 1
   635  			if i == x86.REG_SP {
   636  				r, i = i, r
   637  			}
   638  		case ssa.OpAMD64MOVWstoreconstidx2:
   639  			p.To.Scale = 2
   640  		case ssa.OpAMD64MOVLstoreconstidx4:
   641  			p.To.Scale = 4
   642  		case ssa.OpAMD64MOVQstoreconstidx8:
   643  			p.To.Scale = 8
   644  		}
   645  		p.To.Type = obj.TYPE_MEM
   646  		p.To.Reg = r
   647  		p.To.Index = i
   648  		gc.AddAux2(&p.To, v, sc.Off())
   649  	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
   650  		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
   651  		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
   652  		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
   653  	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
   654  		r := v.Reg()
   655  		// Break false dependency on destination register.
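         		// CVTSI2Sx only writes the low lanes of the XMM destination, so
         		// without the XORPS the conversion would inherit a dependency on the
         		// register's previous contents.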
   656  		opregreg(s, x86.AXORPS, r, r)
   657  		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
   658  	case ssa.OpAMD64ADDQmem, ssa.OpAMD64ADDLmem, ssa.OpAMD64SUBQmem, ssa.OpAMD64SUBLmem,
   659  		ssa.OpAMD64ANDQmem, ssa.OpAMD64ANDLmem, ssa.OpAMD64ORQmem, ssa.OpAMD64ORLmem,
   660  		ssa.OpAMD64XORQmem, ssa.OpAMD64XORLmem, ssa.OpAMD64ADDSDmem, ssa.OpAMD64ADDSSmem,
   661  		ssa.OpAMD64SUBSDmem, ssa.OpAMD64SUBSSmem, ssa.OpAMD64MULSDmem, ssa.OpAMD64MULSSmem:
   662  		p := s.Prog(v.Op.Asm())
   663  		p.From.Type = obj.TYPE_MEM
   664  		p.From.Reg = v.Args[1].Reg()
   665  		gc.AddAux(&p.From, v)
   666  		p.To.Type = obj.TYPE_REG
   667  		p.To.Reg = v.Reg()
   668  		if v.Reg() != v.Args[0].Reg() {
   669  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   670  		}
   671  	case ssa.OpAMD64DUFFZERO:
   672  		off := duffStart(v.AuxInt)
   673  		adj := duffAdj(v.AuxInt)
   674  		var p *obj.Prog
   675  		if adj != 0 {
   676  			p = s.Prog(x86.ALEAQ)
   677  			p.From.Type = obj.TYPE_MEM
   678  			p.From.Offset = adj
   679  			p.From.Reg = x86.REG_DI
   680  			p.To.Type = obj.TYPE_REG
   681  			p.To.Reg = x86.REG_DI
   682  		}
   683  		p = s.Prog(obj.ADUFFZERO)
   684  		p.To.Type = obj.TYPE_ADDR
   685  		p.To.Sym = gc.Duffzero
   686  		p.To.Offset = off
   687  	case ssa.OpAMD64MOVOconst:
   688  		if v.AuxInt != 0 {
   689  			v.Fatalf("MOVOconst can only do constant=0")
   690  		}
   691  		r := v.Reg()
   692  		opregreg(s, x86.AXORPS, r, r)
   693  	case ssa.OpAMD64DUFFCOPY:
   694  		p := s.Prog(obj.ADUFFCOPY)
   695  		p.To.Type = obj.TYPE_ADDR
   696  		p.To.Sym = gc.Duffcopy
   697  		p.To.Offset = v.AuxInt
   698  
   699  	case ssa.OpCopy, ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
   700  		if v.Type.IsMemory() {
   701  			return
   702  		}
   703  		x := v.Args[0].Reg()
   704  		y := v.Reg()
   705  		if x != y {
   706  			opregreg(s, moveByType(v.Type), y, x)
   707  		}
   708  	case ssa.OpLoadReg:
   709  		if v.Type.IsFlags() {
   710  			v.Fatalf("load flags not implemented: %v", v.LongString())
   711  			return
   712  		}
   713  		p := s.Prog(loadByType(v.Type))
   714  		gc.AddrAuto(&p.From, v.Args[0])
   715  		p.To.Type = obj.TYPE_REG
   716  		p.To.Reg = v.Reg()
   717  
   718  	case ssa.OpStoreReg:
   719  		if v.Type.IsFlags() {
   720  			v.Fatalf("store flags not implemented: %v", v.LongString())
   721  			return
   722  		}
   723  		p := s.Prog(storeByType(v.Type))
   724  		p.From.Type = obj.TYPE_REG
   725  		p.From.Reg = v.Args[0].Reg()
   726  		gc.AddrAuto(&p.To, v)
   727  	case ssa.OpAMD64LoweredGetClosurePtr:
   728  		// Closure pointer is DX.
   729  		gc.CheckLoweredGetClosurePtr(v)
   730  	case ssa.OpAMD64LoweredGetG:
   731  		r := v.Reg()
   732  		// See the comments in cmd/internal/obj/x86/obj6.go
   733  		// near CanUse1InsnTLS for a detailed explanation of these instructions.
   734  		if x86.CanUse1InsnTLS(gc.Ctxt) {
   735  			// MOVQ (TLS), r
   736  			p := s.Prog(x86.AMOVQ)
   737  			p.From.Type = obj.TYPE_MEM
   738  			p.From.Reg = x86.REG_TLS
   739  			p.To.Type = obj.TYPE_REG
   740  			p.To.Reg = r
   741  		} else {
   742  			// MOVQ TLS, r
   743  			// MOVQ (r)(TLS*1), r
   744  			p := s.Prog(x86.AMOVQ)
   745  			p.From.Type = obj.TYPE_REG
   746  			p.From.Reg = x86.REG_TLS
   747  			p.To.Type = obj.TYPE_REG
   748  			p.To.Reg = r
   749  			q := s.Prog(x86.AMOVQ)
   750  			q.From.Type = obj.TYPE_MEM
   751  			q.From.Reg = r
   752  			q.From.Index = x86.REG_TLS
   753  			q.From.Scale = 1
   754  			q.To.Type = obj.TYPE_REG
   755  			q.To.Reg = r
   756  		}
   757  	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
   758  		s.Call(v)
   759  	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
   760  		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
   761  		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
   762  		r := v.Reg()
   763  		if r != v.Args[0].Reg() {
   764  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   765  		}
   766  		p := s.Prog(v.Op.Asm())
   767  		p.To.Type = obj.TYPE_REG
   768  		p.To.Reg = r
   769  	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL:
   770  		p := s.Prog(v.Op.Asm())
   771  		p.From.Type = obj.TYPE_REG
   772  		p.From.Reg = v.Args[0].Reg()
   773  		p.To.Type = obj.TYPE_REG
   774  		p.To.Reg = v.Reg0()
   775  	case ssa.OpAMD64SQRTSD:
   776  		p := s.Prog(v.Op.Asm())
   777  		p.From.Type = obj.TYPE_REG
   778  		p.From.Reg = v.Args[0].Reg()
   779  		p.To.Type = obj.TYPE_REG
   780  		p.To.Reg = v.Reg()
   781  	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL:
   782  		if v.Args[0].Reg() != v.Reg() {
   783  			// POPCNT on Intel has a false dependency on the destination register.
   784  			// Zero the destination to break the dependency.
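         			// (On several Intel microarchitectures POPCNT is treated as if it
         			// also read its destination, so writing the register first breaks
         			// that dependency chain.)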
   785  			p := s.Prog(x86.AMOVQ)
   786  			p.From.Type = obj.TYPE_CONST
   787  			p.From.Offset = 0
   788  			p.To.Type = obj.TYPE_REG
   789  			p.To.Reg = v.Reg()
   790  		}
   791  		p := s.Prog(v.Op.Asm())
   792  		p.From.Type = obj.TYPE_REG
   793  		p.From.Reg = v.Args[0].Reg()
   794  		p.To.Type = obj.TYPE_REG
   795  		p.To.Reg = v.Reg()
   796  	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
   797  		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
   798  		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
   799  		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
   800  		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
   801  		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
   802  		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
   803  		p := s.Prog(v.Op.Asm())
   804  		p.To.Type = obj.TYPE_REG
   805  		p.To.Reg = v.Reg()
   806  
   807  	case ssa.OpAMD64SETNEF:
   808  		p := s.Prog(v.Op.Asm())
   809  		p.To.Type = obj.TYPE_REG
   810  		p.To.Reg = v.Reg()
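         		// ucomis* sets PF for unordered (NaN) operands, and != must report
         		// true for NaNs, so OR the parity result into the SETNE result.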
   811  		q := s.Prog(x86.ASETPS)
   812  		q.To.Type = obj.TYPE_REG
   813  		q.To.Reg = x86.REG_AX
    814  		// ORL avoids a partial register write and is smaller than the ORQ used by the old compiler.
   815  		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)
   816  
   817  	case ssa.OpAMD64SETEQF:
   818  		p := s.Prog(v.Op.Asm())
   819  		p.To.Type = obj.TYPE_REG
   820  		p.To.Reg = v.Reg()
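         		// == must report false for NaNs, so AND the SETEQ result with the
         		// parity-clear (ordered) result.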
   821  		q := s.Prog(x86.ASETPC)
   822  		q.To.Type = obj.TYPE_REG
   823  		q.To.Reg = x86.REG_AX
    824  		// ANDL avoids a partial register write and is smaller than the ANDQ used by the old compiler.
   825  		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)
   826  
   827  	case ssa.OpAMD64InvertFlags:
   828  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
   829  	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
   830  		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
   831  	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
   832  		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
   833  	case ssa.OpAMD64REPSTOSQ:
   834  		s.Prog(x86.AREP)
   835  		s.Prog(x86.ASTOSQ)
   836  	case ssa.OpAMD64REPMOVSQ:
   837  		s.Prog(x86.AREP)
   838  		s.Prog(x86.AMOVSQ)
   839  	case ssa.OpAMD64LoweredNilCheck:
   840  		// Issue a load which will fault if the input is nil.
   841  		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
   842  		// Should we use the 3-byte TESTB $0, (reg) instead?  It is larger
   843  		// but it doesn't have false dependency on AX.
   844  		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
   845  		// That trades clobbering flags for clobbering a register.
   846  		p := s.Prog(x86.ATESTB)
   847  		p.From.Type = obj.TYPE_REG
   848  		p.From.Reg = x86.REG_AX
   849  		p.To.Type = obj.TYPE_MEM
   850  		p.To.Reg = v.Args[0].Reg()
   851  		gc.AddAux(&p.To, v)
   852  		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
   853  			gc.Warnl(v.Pos, "generated nil check")
   854  		}
   855  	case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
   856  		p := s.Prog(v.Op.Asm())
   857  		p.From.Type = obj.TYPE_MEM
   858  		p.From.Reg = v.Args[0].Reg()
   859  		gc.AddAux(&p.From, v)
   860  		p.To.Type = obj.TYPE_REG
   861  		p.To.Reg = v.Reg0()
   862  	case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
   863  		r := v.Reg0()
   864  		if r != v.Args[0].Reg() {
   865  			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
   866  		}
   867  		p := s.Prog(v.Op.Asm())
   868  		p.From.Type = obj.TYPE_REG
   869  		p.From.Reg = r
   870  		p.To.Type = obj.TYPE_MEM
   871  		p.To.Reg = v.Args[1].Reg()
   872  		gc.AddAux(&p.To, v)
   873  	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
   874  		r := v.Reg0()
   875  		if r != v.Args[0].Reg() {
   876  			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
   877  		}
   878  		s.Prog(x86.ALOCK)
   879  		p := s.Prog(v.Op.Asm())
   880  		p.From.Type = obj.TYPE_REG
   881  		p.From.Reg = r
   882  		p.To.Type = obj.TYPE_MEM
   883  		p.To.Reg = v.Args[1].Reg()
   884  		gc.AddAux(&p.To, v)
   885  	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
   886  		if v.Args[1].Reg() != x86.REG_AX {
   887  			v.Fatalf("input[1] not in AX %s", v.LongString())
   888  		}
   889  		s.Prog(x86.ALOCK)
   890  		p := s.Prog(v.Op.Asm())
   891  		p.From.Type = obj.TYPE_REG
   892  		p.From.Reg = v.Args[2].Reg()
   893  		p.To.Type = obj.TYPE_MEM
   894  		p.To.Reg = v.Args[0].Reg()
   895  		gc.AddAux(&p.To, v)
   896  		p = s.Prog(x86.ASETEQ)
   897  		p.To.Type = obj.TYPE_REG
   898  		p.To.Reg = v.Reg0()
   899  	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
   900  		s.Prog(x86.ALOCK)
   901  		p := s.Prog(v.Op.Asm())
   902  		p.From.Type = obj.TYPE_REG
   903  		p.From.Reg = v.Args[1].Reg()
   904  		p.To.Type = obj.TYPE_MEM
   905  		p.To.Reg = v.Args[0].Reg()
   906  		gc.AddAux(&p.To, v)
   907  	case ssa.OpClobber:
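         		// Write the 0xdeaddead pattern twice so that the full 8-byte slot is
         		// clobbered (the second MOVL lands at offset +4).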
   908  		p := s.Prog(x86.AMOVL)
   909  		p.From.Type = obj.TYPE_CONST
   910  		p.From.Offset = 0xdeaddead
   911  		p.To.Type = obj.TYPE_MEM
   912  		p.To.Reg = x86.REG_SP
   913  		gc.AddAux(&p.To, v)
   914  		p = s.Prog(x86.AMOVL)
   915  		p.From.Type = obj.TYPE_CONST
   916  		p.From.Offset = 0xdeaddead
   917  		p.To.Type = obj.TYPE_MEM
   918  		p.To.Reg = x86.REG_SP
   919  		gc.AddAux(&p.To, v)
   920  		p.To.Offset += 4
   921  	default:
   922  		v.Fatalf("genValue not implemented: %s", v.LongString())
   923  	}
   924  }
   925  
   926  var blockJump = [...]struct {
   927  	asm, invasm obj.As
   928  }{
   929  	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
   930  	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
   931  	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
   932  	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
   933  	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
   934  	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
   935  	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
   936  	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
   937  	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
   938  	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
   939  	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
   940  	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
   941  }
   942  
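         // For floating-point compares, ucomis* reports "equal" as ZF set with PF
         // clear (PF set means unordered), so the EQF and NEF blocks must test two
         // flags. The jump pairs below encode that test for each possible
         // fallthrough successor.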
   943  var eqfJumps = [2][2]gc.FloatingEQNEJump{
   944  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
   945  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
   946  }
   947  var nefJumps = [2][2]gc.FloatingEQNEJump{
   948  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
   949  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
   950  }
   951  
   952  func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
   953  	switch b.Kind {
   954  	case ssa.BlockPlain:
   955  		if b.Succs[0].Block() != next {
   956  			p := s.Prog(obj.AJMP)
   957  			p.To.Type = obj.TYPE_BRANCH
   958  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
   959  		}
   960  	case ssa.BlockDefer:
    961  		// defer returns in AX:
    962  		// 0 if we should continue executing
    963  		// 1 if we should jump to the deferreturn call
   964  		p := s.Prog(x86.ATESTL)
   965  		p.From.Type = obj.TYPE_REG
   966  		p.From.Reg = x86.REG_AX
   967  		p.To.Type = obj.TYPE_REG
   968  		p.To.Reg = x86.REG_AX
   969  		p = s.Prog(x86.AJNE)
   970  		p.To.Type = obj.TYPE_BRANCH
   971  		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
   972  		if b.Succs[0].Block() != next {
   973  			p := s.Prog(obj.AJMP)
   974  			p.To.Type = obj.TYPE_BRANCH
   975  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
   976  		}
   977  	case ssa.BlockExit:
   978  		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
   979  	case ssa.BlockRet:
   980  		s.Prog(obj.ARET)
   981  	case ssa.BlockRetJmp:
   982  		p := s.Prog(obj.AJMP)
   983  		p.To.Type = obj.TYPE_MEM
   984  		p.To.Name = obj.NAME_EXTERN
   985  		p.To.Sym = b.Aux.(*obj.LSym)
   986  
   987  	case ssa.BlockAMD64EQF:
   988  		s.FPJump(b, next, &eqfJumps)
   989  
   990  	case ssa.BlockAMD64NEF:
   991  		s.FPJump(b, next, &nefJumps)
   992  
   993  	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
   994  		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
   995  		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
   996  		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
   997  		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
   998  		jmp := blockJump[b.Kind]
   999  		var p *obj.Prog
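         		// If the fallthrough is Succs[0], branch on the inverted condition to
         		// Succs[1]; if it is Succs[1], branch to Succs[0]; otherwise emit a
         		// conditional branch plus an unconditional jump.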
  1000  		switch next {
  1001  		case b.Succs[0].Block():
  1002  			p = s.Prog(jmp.invasm)
  1003  			p.To.Type = obj.TYPE_BRANCH
  1004  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
  1005  		case b.Succs[1].Block():
  1006  			p = s.Prog(jmp.asm)
  1007  			p.To.Type = obj.TYPE_BRANCH
  1008  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1009  		default:
  1010  			p = s.Prog(jmp.asm)
  1011  			p.To.Type = obj.TYPE_BRANCH
  1012  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1013  			q := s.Prog(obj.AJMP)
  1014  			q.To.Type = obj.TYPE_BRANCH
  1015  			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
  1016  		}
  1017  
  1018  	default:
  1019  		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
  1020  	}
  1021  }