github.com/slayercat/go@v0.0.0-20170428012452-c51559813f61/src/cmd/compile/internal/amd64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package amd64
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  
    11  	"cmd/compile/internal/gc"
    12  	"cmd/compile/internal/ssa"
    13  	"cmd/internal/obj"
    14  	"cmd/internal/obj/x86"
    15  )
    16  
    17  // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
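        // The assembler may rewrite MOV $0,reg as XOR reg,reg, which clobbers flags;
        // ssaGenValue turns the mark into a PRESERVEFLAGS hint on the generated Prog
        // (see the MOVLconst/MOVQconst case below) so that rewrite is suppressed
        // while flags are still live.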
    18  func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
    19  	flive := b.FlagsLiveAtEnd
    20  	if b.Control != nil && b.Control.Type.IsFlags() {
    21  		flive = true
    22  	}
    23  	for i := len(b.Values) - 1; i >= 0; i-- {
    24  		v := b.Values[i]
    25  		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
    26  			// The "mark" is any non-nil Aux value.
    27  			v.Aux = v
    28  		}
    29  		if v.Type.IsFlags() {
    30  			flive = false
    31  		}
    32  		for _, a := range v.Args {
    33  			if a.Type.IsFlags() {
    34  				flive = true
    35  			}
    36  		}
    37  	}
    38  }
    39  
    40  // loadByType returns the load instruction of the given type.
    41  func loadByType(t ssa.Type) obj.As {
    42  	// Avoid partial register write
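        	// by using zero-extending loads (MOVBLZX/MOVWLZX) for sub-word values,
        	// so the full 32-bit register is written.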
    43  	if !t.IsFloat() && t.Size() <= 2 {
    44  		if t.Size() == 1 {
    45  			return x86.AMOVBLZX
    46  		} else {
    47  			return x86.AMOVWLZX
    48  		}
    49  	}
    50  	// Otherwise, there's no difference between load and store opcodes.
    51  	return storeByType(t)
    52  }
    53  
    54  // storeByType returns the store instruction of the given type.
    55  func storeByType(t ssa.Type) obj.As {
    56  	width := t.Size()
    57  	if t.IsFloat() {
    58  		switch width {
    59  		case 4:
    60  			return x86.AMOVSS
    61  		case 8:
    62  			return x86.AMOVSD
    63  		}
    64  	} else {
    65  		switch width {
    66  		case 1:
    67  			return x86.AMOVB
    68  		case 2:
    69  			return x86.AMOVW
    70  		case 4:
    71  			return x86.AMOVL
    72  		case 8:
    73  			return x86.AMOVQ
    74  		}
    75  	}
    76  	panic("bad store type")
    77  }
    78  
    79  // moveByType returns the reg->reg move instruction of the given type.
    80  func moveByType(t ssa.Type) obj.As {
    81  	if t.IsFloat() {
    82  	// Moving the whole SSE2 register is faster
    83  	// than moving just the correct low portion of it.
    84  	// There is no xmm->xmm move with a 1-byte opcode,
    85  	// so use movups, which has a 2-byte opcode.
    86  		return x86.AMOVUPS
    87  	} else {
    88  		switch t.Size() {
    89  		case 1:
    90  			// Avoids partial register write
    91  			return x86.AMOVL
    92  		case 2:
    93  			return x86.AMOVL
    94  		case 4:
    95  			return x86.AMOVL
    96  		case 8:
    97  			return x86.AMOVQ
    98  		case 16:
    99  			return x86.AMOVUPS // int128s are in SSE registers
   100  		default:
   101  			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
   102  		}
   103  	}
   104  }
   105  
   106  // opregreg emits instructions for
   107  //     dest := dest(To) op src(From)
   108  // and also returns the created obj.Prog so it
   109  // may be further adjusted (offset, scale, etc).
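        // For example, opregreg(s, x86.AADDQ, dst, src) emits "ADDQ src, dst".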
   110  func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
   111  	p := s.Prog(op)
   112  	p.From.Type = obj.TYPE_REG
   113  	p.To.Type = obj.TYPE_REG
   114  	p.To.Reg = dest
   115  	p.From.Reg = src
   116  	return p
   117  }
   118  
   119  // DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD;
   120  // see runtime/mkduff.go.
   121  func duffStart(size int64) int64 {
   122  	x, _ := duff(size)
   123  	return x
   124  }
   125  func duffAdj(size int64) int64 {
   126  	_, x := duff(size)
   127  	return x
   128  }
   129  
   130  // duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
   131  // required to use the duffzero mechanism for a block of the given size.
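        //
        // When size is not a multiple of a whole 4-MOVUPS block, code enters the
        // Duff device partway through a block. The MOVUPSs in a block store at
        // fixed, increasing offsets from DI, so the (negative) pointer adjustment
        // shifts DI down past the bytes the skipped MOVUPSs would have covered,
        // making the remaining clears land on the intended addresses.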
   132  func duff(size int64) (int64, int64) {
   133  	if size < 32 || size > 1024 || size%dzClearStep != 0 {
   134  		panic("bad duffzero size")
   135  	}
   136  	steps := size / dzClearStep
   137  	blocks := steps / dzBlockLen
   138  	steps %= dzBlockLen
   139  	off := dzBlockSize * (dzBlocks - blocks)
   140  	var adj int64
   141  	if steps != 0 {
   142  		off -= dzAddSize
   143  		off -= dzMovSize * steps
   144  		adj -= dzClearStep * (dzBlockLen - steps)
   145  	}
   146  	return off, adj
   147  }
   148  
   149  func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
   150  	switch v.Op {
   151  	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
   152  		r := v.Reg()
   153  		r1 := v.Args[0].Reg()
   154  		r2 := v.Args[1].Reg()
   155  		switch {
   156  		case r == r1:
   157  			p := s.Prog(v.Op.Asm())
   158  			p.From.Type = obj.TYPE_REG
   159  			p.From.Reg = r2
   160  			p.To.Type = obj.TYPE_REG
   161  			p.To.Reg = r
   162  		case r == r2:
   163  			p := s.Prog(v.Op.Asm())
   164  			p.From.Type = obj.TYPE_REG
   165  			p.From.Reg = r1
   166  			p.To.Type = obj.TYPE_REG
   167  			p.To.Reg = r
   168  		default:
   169  			var asm obj.As
   170  			if v.Op == ssa.OpAMD64ADDQ {
   171  				asm = x86.ALEAQ
   172  			} else {
   173  				asm = x86.ALEAL
   174  			}
   175  			p := s.Prog(asm)
   176  			p.From.Type = obj.TYPE_MEM
   177  			p.From.Reg = r1
   178  			p.From.Scale = 1
   179  			p.From.Index = r2
   180  			p.To.Type = obj.TYPE_REG
   181  			p.To.Reg = r
   182  		}
   183  	// 2-address opcode arithmetic
   184  	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
   185  		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
   186  		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
   187  		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
   188  		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
   189  		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
   190  		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
   191  		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
   192  		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
   193  		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
   194  		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
   195  		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
   196  		ssa.OpAMD64PXOR:
   197  		r := v.Reg()
   198  		if r != v.Args[0].Reg() {
   199  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   200  		}
   201  		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())
   202  
   203  	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
   204  		// Arg[0] (the dividend) is in AX.
   205  		// Arg[1] (the divisor) can be in any other register.
   206  		// Result[0] (the quotient) is in AX.
   207  		// Result[1] (the remainder) is in DX.
   208  		r := v.Args[1].Reg()
   209  
   210  		// Zero extend dividend.
   211  		c := s.Prog(x86.AXORL)
   212  		c.From.Type = obj.TYPE_REG
   213  		c.From.Reg = x86.REG_DX
   214  		c.To.Type = obj.TYPE_REG
   215  		c.To.Reg = x86.REG_DX
   216  
   217  		// Issue divide.
   218  		p := s.Prog(v.Op.Asm())
   219  		p.From.Type = obj.TYPE_REG
   220  		p.From.Reg = r
   221  
   222  	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
   223  		// Arg[0] (the dividend) is in AX.
   224  		// Arg[1] (the divisor) can be in any other register.
   225  		// Result[0] (the quotient) is in AX.
   226  		// Result[1] (the remainder) is in DX.
   227  		r := v.Args[1].Reg()
   228  
   229  		// CPU faults upon signed overflow, which occurs when the most
   230  		// negative int is divided by -1. Handle divide by -1 as a special case.
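        		// The emitted sequence is roughly (64-bit case):
        		//	CMPQ  divisor, $-1
        		//	JEQ   fixup
        		//	CQO
        		//	IDIVQ divisor
        		//	JMP   done
        		// fixup:
        		//	NEGQ  AX       (n / -1 == -n)
        		//	XORL  DX, DX   (n % -1 == 0)
        		// done: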
   231  		var c *obj.Prog
   232  		switch v.Op {
   233  		case ssa.OpAMD64DIVQ:
   234  			c = s.Prog(x86.ACMPQ)
   235  		case ssa.OpAMD64DIVL:
   236  			c = s.Prog(x86.ACMPL)
   237  		case ssa.OpAMD64DIVW:
   238  			c = s.Prog(x86.ACMPW)
   239  		}
   240  		c.From.Type = obj.TYPE_REG
   241  		c.From.Reg = r
   242  		c.To.Type = obj.TYPE_CONST
   243  		c.To.Offset = -1
   244  		j1 := s.Prog(x86.AJEQ)
   245  		j1.To.Type = obj.TYPE_BRANCH
   246  
   247  		// Sign extend dividend.
   248  		switch v.Op {
   249  		case ssa.OpAMD64DIVQ:
   250  			s.Prog(x86.ACQO)
   251  		case ssa.OpAMD64DIVL:
   252  			s.Prog(x86.ACDQ)
   253  		case ssa.OpAMD64DIVW:
   254  			s.Prog(x86.ACWD)
   255  		}
   256  
   257  		// Issue divide.
   258  		p := s.Prog(v.Op.Asm())
   259  		p.From.Type = obj.TYPE_REG
   260  		p.From.Reg = r
   261  
   262  		// Skip over -1 fixup code.
   263  		j2 := s.Prog(obj.AJMP)
   264  		j2.To.Type = obj.TYPE_BRANCH
   265  
   266  		// Issue -1 fixup code.
   267  		// n / -1 = -n
   268  		n1 := s.Prog(x86.ANEGQ)
   269  		n1.To.Type = obj.TYPE_REG
   270  		n1.To.Reg = x86.REG_AX
   271  
   272  		// n % -1 == 0
   273  		n2 := s.Prog(x86.AXORL)
   274  		n2.From.Type = obj.TYPE_REG
   275  		n2.From.Reg = x86.REG_DX
   276  		n2.To.Type = obj.TYPE_REG
   277  		n2.To.Reg = x86.REG_DX
   278  
   279  		// TODO(khr): issue only the -1 fixup code we need.
   280  		// For instance, if only the quotient is used, no point in zeroing the remainder.
   281  
   282  		j1.To.Val = n1
   283  		j2.To.Val = s.Pc()
   284  
   285  	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
   286  		// The frontend rewrites constant division by 8/16/32 bit integers
   287  		// into HMUL by a constant; the SSA rewrite rules generate the
   288  		// 64-bit versions.
   289  
   290  		// Arg[0] is already in AX as it's the only register we allow
   291  		// and DX is the only output we care about (the high bits)
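        		// e.g. the 64-bit case emits a single one-operand multiply, which leaves
        		// the high half of the 128-bit product in DX and the low half in AX.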
   292  		p := s.Prog(v.Op.Asm())
   293  		p.From.Type = obj.TYPE_REG
   294  		p.From.Reg = v.Args[1].Reg()
   295  
   296  		// IMULB puts the high portion in AH instead of DL,
   297  		// so move it to DL for consistency
   298  		if v.Type.Size() == 1 {
   299  			m := s.Prog(x86.AMOVB)
   300  			m.From.Type = obj.TYPE_REG
   301  			m.From.Reg = x86.REG_AH
   302  			m.To.Type = obj.TYPE_REG
   303  			m.To.Reg = x86.REG_DX
   304  		}
   305  
   306  	case ssa.OpAMD64MULQU2:
   307  		// Arg[0] is already in AX as it's the only register we allow
   308  		// results hi in DX, lo in AX
   309  		p := s.Prog(v.Op.Asm())
   310  		p.From.Type = obj.TYPE_REG
   311  		p.From.Reg = v.Args[1].Reg()
   312  
   313  	case ssa.OpAMD64DIVQU2:
   314  		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow
   315  		// results q in AX, r in DX
   316  		p := s.Prog(v.Op.Asm())
   317  		p.From.Type = obj.TYPE_REG
   318  		p.From.Reg = v.Args[2].Reg()
   319  
   320  	case ssa.OpAMD64AVGQU:
   321  		// compute (x+y)/2 unsigned.
   322  		// Do a 64-bit add, the overflow goes into the carry.
   323  		// Shift right once and pull the carry back into the 63rd bit.
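        		// e.g. with x = y = 1<<63: ADDQ leaves 0 with the carry set, and
        		// RCRQ $1 rotates the carry back in, yielding 1<<63.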
   324  		r := v.Reg()
   325  		if r != v.Args[0].Reg() {
   326  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   327  		}
   328  		p := s.Prog(x86.AADDQ)
   329  		p.From.Type = obj.TYPE_REG
   330  		p.To.Type = obj.TYPE_REG
   331  		p.To.Reg = r
   332  		p.From.Reg = v.Args[1].Reg()
   333  		p = s.Prog(x86.ARCRQ)
   334  		p.From.Type = obj.TYPE_CONST
   335  		p.From.Offset = 1
   336  		p.To.Type = obj.TYPE_REG
   337  		p.To.Reg = r
   338  
   339  	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
   340  		r := v.Reg()
   341  		a := v.Args[0].Reg()
   342  		if r == a {
   343  			if v.AuxInt == 1 {
   344  				var asm obj.As
   345  				// The software optimization manual recommends add $1,reg.
   346  				// But inc/dec is 1 byte smaller. ICC always uses inc;
   347  				// Clang/GCC choose depending on flags, but prefer add.
   348  				// Experiments show that inc/dec is both a little faster
   349  				// and makes the binary a little smaller.
   350  				if v.Op == ssa.OpAMD64ADDQconst {
   351  					asm = x86.AINCQ
   352  				} else {
   353  					asm = x86.AINCL
   354  				}
   355  				p := s.Prog(asm)
   356  				p.To.Type = obj.TYPE_REG
   357  				p.To.Reg = r
   358  				return
   359  			}
   360  			if v.AuxInt == -1 {
   361  				var asm obj.As
   362  				if v.Op == ssa.OpAMD64ADDQconst {
   363  					asm = x86.ADECQ
   364  				} else {
   365  					asm = x86.ADECL
   366  				}
   367  				p := s.Prog(asm)
   368  				p.To.Type = obj.TYPE_REG
   369  				p.To.Reg = r
   370  				return
   371  			}
   372  			p := s.Prog(v.Op.Asm())
   373  			p.From.Type = obj.TYPE_CONST
   374  			p.From.Offset = v.AuxInt
   375  			p.To.Type = obj.TYPE_REG
   376  			p.To.Reg = r
   377  			return
   378  		}
   379  		var asm obj.As
   380  		if v.Op == ssa.OpAMD64ADDQconst {
   381  			asm = x86.ALEAQ
   382  		} else {
   383  			asm = x86.ALEAL
   384  		}
   385  		p := s.Prog(asm)
   386  		p.From.Type = obj.TYPE_MEM
   387  		p.From.Reg = a
   388  		p.From.Offset = v.AuxInt
   389  		p.To.Type = obj.TYPE_REG
   390  		p.To.Reg = r
   391  
   392  	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
   393  		r := v.Reg()
   394  		if r != v.Args[0].Reg() {
   395  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   396  		}
   397  		p := s.Prog(v.Op.Asm())
   398  		p.From.Type = obj.TYPE_REG
   399  		p.From.Reg = v.Args[1].Reg()
   400  		p.To.Type = obj.TYPE_REG
   401  		p.To.Reg = r
   402  
   403  	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
   404  		r := v.Reg()
   405  		if r != v.Args[0].Reg() {
   406  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   407  		}
   408  		p := s.Prog(v.Op.Asm())
   409  		p.From.Type = obj.TYPE_CONST
   410  		p.From.Offset = v.AuxInt
   411  		p.To.Type = obj.TYPE_REG
   412  		p.To.Reg = r
   413  		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
   414  		// then we don't need to use resultInArg0 for these ops.
   415  		//p.From3 = new(obj.Addr)
   416  		//p.From3.Type = obj.TYPE_REG
   417  		//p.From3.Reg = v.Args[0].Reg()
   418  
   419  	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
   420  		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
   421  		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
   422  		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
   423  		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
   424  		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
   425  		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
   426  		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
   427  		r := v.Reg()
   428  		if r != v.Args[0].Reg() {
   429  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   430  		}
   431  		p := s.Prog(v.Op.Asm())
   432  		p.From.Type = obj.TYPE_CONST
   433  		p.From.Offset = v.AuxInt
   434  		p.To.Type = obj.TYPE_REG
   435  		p.To.Reg = r
   436  	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
   437  		r := v.Reg()
   438  		p := s.Prog(v.Op.Asm())
   439  		p.From.Type = obj.TYPE_REG
   440  		p.From.Reg = r
   441  		p.To.Type = obj.TYPE_REG
   442  		p.To.Reg = r
   443  	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
   444  		r := v.Args[0].Reg()
   445  		i := v.Args[1].Reg()
   446  		p := s.Prog(x86.ALEAQ)
   447  		switch v.Op {
   448  		case ssa.OpAMD64LEAQ1:
   449  			p.From.Scale = 1
   450  			if i == x86.REG_SP {
   451  				r, i = i, r
   452  			}
   453  		case ssa.OpAMD64LEAQ2:
   454  			p.From.Scale = 2
   455  		case ssa.OpAMD64LEAQ4:
   456  			p.From.Scale = 4
   457  		case ssa.OpAMD64LEAQ8:
   458  			p.From.Scale = 8
   459  		}
   460  		p.From.Type = obj.TYPE_MEM
   461  		p.From.Reg = r
   462  		p.From.Index = i
   463  		gc.AddAux(&p.From, v)
   464  		p.To.Type = obj.TYPE_REG
   465  		p.To.Reg = v.Reg()
   466  	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL:
   467  		p := s.Prog(v.Op.Asm())
   468  		p.From.Type = obj.TYPE_MEM
   469  		p.From.Reg = v.Args[0].Reg()
   470  		gc.AddAux(&p.From, v)
   471  		p.To.Type = obj.TYPE_REG
   472  		p.To.Reg = v.Reg()
   473  	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
   474  		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
   475  		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
   476  		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
   477  	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
   478  		// The Go assembler has swapped operands for UCOMISx relative to CMP,
   479  		// so we must account for that right here.
   480  		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
   481  	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
   482  		p := s.Prog(v.Op.Asm())
   483  		p.From.Type = obj.TYPE_REG
   484  		p.From.Reg = v.Args[0].Reg()
   485  		p.To.Type = obj.TYPE_CONST
   486  		p.To.Offset = v.AuxInt
   487  	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
   488  		ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst:
   489  		p := s.Prog(v.Op.Asm())
   490  		p.From.Type = obj.TYPE_CONST
   491  		p.From.Offset = v.AuxInt
   492  		p.To.Type = obj.TYPE_REG
   493  		p.To.Reg = v.Args[0].Reg()
   494  	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
   495  		x := v.Reg()
   496  		p := s.Prog(v.Op.Asm())
   497  		p.From.Type = obj.TYPE_CONST
   498  		p.From.Offset = v.AuxInt
   499  		p.To.Type = obj.TYPE_REG
   500  		p.To.Reg = x
   501  		// If flags are live at this instruction, suppress the
   502  		// MOV $0,AX -> XOR AX,AX optimization.
   503  		if v.Aux != nil {
   504  			p.Mark |= x86.PRESERVEFLAGS
   505  		}
   506  	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
   507  		x := v.Reg()
   508  		p := s.Prog(v.Op.Asm())
   509  		p.From.Type = obj.TYPE_FCONST
   510  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   511  		p.To.Type = obj.TYPE_REG
   512  		p.To.Reg = x
   513  	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
   514  		p := s.Prog(v.Op.Asm())
   515  		p.From.Type = obj.TYPE_MEM
   516  		p.From.Reg = v.Args[0].Reg()
   517  		gc.AddAux(&p.From, v)
   518  		p.To.Type = obj.TYPE_REG
   519  		p.To.Reg = v.Reg()
   520  	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8:
   521  		p := s.Prog(v.Op.Asm())
   522  		p.From.Type = obj.TYPE_MEM
   523  		p.From.Reg = v.Args[0].Reg()
   524  		gc.AddAux(&p.From, v)
   525  		p.From.Scale = 8
   526  		p.From.Index = v.Args[1].Reg()
   527  		p.To.Type = obj.TYPE_REG
   528  		p.To.Reg = v.Reg()
   529  	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
   530  		p := s.Prog(v.Op.Asm())
   531  		p.From.Type = obj.TYPE_MEM
   532  		p.From.Reg = v.Args[0].Reg()
   533  		gc.AddAux(&p.From, v)
   534  		p.From.Scale = 4
   535  		p.From.Index = v.Args[1].Reg()
   536  		p.To.Type = obj.TYPE_REG
   537  		p.To.Reg = v.Reg()
   538  	case ssa.OpAMD64MOVWloadidx2:
   539  		p := s.Prog(v.Op.Asm())
   540  		p.From.Type = obj.TYPE_MEM
   541  		p.From.Reg = v.Args[0].Reg()
   542  		gc.AddAux(&p.From, v)
   543  		p.From.Scale = 2
   544  		p.From.Index = v.Args[1].Reg()
   545  		p.To.Type = obj.TYPE_REG
   546  		p.To.Reg = v.Reg()
   547  	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
   548  		r := v.Args[0].Reg()
   549  		i := v.Args[1].Reg()
   550  		if i == x86.REG_SP {
   551  			r, i = i, r
   552  		}
   553  		p := s.Prog(v.Op.Asm())
   554  		p.From.Type = obj.TYPE_MEM
   555  		p.From.Reg = r
   556  		p.From.Scale = 1
   557  		p.From.Index = i
   558  		gc.AddAux(&p.From, v)
   559  		p.To.Type = obj.TYPE_REG
   560  		p.To.Reg = v.Reg()
   561  	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
   562  		p := s.Prog(v.Op.Asm())
   563  		p.From.Type = obj.TYPE_REG
   564  		p.From.Reg = v.Args[1].Reg()
   565  		p.To.Type = obj.TYPE_MEM
   566  		p.To.Reg = v.Args[0].Reg()
   567  		gc.AddAux(&p.To, v)
   568  	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8:
   569  		p := s.Prog(v.Op.Asm())
   570  		p.From.Type = obj.TYPE_REG
   571  		p.From.Reg = v.Args[2].Reg()
   572  		p.To.Type = obj.TYPE_MEM
   573  		p.To.Reg = v.Args[0].Reg()
   574  		p.To.Scale = 8
   575  		p.To.Index = v.Args[1].Reg()
   576  		gc.AddAux(&p.To, v)
   577  	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
   578  		p := s.Prog(v.Op.Asm())
   579  		p.From.Type = obj.TYPE_REG
   580  		p.From.Reg = v.Args[2].Reg()
   581  		p.To.Type = obj.TYPE_MEM
   582  		p.To.Reg = v.Args[0].Reg()
   583  		p.To.Scale = 4
   584  		p.To.Index = v.Args[1].Reg()
   585  		gc.AddAux(&p.To, v)
   586  	case ssa.OpAMD64MOVWstoreidx2:
   587  		p := s.Prog(v.Op.Asm())
   588  		p.From.Type = obj.TYPE_REG
   589  		p.From.Reg = v.Args[2].Reg()
   590  		p.To.Type = obj.TYPE_MEM
   591  		p.To.Reg = v.Args[0].Reg()
   592  		p.To.Scale = 2
   593  		p.To.Index = v.Args[1].Reg()
   594  		gc.AddAux(&p.To, v)
   595  	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
   596  		r := v.Args[0].Reg()
   597  		i := v.Args[1].Reg()
   598  		if i == x86.REG_SP {
   599  			r, i = i, r
   600  		}
   601  		p := s.Prog(v.Op.Asm())
   602  		p.From.Type = obj.TYPE_REG
   603  		p.From.Reg = v.Args[2].Reg()
   604  		p.To.Type = obj.TYPE_MEM
   605  		p.To.Reg = r
   606  		p.To.Scale = 1
   607  		p.To.Index = i
   608  		gc.AddAux(&p.To, v)
   609  	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
   610  		p := s.Prog(v.Op.Asm())
   611  		p.From.Type = obj.TYPE_CONST
   612  		sc := v.AuxValAndOff()
   613  		p.From.Offset = sc.Val()
   614  		p.To.Type = obj.TYPE_MEM
   615  		p.To.Reg = v.Args[0].Reg()
   616  		gc.AddAux2(&p.To, v, sc.Off())
   617  	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
   618  		p := s.Prog(v.Op.Asm())
   619  		p.From.Type = obj.TYPE_CONST
   620  		sc := v.AuxValAndOff()
   621  		p.From.Offset = sc.Val()
   622  		r := v.Args[0].Reg()
   623  		i := v.Args[1].Reg()
   624  		switch v.Op {
   625  		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
   626  			p.To.Scale = 1
   627  			if i == x86.REG_SP {
   628  				r, i = i, r
   629  			}
   630  		case ssa.OpAMD64MOVWstoreconstidx2:
   631  			p.To.Scale = 2
   632  		case ssa.OpAMD64MOVLstoreconstidx4:
   633  			p.To.Scale = 4
   634  		case ssa.OpAMD64MOVQstoreconstidx8:
   635  			p.To.Scale = 8
   636  		}
   637  		p.To.Type = obj.TYPE_MEM
   638  		p.To.Reg = r
   639  		p.To.Index = i
   640  		gc.AddAux2(&p.To, v, sc.Off())
   641  	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
   642  		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
   643  		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
   644  		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
   645  	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
   646  		r := v.Reg()
   647  		// Break false dependency on destination register.
   648  		opregreg(s, x86.AXORPS, r, r)
   649  		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
   650  	case ssa.OpAMD64ADDQmem, ssa.OpAMD64ADDLmem, ssa.OpAMD64SUBQmem, ssa.OpAMD64SUBLmem,
   651  		ssa.OpAMD64ANDQmem, ssa.OpAMD64ANDLmem, ssa.OpAMD64ORQmem, ssa.OpAMD64ORLmem,
   652  		ssa.OpAMD64XORQmem, ssa.OpAMD64XORLmem, ssa.OpAMD64ADDSDmem, ssa.OpAMD64ADDSSmem,
   653  		ssa.OpAMD64SUBSDmem, ssa.OpAMD64SUBSSmem, ssa.OpAMD64MULSDmem, ssa.OpAMD64MULSSmem:
   654  		p := s.Prog(v.Op.Asm())
   655  		p.From.Type = obj.TYPE_MEM
   656  		p.From.Reg = v.Args[1].Reg()
   657  		gc.AddAux(&p.From, v)
   658  		p.To.Type = obj.TYPE_REG
   659  		p.To.Reg = v.Reg()
   660  		if v.Reg() != v.Args[0].Reg() {
   661  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   662  		}
   663  	case ssa.OpAMD64DUFFZERO:
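        		// Jump into duffzero at byte offset off. If the size needs only part
        		// of a 4-MOVUPS block, first move DI down by adj (adj <= 0) so the
        		// partial block's stores cover the right addresses (see duff above).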
   664  		off := duffStart(v.AuxInt)
   665  		adj := duffAdj(v.AuxInt)
   666  		var p *obj.Prog
   667  		if adj != 0 {
   668  			p = s.Prog(x86.AADDQ)
   669  			p.From.Type = obj.TYPE_CONST
   670  			p.From.Offset = adj
   671  			p.To.Type = obj.TYPE_REG
   672  			p.To.Reg = x86.REG_DI
   673  		}
   674  		p = s.Prog(obj.ADUFFZERO)
   675  		p.To.Type = obj.TYPE_ADDR
   676  		p.To.Sym = gc.Duffzero
   677  		p.To.Offset = off
   678  	case ssa.OpAMD64MOVOconst:
   679  		if v.AuxInt != 0 {
   680  			v.Fatalf("MOVOconst can only do constant=0")
   681  		}
   682  		r := v.Reg()
   683  		opregreg(s, x86.AXORPS, r, r)
   684  	case ssa.OpAMD64DUFFCOPY:
   685  		p := s.Prog(obj.ADUFFCOPY)
   686  		p.To.Type = obj.TYPE_ADDR
   687  		p.To.Sym = gc.Duffcopy
   688  		p.To.Offset = v.AuxInt
   689  
   690  	case ssa.OpCopy, ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
   691  		if v.Type.IsMemory() {
   692  			return
   693  		}
   694  		x := v.Args[0].Reg()
   695  		y := v.Reg()
   696  		if x != y {
   697  			opregreg(s, moveByType(v.Type), y, x)
   698  		}
   699  	case ssa.OpLoadReg:
   700  		if v.Type.IsFlags() {
   701  			v.Fatalf("load flags not implemented: %v", v.LongString())
   702  			return
   703  		}
   704  		p := s.Prog(loadByType(v.Type))
   705  		gc.AddrAuto(&p.From, v.Args[0])
   706  		p.To.Type = obj.TYPE_REG
   707  		p.To.Reg = v.Reg()
   708  
   709  	case ssa.OpStoreReg:
   710  		if v.Type.IsFlags() {
   711  			v.Fatalf("store flags not implemented: %v", v.LongString())
   712  			return
   713  		}
   714  		p := s.Prog(storeByType(v.Type))
   715  		p.From.Type = obj.TYPE_REG
   716  		p.From.Reg = v.Args[0].Reg()
   717  		gc.AddrAuto(&p.To, v)
   718  	case ssa.OpAMD64LoweredGetClosurePtr:
   719  		// Closure pointer is DX.
   720  		gc.CheckLoweredGetClosurePtr(v)
   721  	case ssa.OpAMD64LoweredGetG:
   722  		r := v.Reg()
   723  		// See the comments in cmd/internal/obj/x86/obj6.go
   724  		// near CanUse1InsnTLS for a detailed explanation of these instructions.
   725  		if x86.CanUse1InsnTLS(gc.Ctxt) {
   726  			// MOVQ (TLS), r
   727  			p := s.Prog(x86.AMOVQ)
   728  			p.From.Type = obj.TYPE_MEM
   729  			p.From.Reg = x86.REG_TLS
   730  			p.To.Type = obj.TYPE_REG
   731  			p.To.Reg = r
   732  		} else {
   733  			// MOVQ TLS, r
   734  			// MOVQ (r)(TLS*1), r
   735  			p := s.Prog(x86.AMOVQ)
   736  			p.From.Type = obj.TYPE_REG
   737  			p.From.Reg = x86.REG_TLS
   738  			p.To.Type = obj.TYPE_REG
   739  			p.To.Reg = r
   740  			q := s.Prog(x86.AMOVQ)
   741  			q.From.Type = obj.TYPE_MEM
   742  			q.From.Reg = r
   743  			q.From.Index = x86.REG_TLS
   744  			q.From.Scale = 1
   745  			q.To.Type = obj.TYPE_REG
   746  			q.To.Reg = r
   747  		}
   748  	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
   749  		s.Call(v)
   750  	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
   751  		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
   752  		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
   753  		r := v.Reg()
   754  		if r != v.Args[0].Reg() {
   755  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   756  		}
   757  		p := s.Prog(v.Op.Asm())
   758  		p.To.Type = obj.TYPE_REG
   759  		p.To.Reg = r
   760  	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL:
   761  		p := s.Prog(v.Op.Asm())
   762  		p.From.Type = obj.TYPE_REG
   763  		p.From.Reg = v.Args[0].Reg()
   764  		p.To.Type = obj.TYPE_REG
   765  		p.To.Reg = v.Reg0()
   766  	case ssa.OpAMD64SQRTSD:
   767  		p := s.Prog(v.Op.Asm())
   768  		p.From.Type = obj.TYPE_REG
   769  		p.From.Reg = v.Args[0].Reg()
   770  		p.To.Type = obj.TYPE_REG
   771  		p.To.Reg = v.Reg()
   772  	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL:
   773  		if v.Args[0].Reg() != v.Reg() {
   774  			// POPCNT on Intel has a false dependency on the destination register.
   775  			// Zero the destination to break the dependency.
   776  			p := s.Prog(x86.AMOVQ)
   777  			p.From.Type = obj.TYPE_CONST
   778  			p.From.Offset = 0
   779  			p.To.Type = obj.TYPE_REG
   780  			p.To.Reg = v.Reg()
   781  		}
   782  		p := s.Prog(v.Op.Asm())
   783  		p.From.Type = obj.TYPE_REG
   784  		p.From.Reg = v.Args[0].Reg()
   785  		p.To.Type = obj.TYPE_REG
   786  		p.To.Reg = v.Reg()
   787  	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
   788  		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
   789  		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
   790  		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
   791  		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
   792  		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
   793  		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
   794  		p := s.Prog(v.Op.Asm())
   795  		p.To.Type = obj.TYPE_REG
   796  		p.To.Reg = v.Reg()
   797  
   798  	case ssa.OpAMD64SETNEF:
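        		// After UCOMISx, "unordered" sets PF, so x != y is (ZF clear || PF set):
        		// combine SETNE with SETPS below.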
   799  		p := s.Prog(v.Op.Asm())
   800  		p.To.Type = obj.TYPE_REG
   801  		p.To.Reg = v.Reg()
   802  		q := s.Prog(x86.ASETPS)
   803  		q.To.Type = obj.TYPE_REG
   804  		q.To.Reg = x86.REG_AX
   805  		// ORL avoids partial register write and is smaller than ORQ, used by old compiler
   806  		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)
   807  
   808  	case ssa.OpAMD64SETEQF:
   809  		p := s.Prog(v.Op.Asm())
   810  		p.To.Type = obj.TYPE_REG
   811  		p.To.Reg = v.Reg()
   812  		q := s.Prog(x86.ASETPC)
   813  		q.To.Type = obj.TYPE_REG
   814  		q.To.Reg = x86.REG_AX
   815  		// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
   816  		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)
   817  
   818  	case ssa.OpAMD64InvertFlags:
   819  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
   820  	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
   821  		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
   822  	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
   823  		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
   824  	case ssa.OpAMD64REPSTOSQ:
   825  		s.Prog(x86.AREP)
   826  		s.Prog(x86.ASTOSQ)
   827  	case ssa.OpAMD64REPMOVSQ:
   828  		s.Prog(x86.AREP)
   829  		s.Prog(x86.AMOVSQ)
   830  	case ssa.OpAMD64LoweredNilCheck:
   831  		// Issue a load which will fault if the input is nil.
   832  		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
   833  		// Should we use the 3-byte TESTB $0, (reg) instead?  It is larger
   834  		// but it doesn't have false dependency on AX.
   835  		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
   836  		// That trades clobbering flags for clobbering a register.
   837  		p := s.Prog(x86.ATESTB)
   838  		p.From.Type = obj.TYPE_REG
   839  		p.From.Reg = x86.REG_AX
   840  		p.To.Type = obj.TYPE_MEM
   841  		p.To.Reg = v.Args[0].Reg()
   842  		gc.AddAux(&p.To, v)
   843  		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
   844  			gc.Warnl(v.Pos, "generated nil check")
   845  		}
   846  	case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
   847  		p := s.Prog(v.Op.Asm())
   848  		p.From.Type = obj.TYPE_MEM
   849  		p.From.Reg = v.Args[0].Reg()
   850  		gc.AddAux(&p.From, v)
   851  		p.To.Type = obj.TYPE_REG
   852  		p.To.Reg = v.Reg0()
   853  	case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
   854  		r := v.Reg0()
   855  		if r != v.Args[0].Reg() {
   856  			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
   857  		}
   858  		p := s.Prog(v.Op.Asm())
   859  		p.From.Type = obj.TYPE_REG
   860  		p.From.Reg = r
   861  		p.To.Type = obj.TYPE_MEM
   862  		p.To.Reg = v.Args[1].Reg()
   863  		gc.AddAux(&p.To, v)
   864  	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
   865  		r := v.Reg0()
   866  		if r != v.Args[0].Reg() {
   867  			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
   868  		}
   869  		s.Prog(x86.ALOCK)
   870  		p := s.Prog(v.Op.Asm())
   871  		p.From.Type = obj.TYPE_REG
   872  		p.From.Reg = r
   873  		p.To.Type = obj.TYPE_MEM
   874  		p.To.Reg = v.Args[1].Reg()
   875  		gc.AddAux(&p.To, v)
   876  	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
   877  		if v.Args[1].Reg() != x86.REG_AX {
   878  			v.Fatalf("input[1] not in AX %s", v.LongString())
   879  		}
   880  		s.Prog(x86.ALOCK)
   881  		p := s.Prog(v.Op.Asm())
   882  		p.From.Type = obj.TYPE_REG
   883  		p.From.Reg = v.Args[2].Reg()
   884  		p.To.Type = obj.TYPE_MEM
   885  		p.To.Reg = v.Args[0].Reg()
   886  		gc.AddAux(&p.To, v)
   887  		p = s.Prog(x86.ASETEQ)
   888  		p.To.Type = obj.TYPE_REG
   889  		p.To.Reg = v.Reg0()
   890  	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
   891  		s.Prog(x86.ALOCK)
   892  		p := s.Prog(v.Op.Asm())
   893  		p.From.Type = obj.TYPE_REG
   894  		p.From.Reg = v.Args[1].Reg()
   895  		p.To.Type = obj.TYPE_MEM
   896  		p.To.Reg = v.Args[0].Reg()
   897  		gc.AddAux(&p.To, v)
   898  	case ssa.OpClobber:
   899  		p := s.Prog(x86.AMOVL)
   900  		p.From.Type = obj.TYPE_CONST
   901  		p.From.Offset = 0xdeaddead
   902  		p.To.Type = obj.TYPE_MEM
   903  		p.To.Reg = x86.REG_SP
   904  		gc.AddAux(&p.To, v)
   905  		p = s.Prog(x86.AMOVL)
   906  		p.From.Type = obj.TYPE_CONST
   907  		p.From.Offset = 0xdeaddead
   908  		p.To.Type = obj.TYPE_MEM
   909  		p.To.Reg = x86.REG_SP
   910  		gc.AddAux(&p.To, v)
   911  		p.To.Offset += 4
   912  	default:
   913  		v.Fatalf("genValue not implemented: %s", v.LongString())
   914  	}
   915  }
   916  
   917  var blockJump = [...]struct {
   918  	asm, invasm obj.As
   919  }{
   920  	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
   921  	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
   922  	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
   923  	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
   924  	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
   925  	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
   926  	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
   927  	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
   928  	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
   929  	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
   930  	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
   931  	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
   932  }
   933  
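        // eqfJumps and nefJumps describe the two-jump sequences used for
        // floating-point equality blocks: after UCOMISx, "equal" means ZF set and
        // PF clear (PF signals unordered), so EQF and NEF each need a JNE plus a
        // parity jump, chosen according to which successor is the fallthrough.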
   934  var eqfJumps = [2][2]gc.FloatingEQNEJump{
   935  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
   936  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
   937  }
   938  var nefJumps = [2][2]gc.FloatingEQNEJump{
   939  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
   940  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
   941  }
   942  
   943  func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
   944  	switch b.Kind {
   945  	case ssa.BlockPlain:
   946  		if b.Succs[0].Block() != next {
   947  			p := s.Prog(obj.AJMP)
   948  			p.To.Type = obj.TYPE_BRANCH
   949  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
   950  		}
   951  	case ssa.BlockDefer:
   952  		// defer returns in rax:
   953  		// 0 if we should continue executing
   954  		// 1 if we should jump to deferreturn call
   955  		p := s.Prog(x86.ATESTL)
   956  		p.From.Type = obj.TYPE_REG
   957  		p.From.Reg = x86.REG_AX
   958  		p.To.Type = obj.TYPE_REG
   959  		p.To.Reg = x86.REG_AX
   960  		p = s.Prog(x86.AJNE)
   961  		p.To.Type = obj.TYPE_BRANCH
   962  		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
   963  		if b.Succs[0].Block() != next {
   964  			p := s.Prog(obj.AJMP)
   965  			p.To.Type = obj.TYPE_BRANCH
   966  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
   967  		}
   968  	case ssa.BlockExit:
   969  		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
   970  	case ssa.BlockRet:
   971  		s.Prog(obj.ARET)
   972  	case ssa.BlockRetJmp:
   973  		p := s.Prog(obj.AJMP)
   974  		p.To.Type = obj.TYPE_MEM
   975  		p.To.Name = obj.NAME_EXTERN
   976  		p.To.Sym = b.Aux.(*obj.LSym)
   977  
   978  	case ssa.BlockAMD64EQF:
   979  		s.FPJump(b, next, &eqfJumps)
   980  
   981  	case ssa.BlockAMD64NEF:
   982  		s.FPJump(b, next, &nefJumps)
   983  
   984  	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
   985  		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
   986  		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
   987  		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
   988  		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
   989  		jmp := blockJump[b.Kind]
   990  		var p *obj.Prog
   991  		switch next {
   992  		case b.Succs[0].Block():
   993  			p = s.Prog(jmp.invasm)
   994  			p.To.Type = obj.TYPE_BRANCH
   995  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
   996  		case b.Succs[1].Block():
   997  			p = s.Prog(jmp.asm)
   998  			p.To.Type = obj.TYPE_BRANCH
   999  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1000  		default:
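        			// Neither successor is the fallthrough: emit the conditional jump
        			// to Succs[0] followed by an unconditional jump to Succs[1].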
  1001  			p = s.Prog(jmp.asm)
  1002  			p.To.Type = obj.TYPE_BRANCH
  1003  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1004  			q := s.Prog(obj.AJMP)
  1005  			q.To.Type = obj.TYPE_BRANCH
  1006  			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
  1007  		}
  1008  
  1009  	default:
  1010  		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
  1011  	}
  1012  }