github.com/zebozhuang/go@v0.0.0-20200207033046-f8a98f6f5c5d/src/cmd/compile/internal/amd64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package amd64
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  
    11  	"cmd/compile/internal/gc"
    12  	"cmd/compile/internal/ssa"
    13  	"cmd/compile/internal/types"
    14  	"cmd/internal/obj"
    15  	"cmd/internal/obj/x86"
    16  )
    17  
    18  // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
    19  func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
    20  	flive := b.FlagsLiveAtEnd
    21  	if b.Control != nil && b.Control.Type.IsFlags() {
    22  		flive = true
    23  	}
    24  	for i := len(b.Values) - 1; i >= 0; i-- {
    25  		v := b.Values[i]
    26  		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
    27  			// The "mark" is any non-nil Aux value.
    28  			v.Aux = v
    29  		}
    30  		if v.Type.IsFlags() {
    31  			flive = false
    32  		}
    33  		for _, a := range v.Args {
    34  			if a.Type.IsFlags() {
    35  				flive = true
    36  			}
    37  		}
    38  	}
    39  }
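        // Illustrative sketch (not part of the original source): the mark matters
        // when flags are live across a constant materialization, for example
        //
        //	CMPQ AX, BX
        //	MOVQ $0, CX   // marked via v.Aux: must stay a MOV, not become XORL CX, CX
        //	JEQ  done
        //
        // Rewriting the MOVQ as an XOR (the optimization that the PRESERVEFLAGS
        // mark suppresses in the MOVLconst/MOVQconst case of ssaGenValue below)
        // would clobber the flags the JEQ still needs.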
    40  
    41  // loadByType returns the load instruction of the given type.
    42  func loadByType(t *types.Type) obj.As {
    43  	// Avoid partial register write
    44  	if !t.IsFloat() && t.Size() <= 2 {
    45  		if t.Size() == 1 {
    46  			return x86.AMOVBLZX
    47  		} else {
    48  			return x86.AMOVWLZX
    49  		}
    50  	}
    51  	// Otherwise, there's no difference between load and store opcodes.
    52  	return storeByType(t)
    53  }
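        // For example (illustrative, not part of the original source), restoring a
        // 1-byte spill uses MOVBLZX, which zero-extends into the full 32-bit
        // register, rather than MOVB, so the destination is written in full and no
        // partial-register merge is needed; sub-word SSA values don't depend on
        // their upper register bits, so the zero extension is harmless.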
    54  
    55  // storeByType returns the store instruction of the given type.
    56  func storeByType(t *types.Type) obj.As {
    57  	width := t.Size()
    58  	if t.IsFloat() {
    59  		switch width {
    60  		case 4:
    61  			return x86.AMOVSS
    62  		case 8:
    63  			return x86.AMOVSD
    64  		}
    65  	} else {
    66  		switch width {
    67  		case 1:
    68  			return x86.AMOVB
    69  		case 2:
    70  			return x86.AMOVW
    71  		case 4:
    72  			return x86.AMOVL
    73  		case 8:
    74  			return x86.AMOVQ
    75  		}
    76  	}
    77  	panic("bad store type")
    78  }
    79  
    80  // moveByType returns the reg->reg move instruction of the given type.
    81  func moveByType(t *types.Type) obj.As {
    82  	if t.IsFloat() {
    83  		// Moving the whole SSE2 register is faster
    84  		// than moving just the correct low portion of it.
    85  		// There is no xmm->xmm move with a 1-byte opcode,
    86  		// so use MOVUPS, which has a 2-byte opcode.
    87  		return x86.AMOVUPS
    88  	} else {
    89  		switch t.Size() {
    90  		case 1:
    91  			// Avoids partial register write
    92  			return x86.AMOVL
    93  		case 2:
    94  			return x86.AMOVL
    95  		case 4:
    96  			return x86.AMOVL
    97  		case 8:
    98  			return x86.AMOVQ
    99  		case 16:
   100  			return x86.AMOVUPS // int128s are in SSE registers
   101  		default:
   102  			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
   103  		}
   104  	}
   105  }
   106  
   107  // opregreg emits instructions for
   108  //     dest := dest(To) op src(From)
   109  // and also returns the created obj.Prog so it
   110  // may be further adjusted (offset, scale, etc).
   111  func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
   112  	p := s.Prog(op)
   113  	p.From.Type = obj.TYPE_REG
   114  	p.To.Type = obj.TYPE_REG
   115  	p.To.Reg = dest
   116  	p.From.Reg = src
   117  	return p
   118  }
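        // For example (illustrative), opregreg(s, x86.AADDQ, dst, src) emits
        // "ADDQ src, dst", leaving the result in dst; dst and src are register
        // numbers such as v.Reg() and v.Args[1].Reg().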
   119  
   120  // DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD.
   121  // See runtime/mkduff.go.
   122  func duffStart(size int64) int64 {
   123  	x, _ := duff(size)
   124  	return x
   125  }
   126  func duffAdj(size int64) int64 {
   127  	_, x := duff(size)
   128  	return x
   129  }
   130  
   131  // duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
   132  // required to use the duffzero mechanism for a block of the given size.
   133  func duff(size int64) (int64, int64) {
   134  	if size < 32 || size > 1024 || size%dzClearStep != 0 {
   135  		panic("bad duffzero size")
   136  	}
   137  	steps := size / dzClearStep
   138  	blocks := steps / dzBlockLen
   139  	steps %= dzBlockLen
   140  	off := dzBlockSize * (dzBlocks - blocks)
   141  	var adj int64
   142  	if steps != 0 {
   143  		off -= dzAddSize
   144  		off -= dzMovSize * steps
   145  		adj -= dzClearStep * (dzBlockLen - steps)
   146  	}
   147  	return off, adj
   148  }
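        // duffZeroExample is an illustrative sketch, not part of the original
        // source. It shows how a 96-byte zeroing request maps onto the Duff's
        // device from runtime/mkduff.go: with 16-byte MOVUPS steps and 4-step
        // blocks, 96 bytes is one full block plus two extra steps, so DUFFZERO is
        // entered two MOVUPSs (and one ADD) before the final full block, and DI is
        // pre-adjusted so those early stores land at the start of the buffer.
        func duffZeroExample() (off, adj int64) {
        	// Assuming dzClearStep == 16 and dzBlockLen == 4, adj comes back as
        	// -2*16 = -32 and off backs up past one ADD and two MOVUPS stores.
        	return duff(96)
        }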
   149  
   150  func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
   151  	switch v.Op {
   152  	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
   153  		r := v.Reg()
   154  		r1 := v.Args[0].Reg()
   155  		r2 := v.Args[1].Reg()
   156  		switch {
   157  		case r == r1:
   158  			p := s.Prog(v.Op.Asm())
   159  			p.From.Type = obj.TYPE_REG
   160  			p.From.Reg = r2
   161  			p.To.Type = obj.TYPE_REG
   162  			p.To.Reg = r
   163  		case r == r2:
   164  			p := s.Prog(v.Op.Asm())
   165  			p.From.Type = obj.TYPE_REG
   166  			p.From.Reg = r1
   167  			p.To.Type = obj.TYPE_REG
   168  			p.To.Reg = r
   169  		default:
   170  			var asm obj.As
   171  			if v.Op == ssa.OpAMD64ADDQ {
   172  				asm = x86.ALEAQ
   173  			} else {
   174  				asm = x86.ALEAL
   175  			}
   176  			p := s.Prog(asm)
   177  			p.From.Type = obj.TYPE_MEM
   178  			p.From.Reg = r1
   179  			p.From.Scale = 1
   180  			p.From.Index = r2
   181  			p.To.Type = obj.TYPE_REG
   182  			p.To.Reg = r
   183  		}
   184  	// 2-address opcode arithmetic
   185  	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
   186  		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
   187  		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
   188  		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
   189  		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
   190  		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
   191  		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
   192  		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
   193  		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
   194  		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
   195  		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
   196  		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
   197  		ssa.OpAMD64PXOR:
   198  		r := v.Reg()
   199  		if r != v.Args[0].Reg() {
   200  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   201  		}
   202  		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())
   203  
   204  	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
   205  		// Arg[0] (the dividend) is in AX.
   206  		// Arg[1] (the divisor) can be in any other register.
   207  		// Result[0] (the quotient) is in AX.
   208  		// Result[1] (the remainder) is in DX.
   209  		r := v.Args[1].Reg()
   210  
   211  		// Zero extend dividend.
   212  		c := s.Prog(x86.AXORL)
   213  		c.From.Type = obj.TYPE_REG
   214  		c.From.Reg = x86.REG_DX
   215  		c.To.Type = obj.TYPE_REG
   216  		c.To.Reg = x86.REG_DX
   217  
   218  		// Issue divide.
   219  		p := s.Prog(v.Op.Asm())
   220  		p.From.Type = obj.TYPE_REG
   221  		p.From.Reg = r
   222  
   223  	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
   224  		// Arg[0] (the dividend) is in AX.
   225  		// Arg[1] (the divisor) can be in any other register.
   226  		// Result[0] (the quotient) is in AX.
   227  		// Result[1] (the remainder) is in DX.
   228  		r := v.Args[1].Reg()
   229  
   230  		// CPU faults upon signed overflow, which occurs when the most
   231  		// negative int is divided by -1. Handle divide by -1 as a special case.
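        		// For example (illustrative), math.MinInt64 / -1 overflows int64, so
        		// IDIVQ would fault with #DE; the CMP/JEQ below routes a -1 divisor to
        		// the NEG/XOR fixup instead, giving quotient -dividend and remainder 0.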
   232  		var c *obj.Prog
   233  		switch v.Op {
   234  		case ssa.OpAMD64DIVQ:
   235  			c = s.Prog(x86.ACMPQ)
   236  		case ssa.OpAMD64DIVL:
   237  			c = s.Prog(x86.ACMPL)
   238  		case ssa.OpAMD64DIVW:
   239  			c = s.Prog(x86.ACMPW)
   240  		}
   241  		c.From.Type = obj.TYPE_REG
   242  		c.From.Reg = r
   243  		c.To.Type = obj.TYPE_CONST
   244  		c.To.Offset = -1
   245  		j1 := s.Prog(x86.AJEQ)
   246  		j1.To.Type = obj.TYPE_BRANCH
   247  
   248  		// Sign extend dividend.
   249  		switch v.Op {
   250  		case ssa.OpAMD64DIVQ:
   251  			s.Prog(x86.ACQO)
   252  		case ssa.OpAMD64DIVL:
   253  			s.Prog(x86.ACDQ)
   254  		case ssa.OpAMD64DIVW:
   255  			s.Prog(x86.ACWD)
   256  		}
   257  
   258  		// Issue divide.
   259  		p := s.Prog(v.Op.Asm())
   260  		p.From.Type = obj.TYPE_REG
   261  		p.From.Reg = r
   262  
   263  		// Skip over -1 fixup code.
   264  		j2 := s.Prog(obj.AJMP)
   265  		j2.To.Type = obj.TYPE_BRANCH
   266  
   267  		// Issue -1 fixup code.
   268  		// n / -1 = -n
   269  		n1 := s.Prog(x86.ANEGQ)
   270  		n1.To.Type = obj.TYPE_REG
   271  		n1.To.Reg = x86.REG_AX
   272  
   273  		// n % -1 == 0
   274  		n2 := s.Prog(x86.AXORL)
   275  		n2.From.Type = obj.TYPE_REG
   276  		n2.From.Reg = x86.REG_DX
   277  		n2.To.Type = obj.TYPE_REG
   278  		n2.To.Reg = x86.REG_DX
   279  
   280  		// TODO(khr): issue only the -1 fixup code we need.
   281  		// For instance, if only the quotient is used, no point in zeroing the remainder.
   282  
   283  		j1.To.Val = n1
   284  		j2.To.Val = s.Pc()
   285  
   286  	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
   287  		// The frontend rewrites constant division by 8/16/32-bit integers
   288  		// into HMUL by a constant.
   289  		// SSA rewrites generate the 64-bit versions.
   290  
   291  		// Arg[0] is already in AX, as it's the only register we allow,
   292  		// and DX is the only output we care about (the high bits).
   293  		p := s.Prog(v.Op.Asm())
   294  		p.From.Type = obj.TYPE_REG
   295  		p.From.Reg = v.Args[1].Reg()
   296  
   297  		// IMULB puts the high portion in AH instead of DL,
   298  		// so move it to DL for consistency
   299  		if v.Type.Size() == 1 {
   300  			m := s.Prog(x86.AMOVB)
   301  			m.From.Type = obj.TYPE_REG
   302  			m.From.Reg = x86.REG_AH
   303  			m.To.Type = obj.TYPE_REG
   304  			m.To.Reg = x86.REG_DX
   305  		}
   306  
   307  	case ssa.OpAMD64MULQU2:
   308  		// Arg[0] is already in AX, as it's the only register we allow.
   309  		// Results: hi in DX, lo in AX.
   310  		p := s.Prog(v.Op.Asm())
   311  		p.From.Type = obj.TYPE_REG
   312  		p.From.Reg = v.Args[1].Reg()
   313  
   314  	case ssa.OpAMD64DIVQU2:
   315  		// Arg[0] and Arg[1] are already in DX and AX, as they're the only registers we allow.
   316  		// Results: quotient in AX, remainder in DX.
   317  		p := s.Prog(v.Op.Asm())
   318  		p.From.Type = obj.TYPE_REG
   319  		p.From.Reg = v.Args[2].Reg()
   320  
   321  	case ssa.OpAMD64AVGQU:
   322  		// compute (x+y)/2 unsigned.
   323  		// Do a 64-bit add, the overflow goes into the carry.
   324  		// Shift right once and pull the carry back into the 63rd bit.
   325  		r := v.Reg()
   326  		if r != v.Args[0].Reg() {
   327  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   328  		}
   329  		p := s.Prog(x86.AADDQ)
   330  		p.From.Type = obj.TYPE_REG
   331  		p.To.Type = obj.TYPE_REG
   332  		p.To.Reg = r
   333  		p.From.Reg = v.Args[1].Reg()
   334  		p = s.Prog(x86.ARCRQ)
   335  		p.From.Type = obj.TYPE_CONST
   336  		p.From.Offset = 1
   337  		p.To.Type = obj.TYPE_REG
   338  		p.To.Reg = r
   339  
   340  	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
   341  		r := v.Reg()
   342  		a := v.Args[0].Reg()
   343  		if r == a {
   344  			if v.AuxInt == 1 {
   345  				var asm obj.As
   346  				// The software optimization manual recommends add $1,reg.
   347  				// But inc/dec is 1 byte smaller. ICC always uses inc;
   348  				// Clang/GCC choose depending on flags, but prefer add.
   349  				// Experiments show that inc/dec is a little faster
   350  				// and makes the binary a little smaller.
   351  				if v.Op == ssa.OpAMD64ADDQconst {
   352  					asm = x86.AINCQ
   353  				} else {
   354  					asm = x86.AINCL
   355  				}
   356  				p := s.Prog(asm)
   357  				p.To.Type = obj.TYPE_REG
   358  				p.To.Reg = r
   359  				return
   360  			}
   361  			if v.AuxInt == -1 {
   362  				var asm obj.As
   363  				if v.Op == ssa.OpAMD64ADDQconst {
   364  					asm = x86.ADECQ
   365  				} else {
   366  					asm = x86.ADECL
   367  				}
   368  				p := s.Prog(asm)
   369  				p.To.Type = obj.TYPE_REG
   370  				p.To.Reg = r
   371  				return
   372  			}
   373  			p := s.Prog(v.Op.Asm())
   374  			p.From.Type = obj.TYPE_CONST
   375  			p.From.Offset = v.AuxInt
   376  			p.To.Type = obj.TYPE_REG
   377  			p.To.Reg = r
   378  			return
   379  		}
   380  		var asm obj.As
   381  		if v.Op == ssa.OpAMD64ADDQconst {
   382  			asm = x86.ALEAQ
   383  		} else {
   384  			asm = x86.ALEAL
   385  		}
   386  		p := s.Prog(asm)
   387  		p.From.Type = obj.TYPE_MEM
   388  		p.From.Reg = a
   389  		p.From.Offset = v.AuxInt
   390  		p.To.Type = obj.TYPE_REG
   391  		p.To.Reg = r
   392  
   393  	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
   394  		r := v.Reg()
   395  		if r != v.Args[0].Reg() {
   396  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   397  		}
   398  		p := s.Prog(v.Op.Asm())
   399  		p.From.Type = obj.TYPE_REG
   400  		p.From.Reg = v.Args[1].Reg()
   401  		p.To.Type = obj.TYPE_REG
   402  		p.To.Reg = r
   403  
   404  	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
   405  		r := v.Reg()
   406  		if r != v.Args[0].Reg() {
   407  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   408  		}
   409  		p := s.Prog(v.Op.Asm())
   410  		p.From.Type = obj.TYPE_CONST
   411  		p.From.Offset = v.AuxInt
   412  		p.To.Type = obj.TYPE_REG
   413  		p.To.Reg = r
   414  		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
   415  		// then we don't need to use resultInArg0 for these ops.
   416  		//p.From3 = new(obj.Addr)
   417  		//p.From3.Type = obj.TYPE_REG
   418  		//p.From3.Reg = v.Args[0].Reg()
   419  
   420  	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
   421  		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
   422  		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
   423  		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
   424  		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
   425  		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
   426  		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
   427  		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
   428  		r := v.Reg()
   429  		if r != v.Args[0].Reg() {
   430  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   431  		}
   432  		p := s.Prog(v.Op.Asm())
   433  		p.From.Type = obj.TYPE_CONST
   434  		p.From.Offset = v.AuxInt
   435  		p.To.Type = obj.TYPE_REG
   436  		p.To.Reg = r
   437  	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
   438  		r := v.Reg()
   439  		p := s.Prog(v.Op.Asm())
   440  		p.From.Type = obj.TYPE_REG
   441  		p.From.Reg = r
   442  		p.To.Type = obj.TYPE_REG
   443  		p.To.Reg = r
   444  	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
   445  		r := v.Args[0].Reg()
   446  		i := v.Args[1].Reg()
   447  		p := s.Prog(x86.ALEAQ)
   448  		switch v.Op {
   449  		case ssa.OpAMD64LEAQ1:
   450  			p.From.Scale = 1
   451  			if i == x86.REG_SP {
   452  				r, i = i, r
   453  			}
   454  		case ssa.OpAMD64LEAQ2:
   455  			p.From.Scale = 2
   456  		case ssa.OpAMD64LEAQ4:
   457  			p.From.Scale = 4
   458  		case ssa.OpAMD64LEAQ8:
   459  			p.From.Scale = 8
   460  		}
   461  		p.From.Type = obj.TYPE_MEM
   462  		p.From.Reg = r
   463  		p.From.Index = i
   464  		gc.AddAux(&p.From, v)
   465  		p.To.Type = obj.TYPE_REG
   466  		p.To.Reg = v.Reg()
   467  	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL:
   468  		p := s.Prog(v.Op.Asm())
   469  		p.From.Type = obj.TYPE_MEM
   470  		p.From.Reg = v.Args[0].Reg()
   471  		gc.AddAux(&p.From, v)
   472  		p.To.Type = obj.TYPE_REG
   473  		p.To.Reg = v.Reg()
   474  	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
   475  		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
   476  		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
   477  		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
   478  	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
   479  		// The Go assembler has swapped operands for UCOMISx relative to CMP;
   480  		// we must account for that right here.
   481  		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
   482  	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
   483  		p := s.Prog(v.Op.Asm())
   484  		p.From.Type = obj.TYPE_REG
   485  		p.From.Reg = v.Args[0].Reg()
   486  		p.To.Type = obj.TYPE_CONST
   487  		p.To.Offset = v.AuxInt
   488  	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
   489  		ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst:
   490  		p := s.Prog(v.Op.Asm())
   491  		p.From.Type = obj.TYPE_CONST
   492  		p.From.Offset = v.AuxInt
   493  		p.To.Type = obj.TYPE_REG
   494  		p.To.Reg = v.Args[0].Reg()
   495  	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
   496  		x := v.Reg()
   497  		asm := v.Op.Asm()
   498  		// Use MOVL to move a small constant into a register
   499  		// when the constant is positive and fits into 32 bits.
   500  		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
   501  			// The upper 32 bits are zeroed automatically when using MOVL.
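        			// (Illustrative: MOVL $1, AX encodes in 5 bytes, while the REX.W
        			// MOVQ $1, AX form takes 7, so this also saves code size.)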
   502  			asm = x86.AMOVL
   503  		}
   504  		p := s.Prog(asm)
   505  		p.From.Type = obj.TYPE_CONST
   506  		p.From.Offset = v.AuxInt
   507  		p.To.Type = obj.TYPE_REG
   508  		p.To.Reg = x
   509  		// If flags are live at this instruction, suppress the
   510  		// MOV $0,AX -> XOR AX,AX optimization.
   511  		if v.Aux != nil {
   512  			p.Mark |= x86.PRESERVEFLAGS
   513  		}
   514  	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
   515  		x := v.Reg()
   516  		p := s.Prog(v.Op.Asm())
   517  		p.From.Type = obj.TYPE_FCONST
   518  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   519  		p.To.Type = obj.TYPE_REG
   520  		p.To.Reg = x
   521  	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
   522  		p := s.Prog(v.Op.Asm())
   523  		p.From.Type = obj.TYPE_MEM
   524  		p.From.Reg = v.Args[0].Reg()
   525  		gc.AddAux(&p.From, v)
   526  		p.To.Type = obj.TYPE_REG
   527  		p.To.Reg = v.Reg()
   528  	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8:
   529  		p := s.Prog(v.Op.Asm())
   530  		p.From.Type = obj.TYPE_MEM
   531  		p.From.Reg = v.Args[0].Reg()
   532  		gc.AddAux(&p.From, v)
   533  		p.From.Scale = 8
   534  		p.From.Index = v.Args[1].Reg()
   535  		p.To.Type = obj.TYPE_REG
   536  		p.To.Reg = v.Reg()
   537  	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
   538  		p := s.Prog(v.Op.Asm())
   539  		p.From.Type = obj.TYPE_MEM
   540  		p.From.Reg = v.Args[0].Reg()
   541  		gc.AddAux(&p.From, v)
   542  		p.From.Scale = 4
   543  		p.From.Index = v.Args[1].Reg()
   544  		p.To.Type = obj.TYPE_REG
   545  		p.To.Reg = v.Reg()
   546  	case ssa.OpAMD64MOVWloadidx2:
   547  		p := s.Prog(v.Op.Asm())
   548  		p.From.Type = obj.TYPE_MEM
   549  		p.From.Reg = v.Args[0].Reg()
   550  		gc.AddAux(&p.From, v)
   551  		p.From.Scale = 2
   552  		p.From.Index = v.Args[1].Reg()
   553  		p.To.Type = obj.TYPE_REG
   554  		p.To.Reg = v.Reg()
   555  	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
   556  		r := v.Args[0].Reg()
   557  		i := v.Args[1].Reg()
   558  		if i == x86.REG_SP {
   559  			r, i = i, r
   560  		}
   561  		p := s.Prog(v.Op.Asm())
   562  		p.From.Type = obj.TYPE_MEM
   563  		p.From.Reg = r
   564  		p.From.Scale = 1
   565  		p.From.Index = i
   566  		gc.AddAux(&p.From, v)
   567  		p.To.Type = obj.TYPE_REG
   568  		p.To.Reg = v.Reg()
   569  	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
   570  		p := s.Prog(v.Op.Asm())
   571  		p.From.Type = obj.TYPE_REG
   572  		p.From.Reg = v.Args[1].Reg()
   573  		p.To.Type = obj.TYPE_MEM
   574  		p.To.Reg = v.Args[0].Reg()
   575  		gc.AddAux(&p.To, v)
   576  	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8:
   577  		p := s.Prog(v.Op.Asm())
   578  		p.From.Type = obj.TYPE_REG
   579  		p.From.Reg = v.Args[2].Reg()
   580  		p.To.Type = obj.TYPE_MEM
   581  		p.To.Reg = v.Args[0].Reg()
   582  		p.To.Scale = 8
   583  		p.To.Index = v.Args[1].Reg()
   584  		gc.AddAux(&p.To, v)
   585  	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
   586  		p := s.Prog(v.Op.Asm())
   587  		p.From.Type = obj.TYPE_REG
   588  		p.From.Reg = v.Args[2].Reg()
   589  		p.To.Type = obj.TYPE_MEM
   590  		p.To.Reg = v.Args[0].Reg()
   591  		p.To.Scale = 4
   592  		p.To.Index = v.Args[1].Reg()
   593  		gc.AddAux(&p.To, v)
   594  	case ssa.OpAMD64MOVWstoreidx2:
   595  		p := s.Prog(v.Op.Asm())
   596  		p.From.Type = obj.TYPE_REG
   597  		p.From.Reg = v.Args[2].Reg()
   598  		p.To.Type = obj.TYPE_MEM
   599  		p.To.Reg = v.Args[0].Reg()
   600  		p.To.Scale = 2
   601  		p.To.Index = v.Args[1].Reg()
   602  		gc.AddAux(&p.To, v)
   603  	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
   604  		r := v.Args[0].Reg()
   605  		i := v.Args[1].Reg()
   606  		if i == x86.REG_SP {
   607  			r, i = i, r
   608  		}
   609  		p := s.Prog(v.Op.Asm())
   610  		p.From.Type = obj.TYPE_REG
   611  		p.From.Reg = v.Args[2].Reg()
   612  		p.To.Type = obj.TYPE_MEM
   613  		p.To.Reg = r
   614  		p.To.Scale = 1
   615  		p.To.Index = i
   616  		gc.AddAux(&p.To, v)
   617  	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
   618  		p := s.Prog(v.Op.Asm())
   619  		p.From.Type = obj.TYPE_CONST
   620  		sc := v.AuxValAndOff()
   621  		p.From.Offset = sc.Val()
   622  		p.To.Type = obj.TYPE_MEM
   623  		p.To.Reg = v.Args[0].Reg()
   624  		gc.AddAux2(&p.To, v, sc.Off())
   625  	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
   626  		p := s.Prog(v.Op.Asm())
   627  		p.From.Type = obj.TYPE_CONST
   628  		sc := v.AuxValAndOff()
   629  		p.From.Offset = sc.Val()
   630  		r := v.Args[0].Reg()
   631  		i := v.Args[1].Reg()
   632  		switch v.Op {
   633  		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
   634  			p.To.Scale = 1
   635  			if i == x86.REG_SP {
   636  				r, i = i, r
   637  			}
   638  		case ssa.OpAMD64MOVWstoreconstidx2:
   639  			p.To.Scale = 2
   640  		case ssa.OpAMD64MOVLstoreconstidx4:
   641  			p.To.Scale = 4
   642  		case ssa.OpAMD64MOVQstoreconstidx8:
   643  			p.To.Scale = 8
   644  		}
   645  		p.To.Type = obj.TYPE_MEM
   646  		p.To.Reg = r
   647  		p.To.Index = i
   648  		gc.AddAux2(&p.To, v, sc.Off())
   649  	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
   650  		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
   651  		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
   652  		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
   653  	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
   654  		r := v.Reg()
   655  		// Break false dependency on destination register.
   656  		opregreg(s, x86.AXORPS, r, r)
   657  		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
   658  	case ssa.OpAMD64ADDQmem, ssa.OpAMD64ADDLmem, ssa.OpAMD64SUBQmem, ssa.OpAMD64SUBLmem,
   659  		ssa.OpAMD64ANDQmem, ssa.OpAMD64ANDLmem, ssa.OpAMD64ORQmem, ssa.OpAMD64ORLmem,
   660  		ssa.OpAMD64XORQmem, ssa.OpAMD64XORLmem, ssa.OpAMD64ADDSDmem, ssa.OpAMD64ADDSSmem,
   661  		ssa.OpAMD64SUBSDmem, ssa.OpAMD64SUBSSmem, ssa.OpAMD64MULSDmem, ssa.OpAMD64MULSSmem:
   662  		p := s.Prog(v.Op.Asm())
   663  		p.From.Type = obj.TYPE_MEM
   664  		p.From.Reg = v.Args[1].Reg()
   665  		gc.AddAux(&p.From, v)
   666  		p.To.Type = obj.TYPE_REG
   667  		p.To.Reg = v.Reg()
   668  		if v.Reg() != v.Args[0].Reg() {
   669  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   670  		}
   671  	case ssa.OpAMD64DUFFZERO:
   672  		off := duffStart(v.AuxInt)
   673  		adj := duffAdj(v.AuxInt)
   674  		var p *obj.Prog
   675  		if adj != 0 {
   676  			p = s.Prog(x86.AADDQ)
   677  			p.From.Type = obj.TYPE_CONST
   678  			p.From.Offset = adj
   679  			p.To.Type = obj.TYPE_REG
   680  			p.To.Reg = x86.REG_DI
   681  		}
   682  		p = s.Prog(obj.ADUFFZERO)
   683  		p.To.Type = obj.TYPE_ADDR
   684  		p.To.Sym = gc.Duffzero
   685  		p.To.Offset = off
   686  	case ssa.OpAMD64MOVOconst:
   687  		if v.AuxInt != 0 {
   688  			v.Fatalf("MOVOconst can only do constant=0")
   689  		}
   690  		r := v.Reg()
   691  		opregreg(s, x86.AXORPS, r, r)
   692  	case ssa.OpAMD64DUFFCOPY:
   693  		p := s.Prog(obj.ADUFFCOPY)
   694  		p.To.Type = obj.TYPE_ADDR
   695  		p.To.Sym = gc.Duffcopy
   696  		p.To.Offset = v.AuxInt
   697  
   698  	case ssa.OpCopy, ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
   699  		if v.Type.IsMemory() {
   700  			return
   701  		}
   702  		x := v.Args[0].Reg()
   703  		y := v.Reg()
   704  		if x != y {
   705  			opregreg(s, moveByType(v.Type), y, x)
   706  		}
   707  	case ssa.OpLoadReg:
   708  		if v.Type.IsFlags() {
   709  			v.Fatalf("load flags not implemented: %v", v.LongString())
   710  			return
   711  		}
   712  		p := s.Prog(loadByType(v.Type))
   713  		gc.AddrAuto(&p.From, v.Args[0])
   714  		p.To.Type = obj.TYPE_REG
   715  		p.To.Reg = v.Reg()
   716  
   717  	case ssa.OpStoreReg:
   718  		if v.Type.IsFlags() {
   719  			v.Fatalf("store flags not implemented: %v", v.LongString())
   720  			return
   721  		}
   722  		p := s.Prog(storeByType(v.Type))
   723  		p.From.Type = obj.TYPE_REG
   724  		p.From.Reg = v.Args[0].Reg()
   725  		gc.AddrAuto(&p.To, v)
   726  	case ssa.OpAMD64LoweredGetClosurePtr:
   727  		// Closure pointer is DX.
   728  		gc.CheckLoweredGetClosurePtr(v)
   729  	case ssa.OpAMD64LoweredGetG:
   730  		r := v.Reg()
   731  		// See the comments in cmd/internal/obj/x86/obj6.go
   732  		// near CanUse1InsnTLS for a detailed explanation of these instructions.
   733  		if x86.CanUse1InsnTLS(gc.Ctxt) {
   734  			// MOVQ (TLS), r
   735  			p := s.Prog(x86.AMOVQ)
   736  			p.From.Type = obj.TYPE_MEM
   737  			p.From.Reg = x86.REG_TLS
   738  			p.To.Type = obj.TYPE_REG
   739  			p.To.Reg = r
   740  		} else {
   741  			// MOVQ TLS, r
   742  			// MOVQ (r)(TLS*1), r
   743  			p := s.Prog(x86.AMOVQ)
   744  			p.From.Type = obj.TYPE_REG
   745  			p.From.Reg = x86.REG_TLS
   746  			p.To.Type = obj.TYPE_REG
   747  			p.To.Reg = r
   748  			q := s.Prog(x86.AMOVQ)
   749  			q.From.Type = obj.TYPE_MEM
   750  			q.From.Reg = r
   751  			q.From.Index = x86.REG_TLS
   752  			q.From.Scale = 1
   753  			q.To.Type = obj.TYPE_REG
   754  			q.To.Reg = r
   755  		}
   756  	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
   757  		s.Call(v)
   758  	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
   759  		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
   760  		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
   761  		r := v.Reg()
   762  		if r != v.Args[0].Reg() {
   763  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   764  		}
   765  		p := s.Prog(v.Op.Asm())
   766  		p.To.Type = obj.TYPE_REG
   767  		p.To.Reg = r
   768  	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL:
   769  		p := s.Prog(v.Op.Asm())
   770  		p.From.Type = obj.TYPE_REG
   771  		p.From.Reg = v.Args[0].Reg()
   772  		p.To.Type = obj.TYPE_REG
   773  		p.To.Reg = v.Reg0()
   774  	case ssa.OpAMD64SQRTSD:
   775  		p := s.Prog(v.Op.Asm())
   776  		p.From.Type = obj.TYPE_REG
   777  		p.From.Reg = v.Args[0].Reg()
   778  		p.To.Type = obj.TYPE_REG
   779  		p.To.Reg = v.Reg()
   780  	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL:
   781  		if v.Args[0].Reg() != v.Reg() {
   782  			// POPCNT on Intel has a false dependency on the destination register.
   783  			// Zero the destination to break the dependency.
   784  			p := s.Prog(x86.AMOVQ)
   785  			p.From.Type = obj.TYPE_CONST
   786  			p.From.Offset = 0
   787  			p.To.Type = obj.TYPE_REG
   788  			p.To.Reg = v.Reg()
   789  		}
   790  		p := s.Prog(v.Op.Asm())
   791  		p.From.Type = obj.TYPE_REG
   792  		p.From.Reg = v.Args[0].Reg()
   793  		p.To.Type = obj.TYPE_REG
   794  		p.To.Reg = v.Reg()
   795  	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
   796  		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
   797  		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
   798  		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
   799  		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
   800  		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
   801  		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
   802  		p := s.Prog(v.Op.Asm())
   803  		p.To.Type = obj.TYPE_REG
   804  		p.To.Reg = v.Reg()
   805  
   806  	case ssa.OpAMD64SETNEF:
   807  		p := s.Prog(v.Op.Asm())
   808  		p.To.Type = obj.TYPE_REG
   809  		p.To.Reg = v.Reg()
   810  		q := s.Prog(x86.ASETPS)
   811  		q.To.Type = obj.TYPE_REG
   812  		q.To.Reg = x86.REG_AX
   813  		// ORL avoids a partial register write and is smaller than ORQ, which the old compiler used.
   814  		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)
   815  
   816  	case ssa.OpAMD64SETEQF:
   817  		p := s.Prog(v.Op.Asm())
   818  		p.To.Type = obj.TYPE_REG
   819  		p.To.Reg = v.Reg()
   820  		q := s.Prog(x86.ASETPC)
   821  		q.To.Type = obj.TYPE_REG
   822  		q.To.Reg = x86.REG_AX
   823  		// ANDL avoids a partial register write and is smaller than ANDQ, which the old compiler used.
   824  		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)
   825  
   826  	case ssa.OpAMD64InvertFlags:
   827  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
   828  	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
   829  		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
   830  	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
   831  		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
   832  	case ssa.OpAMD64REPSTOSQ:
   833  		s.Prog(x86.AREP)
   834  		s.Prog(x86.ASTOSQ)
   835  	case ssa.OpAMD64REPMOVSQ:
   836  		s.Prog(x86.AREP)
   837  		s.Prog(x86.AMOVSQ)
   838  	case ssa.OpAMD64LoweredNilCheck:
   839  		// Issue a load which will fault if the input is nil.
   840  		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
   841  		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
   842  		// but it doesn't have a false dependency on AX.
   843  		// Or maybe allocate an output register and use MOVL (reg), reg2?
   844  		// That trades clobbering flags for clobbering a register.
   845  		p := s.Prog(x86.ATESTB)
   846  		p.From.Type = obj.TYPE_REG
   847  		p.From.Reg = x86.REG_AX
   848  		p.To.Type = obj.TYPE_MEM
   849  		p.To.Reg = v.Args[0].Reg()
   850  		gc.AddAux(&p.To, v)
   851  		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
   852  			gc.Warnl(v.Pos, "generated nil check")
   853  		}
   854  	case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
   855  		p := s.Prog(v.Op.Asm())
   856  		p.From.Type = obj.TYPE_MEM
   857  		p.From.Reg = v.Args[0].Reg()
   858  		gc.AddAux(&p.From, v)
   859  		p.To.Type = obj.TYPE_REG
   860  		p.To.Reg = v.Reg0()
   861  	case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
   862  		r := v.Reg0()
   863  		if r != v.Args[0].Reg() {
   864  			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
   865  		}
   866  		p := s.Prog(v.Op.Asm())
   867  		p.From.Type = obj.TYPE_REG
   868  		p.From.Reg = r
   869  		p.To.Type = obj.TYPE_MEM
   870  		p.To.Reg = v.Args[1].Reg()
   871  		gc.AddAux(&p.To, v)
   872  	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
   873  		r := v.Reg0()
   874  		if r != v.Args[0].Reg() {
   875  			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
   876  		}
   877  		s.Prog(x86.ALOCK)
   878  		p := s.Prog(v.Op.Asm())
   879  		p.From.Type = obj.TYPE_REG
   880  		p.From.Reg = r
   881  		p.To.Type = obj.TYPE_MEM
   882  		p.To.Reg = v.Args[1].Reg()
   883  		gc.AddAux(&p.To, v)
   884  	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
   885  		if v.Args[1].Reg() != x86.REG_AX {
   886  			v.Fatalf("input[1] not in AX %s", v.LongString())
   887  		}
   888  		s.Prog(x86.ALOCK)
   889  		p := s.Prog(v.Op.Asm())
   890  		p.From.Type = obj.TYPE_REG
   891  		p.From.Reg = v.Args[2].Reg()
   892  		p.To.Type = obj.TYPE_MEM
   893  		p.To.Reg = v.Args[0].Reg()
   894  		gc.AddAux(&p.To, v)
   895  		p = s.Prog(x86.ASETEQ)
   896  		p.To.Type = obj.TYPE_REG
   897  		p.To.Reg = v.Reg0()
   898  	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
   899  		s.Prog(x86.ALOCK)
   900  		p := s.Prog(v.Op.Asm())
   901  		p.From.Type = obj.TYPE_REG
   902  		p.From.Reg = v.Args[1].Reg()
   903  		p.To.Type = obj.TYPE_MEM
   904  		p.To.Reg = v.Args[0].Reg()
   905  		gc.AddAux(&p.To, v)
   906  	case ssa.OpClobber:
   907  		p := s.Prog(x86.AMOVL)
   908  		p.From.Type = obj.TYPE_CONST
   909  		p.From.Offset = 0xdeaddead
   910  		p.To.Type = obj.TYPE_MEM
   911  		p.To.Reg = x86.REG_SP
   912  		gc.AddAux(&p.To, v)
   913  		p = s.Prog(x86.AMOVL)
   914  		p.From.Type = obj.TYPE_CONST
   915  		p.From.Offset = 0xdeaddead
   916  		p.To.Type = obj.TYPE_MEM
   917  		p.To.Reg = x86.REG_SP
   918  		gc.AddAux(&p.To, v)
   919  		p.To.Offset += 4
   920  	default:
   921  		v.Fatalf("genValue not implemented: %s", v.LongString())
   922  	}
   923  }
   924  
   925  var blockJump = [...]struct {
   926  	asm, invasm obj.As
   927  }{
   928  	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
   929  	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
   930  	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
   931  	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
   932  	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
   933  	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
   934  	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
   935  	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
   936  	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
   937  	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
   938  	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
   939  	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
   940  }
   941  
   942  var eqfJumps = [2][2]gc.FloatingEQNEJump{
   943  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
   944  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
   945  }
   946  var nefJumps = [2][2]gc.FloatingEQNEJump{
   947  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
   948  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
   949  }
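        // Illustrative note (not part of the original source): UCOMISS/UCOMISD set
        // PF when either operand is NaN, so floating-point "x == y" is really
        // ZF == 1 && PF == 0, and "x != y" is ZF == 0 || PF == 1. That is why the
        // EQF/NEF blocks need the paired jump tables above (JNE plus JPS or JPC)
        // rather than a single conditional jump, mirroring the SETEQF/SETNEF
        // sequences in ssaGenValue.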
   950  
   951  func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
   952  	switch b.Kind {
   953  	case ssa.BlockPlain:
   954  		if b.Succs[0].Block() != next {
   955  			p := s.Prog(obj.AJMP)
   956  			p.To.Type = obj.TYPE_BRANCH
   957  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
   958  		}
   959  	case ssa.BlockDefer:
   960  		// defer returns in AX:
   961  		// 0 if we should continue executing,
   962  		// 1 if we should jump to the deferreturn call.
   963  		p := s.Prog(x86.ATESTL)
   964  		p.From.Type = obj.TYPE_REG
   965  		p.From.Reg = x86.REG_AX
   966  		p.To.Type = obj.TYPE_REG
   967  		p.To.Reg = x86.REG_AX
   968  		p = s.Prog(x86.AJNE)
   969  		p.To.Type = obj.TYPE_BRANCH
   970  		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
   971  		if b.Succs[0].Block() != next {
   972  			p := s.Prog(obj.AJMP)
   973  			p.To.Type = obj.TYPE_BRANCH
   974  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
   975  		}
   976  	case ssa.BlockExit:
   977  		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
   978  	case ssa.BlockRet:
   979  		s.Prog(obj.ARET)
   980  	case ssa.BlockRetJmp:
   981  		p := s.Prog(obj.AJMP)
   982  		p.To.Type = obj.TYPE_MEM
   983  		p.To.Name = obj.NAME_EXTERN
   984  		p.To.Sym = b.Aux.(*obj.LSym)
   985  
   986  	case ssa.BlockAMD64EQF:
   987  		s.FPJump(b, next, &eqfJumps)
   988  
   989  	case ssa.BlockAMD64NEF:
   990  		s.FPJump(b, next, &nefJumps)
   991  
   992  	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
   993  		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
   994  		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
   995  		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
   996  		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
   997  		jmp := blockJump[b.Kind]
   998  		var p *obj.Prog
   999  		switch next {
  1000  		case b.Succs[0].Block():
  1001  			p = s.Prog(jmp.invasm)
  1002  			p.To.Type = obj.TYPE_BRANCH
  1003  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
  1004  		case b.Succs[1].Block():
  1005  			p = s.Prog(jmp.asm)
  1006  			p.To.Type = obj.TYPE_BRANCH
  1007  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1008  		default:
  1009  			p = s.Prog(jmp.asm)
  1010  			p.To.Type = obj.TYPE_BRANCH
  1011  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1012  			q := s.Prog(obj.AJMP)
  1013  			q.To.Type = obj.TYPE_BRANCH
  1014  			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
  1015  		}
  1016  
  1017  	default:
  1018  		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
  1019  	}
  1020  }