github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/cmd/compile/internal/ppc64/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ppc64

import (
	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/ppc64"
	"cmd/internal/objabi"
	"math"
	"strings"
)

// iselOp encodes mapping of comparison operations onto ISEL operands
type iselOp struct {
	cond        int64
	valueIfCond int // if cond is true, the value to return (0 or 1)
}

// Input registers to ISEL used for comparison results. Index 0 is the
// zero register; index 1 holds (will be loaded with) the constant 1.
var iselRegs = [2]int16{ppc64.REG_R0, ppc64.REGTMP}

var iselOps = map[ssa.Op]iselOp{
	ssa.OpPPC64Equal:         iselOp{cond: ppc64.C_COND_EQ, valueIfCond: 1},
	ssa.OpPPC64NotEqual:      iselOp{cond: ppc64.C_COND_EQ, valueIfCond: 0},
	ssa.OpPPC64LessThan:      iselOp{cond: ppc64.C_COND_LT, valueIfCond: 1},
	ssa.OpPPC64GreaterEqual:  iselOp{cond: ppc64.C_COND_LT, valueIfCond: 0},
	ssa.OpPPC64GreaterThan:   iselOp{cond: ppc64.C_COND_GT, valueIfCond: 1},
	ssa.OpPPC64LessEqual:     iselOp{cond: ppc64.C_COND_GT, valueIfCond: 0},
	ssa.OpPPC64FLessThan:     iselOp{cond: ppc64.C_COND_LT, valueIfCond: 1},
	ssa.OpPPC64FGreaterThan:  iselOp{cond: ppc64.C_COND_GT, valueIfCond: 1},
	ssa.OpPPC64FLessEqual:    iselOp{cond: ppc64.C_COND_LT, valueIfCond: 1}, // 2 comparisons, 2nd is EQ
	ssa.OpPPC64FGreaterEqual: iselOp{cond: ppc64.C_COND_GT, valueIfCond: 1}, // 2 comparisons, 2nd is EQ
}

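// Conditions with no direct inverse among the CR bits (NotEqual,
// GreaterEqual, LessEqual) are encoded by testing the complementary bit
// with valueIfCond = 0: for example, NotEqual tests C_COND_EQ and
// selects iselRegs[0] (the zero register) when EQ is set, and
// iselRegs[1] (holding 1) otherwise.
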
// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	//	flive := b.FlagsLiveAtEnd
	//	if b.Control != nil && b.Control.Type.IsFlags() {
	//		flive = true
	//	}
	//	for i := len(b.Values) - 1; i >= 0; i-- {
	//		v := b.Values[i]
	//		if flive && (v.Op == ssa.OpPPC64MOVDconst) {
	//			// The "mark" is any non-nil Aux value.
	//			v.Aux = v
	//		}
	//		if v.Type.IsFlags() {
	//			flive = false
	//		}
	//		for _, a := range v.Args {
	//			if a.Type.IsFlags() {
	//				flive = true
	//			}
	//		}
	//	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return ppc64.AFMOVS
		case 8:
			return ppc64.AFMOVD
		}
	} else {
		switch t.Size() {
		case 1:
			if t.IsSigned() {
				return ppc64.AMOVB
			} else {
				return ppc64.AMOVBZ
			}
		case 2:
			if t.IsSigned() {
				return ppc64.AMOVH
			} else {
				return ppc64.AMOVHZ
			}
		case 4:
			if t.IsSigned() {
				return ppc64.AMOVW
			} else {
				return ppc64.AMOVWZ
			}
		case 8:
			return ppc64.AMOVD
		}
	}
	panic("bad load type")
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return ppc64.AFMOVS
		case 8:
			return ppc64.AFMOVD
		}
	} else {
		switch t.Size() {
		case 1:
			return ppc64.AMOVB
		case 2:
			return ppc64.AMOVH
		case 4:
			return ppc64.AMOVW
		case 8:
			return ppc64.AMOVD
		}
	}
	panic("bad store type")
}

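// ssaGenISEL emits an ISEL selecting into v's register: r1 when
// condition-register bit cr is set, r2 otherwise. With the iselOps
// table above this becomes, roughly, ISEL $cond, r1, r2, rt in the
// generated assembly.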
func ssaGenISEL(s *gc.SSAGenState, v *ssa.Value, cr int64, r1, r2 int16) {
	r := v.Reg()
	p := s.Prog(ppc64.AISEL)
	p.To.Type = obj.TYPE_REG
	p.To.Reg = r
	p.Reg = r1
	p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
	p.From.Type = obj.TYPE_CONST
	p.From.Offset = cr
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.OpCopy:
		t := v.Type
		if t.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			rt := obj.TYPE_REG
			op := ppc64.AMOVD

			if t.IsFloat() {
				op = ppc64.AFMOVD
			}
			p := s.Prog(op)
			p.From.Type = rt
			p.From.Reg = x
			p.To.Type = rt
			p.To.Reg = y
		}

	case ssa.OpPPC64LoweredMuluhilo:
		// MULHDU	Rarg1, Rarg0, Reg0
		// MULLD	Rarg1, Rarg0, Reg1
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		p := s.Prog(ppc64.AMULHDU)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
		p1 := s.Prog(ppc64.AMULLD)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.Reg = r0
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = v.Reg1()

	case ssa.OpPPC64LoweredAtomicAnd8,
		ssa.OpPPC64LoweredAtomicOr8:
		// LWSYNC
		// LBAR		(Rarg0), Rtmp
		// AND/OR	Rarg1, Rtmp
		// STBCCC	Rtmp, (Rarg0)
		// BNE		-3(PC)
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		p := s.Prog(ppc64.ALBAR)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		p1 := s.Prog(v.Op.Asm())
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = ppc64.REGTMP
		p2 := s.Prog(ppc64.ASTBCCC)
		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = ppc64.REGTMP
		p2.To.Type = obj.TYPE_MEM
		p2.To.Reg = r0
		p2.RegTo2 = ppc64.REGTMP
		p3 := s.Prog(ppc64.ABNE)
		p3.To.Type = obj.TYPE_BRANCH
		gc.Patch(p3, p)

	case ssa.OpPPC64LoweredAtomicAdd32,
		ssa.OpPPC64LoweredAtomicAdd64:
		// LWSYNC
		// LDAR/LWAR    (Rarg0), Rout
		// ADD		Rarg1, Rout
		// STDCCC/STWCCC Rout, (Rarg0)
		// BNE         -3(PC)
		// MOVW		Rout,Rout (if Add32)
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		out := v.Reg0()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// ADD reg1,out
		p1 := s.Prog(ppc64.AADD)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Reg = out
		p1.To.Type = obj.TYPE_REG
		// STDCCC or STWCCC
		p3 := s.Prog(st)
		p3.From.Type = obj.TYPE_REG
		p3.From.Reg = out
		p3.To.Type = obj.TYPE_MEM
		p3.To.Reg = r0
		// BNE retry
		p4 := s.Prog(ppc64.ABNE)
		p4.To.Type = obj.TYPE_BRANCH
		gc.Patch(p4, p)

		// Ensure a 32 bit result
		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
			p5 := s.Prog(ppc64.AMOVWZ)
			p5.To.Type = obj.TYPE_REG
			p5.To.Reg = out
			p5.From.Type = obj.TYPE_REG
			p5.From.Reg = out
		}

	case ssa.OpPPC64LoweredAtomicExchange32,
		ssa.OpPPC64LoweredAtomicExchange64:
		// LWSYNC
		// LDAR/LWAR    (Rarg0), Rout
		// STDCCC/STWCCC Rout, (Rarg0)
		// BNE         -2(PC)
		// ISYNC
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		out := v.Reg0()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// STDCCC or STWCCC
		p1 := s.Prog(st)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Type = obj.TYPE_MEM
		p1.To.Reg = r0
		// BNE retry
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		gc.Patch(p2, p)
		// ISYNC
		pisync := s.Prog(ppc64.AISYNC)
		pisync.To.Type = obj.TYPE_NONE

	case ssa.OpPPC64LoweredAtomicLoad32,
		ssa.OpPPC64LoweredAtomicLoad64,
		ssa.OpPPC64LoweredAtomicLoadPtr:
		// SYNC
		// MOVD/MOVW (Rarg0), Rout
		// CMP Rout,Rout
		// BNE 1(PC)
		// ISYNC
		ld := ppc64.AMOVD
		cmp := ppc64.ACMP
		if v.Op == ssa.OpPPC64LoweredAtomicLoad32 {
			ld = ppc64.AMOVW
			cmp = ppc64.ACMPW
		}
		arg0 := v.Args[0].Reg()
		out := v.Reg0()
		// SYNC when AuxInt == 1; otherwise, load-acquire
		if v.AuxInt == 1 {
			psync := s.Prog(ppc64.ASYNC)
			psync.To.Type = obj.TYPE_NONE
		}
		// Load
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = arg0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// CMP
		p1 := s.Prog(cmp)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = out
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = out
		// BNE
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		// ISYNC
		pisync := s.Prog(ppc64.AISYNC)
		pisync.To.Type = obj.TYPE_NONE
		gc.Patch(p2, pisync)

	case ssa.OpPPC64LoweredAtomicStore32,
		ssa.OpPPC64LoweredAtomicStore64:
		// SYNC or LWSYNC
		// MOVD/MOVW arg1,(arg0)
		st := ppc64.AMOVD
		if v.Op == ssa.OpPPC64LoweredAtomicStore32 {
			st = ppc64.AMOVW
		}
		arg0 := v.Args[0].Reg()
		arg1 := v.Args[1].Reg()
		// If AuxInt == 0, LWSYNC (Store-Release); otherwise SYNC.
		syncOp := ppc64.ASYNC
		if v.AuxInt == 0 {
			syncOp = ppc64.ALWSYNC
		}
		psync := s.Prog(syncOp)
		psync.To.Type = obj.TYPE_NONE
		// Store
		p := s.Prog(st)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = arg0
		p.From.Type = obj.TYPE_REG
		p.From.Reg = arg1

	case ssa.OpPPC64LoweredAtomicCas64,
		ssa.OpPPC64LoweredAtomicCas32:
		// LWSYNC
		// loop:
		// LDAR        (Rarg0), MutexHint, Rtmp
		// CMP         Rarg1, Rtmp
		// BNE         fail
		// STDCCC      Rarg2, (Rarg0)
		// BNE         loop
		// LWSYNC      // Only for sequential consistency; not required in CasRel.
		// MOVD        $1, Rout
		// BR          end
		// fail:
		// MOVD        $0, Rout
		// end:
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		cmp := ppc64.ACMP
		if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
			cmp = ppc64.ACMPW
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		r2 := v.Args[2].Reg()
		out := v.Reg0()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync1 := s.Prog(ppc64.ALWSYNC)
		plwsync1.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		// If it is a Compare-and-Swap-Release operation, set the EH field with
		// the release hint.
		if v.AuxInt == 0 {
			p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
		}
		// CMP reg1,reg2
		p1 := s.Prog(cmp)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Reg = ppc64.REGTMP
		p1.To.Type = obj.TYPE_REG
		// BNE cas_fail
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		// STDCCC or STWCCC
		p3 := s.Prog(st)
		p3.From.Type = obj.TYPE_REG
		p3.From.Reg = r2
		p3.To.Type = obj.TYPE_MEM
		p3.To.Reg = r0
		// BNE retry
		p4 := s.Prog(ppc64.ABNE)
		p4.To.Type = obj.TYPE_BRANCH
		gc.Patch(p4, p)
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
		// If the operation is a CAS-Release, then synchronization is not necessary.
		if v.AuxInt != 0 {
			plwsync2 := s.Prog(ppc64.ALWSYNC)
			plwsync2.To.Type = obj.TYPE_NONE
		}
		// return true
		p5 := s.Prog(ppc64.AMOVD)
		p5.From.Type = obj.TYPE_CONST
		p5.From.Offset = 1
		p5.To.Type = obj.TYPE_REG
		p5.To.Reg = out
		// BR done
		p6 := s.Prog(obj.AJMP)
		p6.To.Type = obj.TYPE_BRANCH
		// return false
		p7 := s.Prog(ppc64.AMOVD)
		p7.From.Type = obj.TYPE_CONST
		p7.From.Offset = 0
		p7.To.Type = obj.TYPE_REG
		p7.To.Reg = out
		gc.Patch(p2, p7)
		// done (label)
		p8 := s.Prog(obj.ANOP)
		gc.Patch(p6, p8)

	case ssa.OpPPC64LoweredGetClosurePtr:
		// Closure pointer is R11 (already)
		gc.CheckLoweredGetClosurePtr(v)

	case ssa.OpPPC64LoweredGetCallerSP:
		// caller's SP is FixedFrameSize below the address of the first arg
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -gc.Ctxt.FixedFrameSize()
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64LoweredGetCallerPC:
		p := s.Prog(obj.AGETCALLERPC)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
		// input is already rounded

	case ssa.OpLoadReg:
		loadOp := loadByType(v.Type)
		p := s.Prog(loadOp)
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		storeOp := storeByType(v.Type)
		p := s.Prog(storeOp)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)

	case ssa.OpPPC64DIVD:
		// For now,
		//
		// cmp arg1, -1
		// be  ahead
		// v = arg0 / arg1
		// b over
		// ahead: v = - arg0
		// over: nop
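		//
		// The divisor == -1 special case exists because MinInt64 / -1
		// overflows the hardware divide; NEG arg0 wraps to MinInt64,
		// which is the result the Go spec defines for that case.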
		r := v.Reg()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()

		p := s.Prog(ppc64.ACMP)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = -1

		pbahead := s.Prog(ppc64.ABEQ)
		pbahead.To.Type = obj.TYPE_BRANCH

		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

		pbover := s.Prog(obj.AJMP)
		pbover.To.Type = obj.TYPE_BRANCH

		p = s.Prog(ppc64.ANEG)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r0
		gc.Patch(pbahead, p)

		p = s.Prog(obj.ANOP)
		gc.Patch(pbover, p)

	case ssa.OpPPC64DIVW:
		// word-width version of above
		r := v.Reg()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()

		p := s.Prog(ppc64.ACMPW)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = -1

		pbahead := s.Prog(ppc64.ABEQ)
		pbahead.To.Type = obj.TYPE_BRANCH

		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

		pbover := s.Prog(obj.AJMP)
		pbover.To.Type = obj.TYPE_BRANCH

		p = s.Prog(ppc64.ANEG)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r0
		gc.Patch(pbahead, p)

		p = s.Prog(obj.ANOP)
		gc.Patch(pbover, p)

	case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
		ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
		ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
		ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
		ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
		ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
		ssa.OpPPC64AND, ssa.OpPPC64ANDCC, ssa.OpPPC64OR, ssa.OpPPC64ORCC, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64XORCC, ssa.OpPPC64EQV:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r2
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		r3 := v.Args[2].Reg()
		// r = r1*r2 ± r3
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r3
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64MaskIfNotCarry:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGZERO
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64ADDconstForCarry:
		r1 := v.Args[0].Reg()
		p := s.Prog(v.Op.Asm())
		p.Reg = r1
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.

	case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS, ssa.OpPPC64FROUND:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()

	case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
		ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
		p := s.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64ANDCCconst:
		p := s.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()

		if v.Aux != nil {
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = gc.AuxOffset(v)
		} else {
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
		}

		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP // discard result

	case ssa.OpPPC64MOVDaddr:
		switch v.Aux.(type) {
		default:
			v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
		case nil:
			// If aux offset and aux int are both 0, and the same
			// input and output regs are used, no instruction
			// needs to be generated, since it would just be
			// addi rx, rx, 0.
			if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_ADDR
				p.From.Reg = v.Args[0].Reg()
				p.From.Offset = v.AuxInt
				p.To.Type = obj.TYPE_REG
				p.To.Reg = v.Reg()
			}

		case *obj.LSym, *gc.Node:
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
			gc.AddAux(&p.From, v)

		}

	case ssa.OpPPC64MOVDconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()

	case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt

	case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
		// Shift in register to required size
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Reg = v.Reg()
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64MOVDload:

		// MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
		// For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
		// the offset is not known until link time. If the load of a go.string uses relocation for the
		// offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
		// To avoid this problem, the full address of the go.string is computed and loaded into the base register,
		// and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
		// go.string types because other types will have proper alignment.

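		// A sketch of the rewrite (symbol name illustrative):
		//	MOVD $go.string."..."(SB)(Rbase), Rt // materialize the full, relocated address
		//	MOVD (Rt), Rt                        // DS-form load with a guaranteed 0 offset
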
		gostring := false
		switch n := v.Aux.(type) {
		case *obj.LSym:
			gostring = strings.HasPrefix(n.Name, "go.string.")
		}
		if gostring {
			// Generate full addr of the go.string const
			// including AuxInt
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			gc.AddAux(&p.From, v)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
			// Load go.string using 0 offset
			p = s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = v.Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
			break
		}
		// Not a go.string, generate a normal load
		fallthrough

	case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
		ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
		ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.From.Index = v.Args[1].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGZERO
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
		ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
		ssa.OpPPC64MOVHBRstoreidx:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpPPC64Equal,
		ssa.OpPPC64NotEqual,
		ssa.OpPPC64LessThan,
		ssa.OpPPC64FLessThan,
		ssa.OpPPC64LessEqual,
		ssa.OpPPC64GreaterThan,
		ssa.OpPPC64FGreaterThan,
		ssa.OpPPC64GreaterEqual:

		// On Power7 or later, can use isel instruction:
		// for a < b, a > b, a = b:
		//   rtmp := 1
		//   isel rt,rtmp,r0,cond // rt is target in ppc asm

		// for  a >= b, a <= b, a != b:
		//   rtmp := 1
		//   isel rt,0,rtmp,!cond // rt is target in ppc asm

		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = iselRegs[1]
		iop := iselOps[v.Op]
		ssaGenISEL(s, v, iop.cond, iselRegs[iop.valueIfCond], iselRegs[1-iop.valueIfCond])

	case ssa.OpPPC64FLessEqual, // These include a second branch for EQ -- dealing with NaN prevents REL= to !REL conversion
		ssa.OpPPC64FGreaterEqual:

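		// Two selects are needed here: the first yields 1 for strict
		// LT/GT, and the second overrides the result with 1 when EQ is
		// set, so unordered (NaN) comparisons correctly produce 0.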
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = iselRegs[1]
		iop := iselOps[v.Op]
		ssaGenISEL(s, v, iop.cond, iselRegs[iop.valueIfCond], iselRegs[1-iop.valueIfCond])
		ssaGenISEL(s, v, ppc64.C_COND_EQ, iselRegs[1], v.Reg())

	case ssa.OpPPC64LoweredZero:

		// unaligned data doesn't hurt performance
		// for these instructions on power8 or later

		// for sizes >= 64 generate a loop as follows:

		// set up loop counter in CTR, used by BC
		//	 MOVD len/32,REG_TMP
		//	 MOVD REG_TMP,CTR
		//	 loop:
		//	 MOVD R0,(R3)
		//	 MOVD R0,8(R3)
		//	 MOVD R0,16(R3)
		//	 MOVD R0,24(R3)
		//	 ADD  $32,R3
		//	 BC   16, 0, loop
		//
		// any remainder is done as described below

		// for sizes < 64 bytes, first clear as many doublewords as possible,
		// then handle the remainder
		//	MOVD R0,(R3)
		//	MOVD R0,8(R3)
		// .... etc.
		//
		// the remainder bytes are cleared using one or more
		// of the following instructions with the appropriate
		// offsets depending on which instructions are needed
		//
		//	MOVW R0,n1(R3)	4 bytes
		//	MOVH R0,n2(R3)	2 bytes
		//	MOVB R0,n3(R3)	1 byte
		//
		// 7 bytes: MOVW, MOVH, MOVB
		// 6 bytes: MOVW, MOVH
		// 5 bytes: MOVW, MOVB
		// 3 bytes: MOVH, MOVB

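		// For example (a sketch): AuxInt = 71 gives ctr = 2 and rem = 7,
		// so a two-iteration loop clears 64 bytes and the remaining 7
		// are cleared with MOVW, MOVH, MOVB. AuxInt = 39 gives ctr = 1,
		// so no loop is generated and rem becomes 39: four MOVDs plus
		// the same 7-byte tail.
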
		// each loop iteration does 32 bytes
		ctr := v.AuxInt / 32

		// remainder bytes
		rem := v.AuxInt % 32

		// only generate a loop if there is more
		// than 1 iteration.
		if ctr > 1 {
			// Set up CTR loop counter
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			// generate 4 MOVDs
			// when this is a loop then the top must be saved
			var top *obj.Prog
			for offset := int64(0); offset < 32; offset += 8 {
				// This is the top of loop
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_REG
				p.From.Reg = ppc64.REG_R0
				p.To.Type = obj.TYPE_MEM
				p.To.Reg = v.Args[0].Reg()
				p.To.Offset = offset
				// Save the top of loop
				if top == nil {
					top = p
				}
			}

			// Increment address for the
			// 4 doublewords just zeroed.
			p = s.Prog(ppc64.AADD)
			p.Reg = v.Args[0].Reg()
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Args[0].Reg()

			// Branch back to top of loop
			// based on CTR
			// BC with BO_BCTR generates bdnz
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_BRANCH
			gc.Patch(p, top)
		}

		// when ctr == 1 the loop was not generated but
		// there are at least 32 bytes to clear, so add
		// that to the remainder to generate the code
		// to clear those doublewords
		if ctr == 1 {
			rem += 32
		}

		// clear the remainder starting at offset zero
		offset := int64(0)

		// first clear as many doublewords as possible
		// then clear remaining sizes as available
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVW, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			p := s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64LoweredMove:

		// This will be used when moving more
		// than 8 bytes.  Moves start with
		// as many 8 byte moves as possible, then
		// 4, 2, or 1 byte(s) as needed for the
		// remainder.  This will work and be
		// efficient for power8 or later.
		// If there are 64 or more bytes, then a
		// loop is generated to move 32 bytes and
		// update the src and dst addresses on each
		// iteration. When < 64 bytes, the appropriate
		// number of moves are generated based on the
		// size.
		// When moving >= 64 bytes a loop is used
		//	MOVD len/32,REG_TMP
		//	MOVD REG_TMP,CTR
		// top:
		//	MOVD (R4),R7
		//	MOVD 8(R4),R8
		//	MOVD 16(R4),R9
		//	MOVD 24(R4),R10
		//	ADD  $32,R4
		//	MOVD R7,(R3)
		//	MOVD R8,8(R3)
		//	MOVD R9,16(R3)
		//	MOVD R10,24(R3)
		//	ADD  $32,R3
		//	BC 16,0,top
		// Bytes not moved by this loop are moved
		// with a combination of the following instructions,
		// starting with the largest sizes and generating as
		// many as needed, using the appropriate offset value.
		//	MOVD  n(R4),R7
		//	MOVD  R7,n(R3)
		//	MOVW  n1(R4),R7
		//	MOVW  R7,n1(R3)
		//	MOVH  n2(R4),R7
		//	MOVH  R7,n2(R3)
		//	MOVB  n3(R4),R7
		//	MOVB  R7,n3(R3)

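		// For example (a sketch): AuxInt = 71 gives ctr = 2 (the loop
		// moves 64 bytes) and rem = 7, moved as one MOVW, one MOVH, and
		// one MOVB load/store pair. AuxInt = 39 skips the loop and moves
		// all 39 bytes with unrolled pairs.
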
		// Each loop iteration moves 32 bytes
		ctr := v.AuxInt / 32

		// Remainder after the loop
		rem := v.AuxInt % 32

		dst_reg := v.Args[0].Reg()
		src_reg := v.Args[1].Reg()

		// The set of registers used here must match the clobbered reg list
		// in PPC64Ops.go.
		useregs := []int16{ppc64.REG_R7, ppc64.REG_R8, ppc64.REG_R9, ppc64.REG_R10}
		offset := int64(0)

		// top of the loop
		var top *obj.Prog
		// Only generate looping code when loop counter is > 1 for >= 64 bytes
		if ctr > 1 {
			// Set up the CTR
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			// Generate all the MOVDs for loads
			// based off the same register, increasing
			// the offset by 8 for each instruction
			for _, rg := range useregs {
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_MEM
				p.From.Reg = src_reg
				p.From.Offset = offset
				p.To.Type = obj.TYPE_REG
				p.To.Reg = rg
				if top == nil {
					top = p
				}
				offset += 8
			}
			// increment the src_reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = src_reg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = src_reg

			// generate the MOVDs for stores, based
			// off the same register, using the same
			// offsets as in the loads.
			offset = int64(0)
			for _, rg := range useregs {
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_REG
				p.From.Reg = rg
				p.To.Type = obj.TYPE_MEM
				p.To.Reg = dst_reg
				p.To.Offset = offset
				offset += 8
			}
			// increment the dst_reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = dst_reg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = dst_reg

			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
			// to loop top.
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_BRANCH
			gc.Patch(p, top)

			// src_reg and dst_reg were incremented in the loop, so
			// later instructions start with offset 0.
			offset = int64(0)
		}

		// No loop was generated for one iteration, so
		// add 32 bytes to the remainder to move those bytes.
		if ctr == 1 {
			rem += 32
		}

		// Generate all the remaining load and store pairs, starting with
		// as many 8 byte moves as possible, then 4, 2, 1.
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVW, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			// Load
			p := s.Prog(op)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_R7
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = src_reg
			p.From.Offset = offset

			// Store
			p = s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R7
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dst_reg
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64CALLstatic:
		s.Call(v)

	case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_CTR

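		// The ppc64 ELFv2 ABI expects the address of an indirectly
		// called function in R12, so the callee can derive its TOC
		// pointer from it; the register allocator is expected to have
		// placed the target there, which this check enforces.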
		if v.Args[0].Reg() != ppc64.REG_R12 {
			v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
		}

		pp := s.Call(v)
		pp.To.Reg = ppc64.REG_CTR

		if gc.Ctxt.Flag_shared {
			// When compiling Go into PIC, the function we just
			// called via pointer might have been implemented in
			// a separate module and so overwritten the TOC
			// pointer in R2; reload it.
			q := s.Prog(ppc64.AMOVD)
			q.From.Type = obj.TYPE_MEM
			q.From.Offset = 24
			q.From.Reg = ppc64.REGSP
			q.To.Type = obj.TYPE_REG
			q.To.Reg = ppc64.REG_R2
		}

	case ssa.OpPPC64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = v.Aux.(*obj.LSym)

	case ssa.OpPPC64LoweredNilCheck:
		if objabi.GOOS == "aix" {
			// CMP Rarg0, R0
			// BNE 2(PC)
			// STW R0, 0(R0)
			// NOP (so the BNE has somewhere to land)

			// CMP Rarg0, R0
			p := s.Prog(ppc64.ACMP)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = v.Args[0].Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_R0

			// BNE 2(PC)
			p2 := s.Prog(ppc64.ABNE)
			p2.To.Type = obj.TYPE_BRANCH

			// STW R0, 0(R0)
			// Write at 0 is forbidden and will trigger a SIGSEGV
			p = s.Prog(ppc64.AMOVW)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = ppc64.REG_R0

			// NOP (so the BNE has somewhere to land)
			nop := s.Prog(obj.ANOP)
			gc.Patch(p2, nop)

		} else {
			// Issue a load which will fault if arg is nil.
			p := s.Prog(ppc64.AMOVBZ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = v.Args[0].Reg()
			gc.AddAux(&p.From, v)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP
		}
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}

	case ssa.OpPPC64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpClobber:
		// TODO: implement for clobberdead experiment. Nop is ok for now.
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm     obj.As
	asmeq, invasmun bool
}{
	ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
	ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},

	ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
	ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
	ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
	ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},

	// TODO: need to work FP comparisons into block jumps
	ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
	ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
	ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
	ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
}

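// ssaGenBlock lowers block control flow to branches. For the FP blocks
// above, asmeq emits an extra BEQ alongside the primary branch (FGE and
// FLE are "GT or EQ" and "LT or EQ"), and invasmun emits an extra BVS
// on the inverted path so unordered (NaN) comparisons take the false
// successor.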
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockDefer:
		// defer returns in R3:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(ppc64.ACMP)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_R3
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_R0

		p = s.Prog(ppc64.ABNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}

	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
		ssa.BlockPPC64LT, ssa.BlockPPC64GE,
		ssa.BlockPPC64LE, ssa.BlockPPC64GT,
		ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
		ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
			if jmp.invasmun {
				// TODO: The second branch is probably predict-not-taken since it is for FP unordered
				s.Br(ppc64.ABVS, b.Succs[1].Block())
			}
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
			if jmp.asmeq {
				s.Br(ppc64.ABEQ, b.Succs[0].Block())
			}
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				if jmp.asmeq {
					s.Br(ppc64.ABEQ, b.Succs[0].Block())
				}
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				if jmp.invasmun {
					// TODO: The second branch is probably predict-not-taken since it is for FP unordered
					s.Br(ppc64.ABVS, b.Succs[1].Block())
				}
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}