github.com/hikaru7719/go@v0.0.0-20181025140707-c8b2ac68906a/src/cmd/compile/internal/ppc64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ppc64
     6  
     7  import (
     8  	"cmd/compile/internal/gc"
     9  	"cmd/compile/internal/ssa"
    10  	"cmd/compile/internal/types"
    11  	"cmd/internal/obj"
    12  	"cmd/internal/obj/ppc64"
    13  	"math"
    14  	"strings"
    15  )
    16  
     17  // iselOp encodes the mapping of comparison operations onto ISEL operands.
    18  type iselOp struct {
    19  	cond        int64
    20  	valueIfCond int // if cond is true, the value to return (0 or 1)
    21  }
    22  
     23  // Input registers to ISEL used for comparison. Index 0 is the zero register (R0); index 1 is REGTMP, which will be loaded with 1.
    24  var iselRegs = [2]int16{ppc64.REG_R0, ppc64.REGTMP}
    25  
     26  var iselOps = map[ssa.Op]iselOp{
     27  	ssa.OpPPC64Equal:         {cond: ppc64.C_COND_EQ, valueIfCond: 1},
     28  	ssa.OpPPC64NotEqual:      {cond: ppc64.C_COND_EQ, valueIfCond: 0},
     29  	ssa.OpPPC64LessThan:      {cond: ppc64.C_COND_LT, valueIfCond: 1},
     30  	ssa.OpPPC64GreaterEqual:  {cond: ppc64.C_COND_LT, valueIfCond: 0},
     31  	ssa.OpPPC64GreaterThan:   {cond: ppc64.C_COND_GT, valueIfCond: 1},
     32  	ssa.OpPPC64LessEqual:     {cond: ppc64.C_COND_GT, valueIfCond: 0},
     33  	ssa.OpPPC64FLessThan:     {cond: ppc64.C_COND_LT, valueIfCond: 1},
     34  	ssa.OpPPC64FGreaterThan:  {cond: ppc64.C_COND_GT, valueIfCond: 1},
     35  	ssa.OpPPC64FLessEqual:    {cond: ppc64.C_COND_LT, valueIfCond: 1}, // 2 comparisons, 2nd is EQ
     36  	ssa.OpPPC64FGreaterEqual: {cond: ppc64.C_COND_GT, valueIfCond: 1}, // 2 comparisons, 2nd is EQ
     37  }
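         // Half of the table works through inversion: NotEqual, for example, uses
         // the EQ condition with valueIfCond: 0, so the ISEL yields 0 when the EQ
         // bit is set and 1 otherwise.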
    38  
    39  // markMoves marks any MOVXconst ops that need to avoid clobbering flags.
    40  func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
    41  	//	flive := b.FlagsLiveAtEnd
    42  	//	if b.Control != nil && b.Control.Type.IsFlags() {
    43  	//		flive = true
    44  	//	}
    45  	//	for i := len(b.Values) - 1; i >= 0; i-- {
    46  	//		v := b.Values[i]
     47  	//		if flive && (v.Op == ssa.OpPPC64MOVDconst) {
    48  	//			// The "mark" is any non-nil Aux value.
    49  	//			v.Aux = v
    50  	//		}
    51  	//		if v.Type.IsFlags() {
    52  	//			flive = false
    53  	//		}
    54  	//		for _, a := range v.Args {
    55  	//			if a.Type.IsFlags() {
    56  	//				flive = true
    57  	//			}
    58  	//		}
    59  	//	}
    60  }
    61  
    62  // loadByType returns the load instruction of the given type.
    63  func loadByType(t *types.Type) obj.As {
    64  	if t.IsFloat() {
    65  		switch t.Size() {
    66  		case 4:
    67  			return ppc64.AFMOVS
    68  		case 8:
    69  			return ppc64.AFMOVD
    70  		}
    71  	} else {
    72  		switch t.Size() {
    73  		case 1:
    74  			if t.IsSigned() {
    75  				return ppc64.AMOVB
    76  			} else {
    77  				return ppc64.AMOVBZ
    78  			}
    79  		case 2:
    80  			if t.IsSigned() {
    81  				return ppc64.AMOVH
    82  			} else {
    83  				return ppc64.AMOVHZ
    84  			}
    85  		case 4:
    86  			if t.IsSigned() {
    87  				return ppc64.AMOVW
    88  			} else {
    89  				return ppc64.AMOVWZ
    90  			}
    91  		case 8:
    92  			return ppc64.AMOVD
    93  		}
    94  	}
    95  	panic("bad load type")
    96  }
    97  
    98  // storeByType returns the store instruction of the given type.
    99  func storeByType(t *types.Type) obj.As {
   100  	if t.IsFloat() {
   101  		switch t.Size() {
   102  		case 4:
   103  			return ppc64.AFMOVS
   104  		case 8:
   105  			return ppc64.AFMOVD
   106  		}
   107  	} else {
   108  		switch t.Size() {
   109  		case 1:
   110  			return ppc64.AMOVB
   111  		case 2:
   112  			return ppc64.AMOVH
   113  		case 4:
   114  			return ppc64.AMOVW
   115  		case 8:
   116  			return ppc64.AMOVD
   117  		}
   118  	}
   119  	panic("bad store type")
   120  }
   121  
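         // ssaGenISEL emits an ISEL instruction: v's result register receives r1
         // when bit cr of the condition register is set, and r2 otherwise.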
   122  func ssaGenISEL(s *gc.SSAGenState, v *ssa.Value, cr int64, r1, r2 int16) {
   123  	r := v.Reg()
   124  	p := s.Prog(ppc64.AISEL)
   125  	p.To.Type = obj.TYPE_REG
   126  	p.To.Reg = r
   127  	p.Reg = r1
   128  	p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
   129  	p.From.Type = obj.TYPE_CONST
   130  	p.From.Offset = cr
   131  }
   132  
   133  func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
   134  	switch v.Op {
   135  	case ssa.OpCopy:
   136  		t := v.Type
   137  		if t.IsMemory() {
   138  			return
   139  		}
   140  		x := v.Args[0].Reg()
   141  		y := v.Reg()
   142  		if x != y {
   143  			rt := obj.TYPE_REG
   144  			op := ppc64.AMOVD
   145  
   146  			if t.IsFloat() {
   147  				op = ppc64.AFMOVD
   148  			}
   149  			p := s.Prog(op)
   150  			p.From.Type = rt
   151  			p.From.Reg = x
   152  			p.To.Type = rt
   153  			p.To.Reg = y
   154  		}
   155  
   156  	case ssa.OpPPC64LoweredMuluhilo:
   157  		// MULHDU	Rarg1, Rarg0, Reg0
   158  		// MULLD	Rarg1, Rarg0, Reg1
   159  		r0 := v.Args[0].Reg()
   160  		r1 := v.Args[1].Reg()
   161  		p := s.Prog(ppc64.AMULHDU)
   162  		p.From.Type = obj.TYPE_REG
   163  		p.From.Reg = r1
   164  		p.Reg = r0
   165  		p.To.Type = obj.TYPE_REG
   166  		p.To.Reg = v.Reg0()
   167  		p1 := s.Prog(ppc64.AMULLD)
   168  		p1.From.Type = obj.TYPE_REG
   169  		p1.From.Reg = r1
   170  		p1.Reg = r0
   171  		p1.To.Type = obj.TYPE_REG
   172  		p1.To.Reg = v.Reg1()
   173  
   174  	case ssa.OpPPC64LoweredAtomicAnd8,
   175  		ssa.OpPPC64LoweredAtomicOr8:
   176  		// LWSYNC
   177  		// LBAR		(Rarg0), Rtmp
   178  		// AND/OR	Rarg1, Rtmp
   179  		// STBCCC	Rtmp, (Rarg0)
   180  		// BNE		-3(PC)
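         		// LBAR/STBCCC form a load-reserve/store-conditional loop: the STBCCC
         		// fails, and the BNE retries, if the reservation taken by the LBAR
         		// was lost to a competing store.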
   181  		r0 := v.Args[0].Reg()
   182  		r1 := v.Args[1].Reg()
   183  		// LWSYNC - Assuming shared data not write-through-required nor
   184  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   185  		plwsync := s.Prog(ppc64.ALWSYNC)
   186  		plwsync.To.Type = obj.TYPE_NONE
   187  		p := s.Prog(ppc64.ALBAR)
   188  		p.From.Type = obj.TYPE_MEM
   189  		p.From.Reg = r0
   190  		p.To.Type = obj.TYPE_REG
   191  		p.To.Reg = ppc64.REGTMP
   192  		p1 := s.Prog(v.Op.Asm())
   193  		p1.From.Type = obj.TYPE_REG
   194  		p1.From.Reg = r1
   195  		p1.To.Type = obj.TYPE_REG
   196  		p1.To.Reg = ppc64.REGTMP
   197  		p2 := s.Prog(ppc64.ASTBCCC)
   198  		p2.From.Type = obj.TYPE_REG
   199  		p2.From.Reg = ppc64.REGTMP
   200  		p2.To.Type = obj.TYPE_MEM
   201  		p2.To.Reg = r0
   202  		p2.RegTo2 = ppc64.REGTMP
   203  		p3 := s.Prog(ppc64.ABNE)
   204  		p3.To.Type = obj.TYPE_BRANCH
   205  		gc.Patch(p3, p)
   206  
   207  	case ssa.OpPPC64LoweredAtomicAdd32,
   208  		ssa.OpPPC64LoweredAtomicAdd64:
   209  		// LWSYNC
   210  		// LDAR/LWAR    (Rarg0), Rout
   211  		// ADD		Rarg1, Rout
   212  		// STDCCC/STWCCC Rout, (Rarg0)
   213  		// BNE         -3(PC)
   214  		// MOVW		Rout,Rout (if Add32)
   215  		ld := ppc64.ALDAR
   216  		st := ppc64.ASTDCCC
   217  		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
   218  			ld = ppc64.ALWAR
   219  			st = ppc64.ASTWCCC
   220  		}
   221  		r0 := v.Args[0].Reg()
   222  		r1 := v.Args[1].Reg()
   223  		out := v.Reg0()
   224  		// LWSYNC - Assuming shared data not write-through-required nor
   225  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   226  		plwsync := s.Prog(ppc64.ALWSYNC)
   227  		plwsync.To.Type = obj.TYPE_NONE
   228  		// LDAR or LWAR
   229  		p := s.Prog(ld)
   230  		p.From.Type = obj.TYPE_MEM
   231  		p.From.Reg = r0
   232  		p.To.Type = obj.TYPE_REG
   233  		p.To.Reg = out
   234  		// ADD reg1,out
   235  		p1 := s.Prog(ppc64.AADD)
   236  		p1.From.Type = obj.TYPE_REG
   237  		p1.From.Reg = r1
   238  		p1.To.Reg = out
   239  		p1.To.Type = obj.TYPE_REG
   240  		// STDCCC or STWCCC
   241  		p3 := s.Prog(st)
   242  		p3.From.Type = obj.TYPE_REG
   243  		p3.From.Reg = out
   244  		p3.To.Type = obj.TYPE_MEM
   245  		p3.To.Reg = r0
   246  		// BNE retry
   247  		p4 := s.Prog(ppc64.ABNE)
   248  		p4.To.Type = obj.TYPE_BRANCH
   249  		gc.Patch(p4, p)
   250  
    251  		// Ensure a 32-bit result
   252  		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
   253  			p5 := s.Prog(ppc64.AMOVWZ)
   254  			p5.To.Type = obj.TYPE_REG
   255  			p5.To.Reg = out
   256  			p5.From.Type = obj.TYPE_REG
   257  			p5.From.Reg = out
   258  		}
   259  
   260  	case ssa.OpPPC64LoweredAtomicExchange32,
   261  		ssa.OpPPC64LoweredAtomicExchange64:
   262  		// LWSYNC
   263  		// LDAR/LWAR    (Rarg0), Rout
   264  		// STDCCC/STWCCC Rout, (Rarg0)
   265  		// BNE         -2(PC)
   266  		// ISYNC
   267  		ld := ppc64.ALDAR
   268  		st := ppc64.ASTDCCC
   269  		if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
   270  			ld = ppc64.ALWAR
   271  			st = ppc64.ASTWCCC
   272  		}
   273  		r0 := v.Args[0].Reg()
   274  		r1 := v.Args[1].Reg()
   275  		out := v.Reg0()
   276  		// LWSYNC - Assuming shared data not write-through-required nor
   277  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   278  		plwsync := s.Prog(ppc64.ALWSYNC)
   279  		plwsync.To.Type = obj.TYPE_NONE
   280  		// LDAR or LWAR
   281  		p := s.Prog(ld)
   282  		p.From.Type = obj.TYPE_MEM
   283  		p.From.Reg = r0
   284  		p.To.Type = obj.TYPE_REG
   285  		p.To.Reg = out
   286  		// STDCCC or STWCCC
   287  		p1 := s.Prog(st)
   288  		p1.From.Type = obj.TYPE_REG
   289  		p1.From.Reg = r1
   290  		p1.To.Type = obj.TYPE_MEM
   291  		p1.To.Reg = r0
   292  		// BNE retry
   293  		p2 := s.Prog(ppc64.ABNE)
   294  		p2.To.Type = obj.TYPE_BRANCH
   295  		gc.Patch(p2, p)
   296  		// ISYNC
   297  		pisync := s.Prog(ppc64.AISYNC)
   298  		pisync.To.Type = obj.TYPE_NONE
   299  
   300  	case ssa.OpPPC64LoweredAtomicLoad32,
   301  		ssa.OpPPC64LoweredAtomicLoad64,
   302  		ssa.OpPPC64LoweredAtomicLoadPtr:
   303  		// SYNC
   304  		// MOVD/MOVW (Rarg0), Rout
   305  		// CMP Rout,Rout
   306  		// BNE 1(PC)
   307  		// ISYNC
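         		// The CMP/BNE/ISYNC sequence makes later loads depend on the value
         		// just loaded, giving acquire ordering (a standard Power idiom; see
         		// the programming examples in appendix B of the ISA 2.07b).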
   308  		ld := ppc64.AMOVD
   309  		cmp := ppc64.ACMP
   310  		if v.Op == ssa.OpPPC64LoweredAtomicLoad32 {
   311  			ld = ppc64.AMOVW
   312  			cmp = ppc64.ACMPW
   313  		}
   314  		arg0 := v.Args[0].Reg()
   315  		out := v.Reg0()
   316  		// SYNC when AuxInt == 1; otherwise, load-acquire
   317  		if v.AuxInt == 1 {
   318  			psync := s.Prog(ppc64.ASYNC)
   319  			psync.To.Type = obj.TYPE_NONE
   320  		}
   321  		// Load
   322  		p := s.Prog(ld)
   323  		p.From.Type = obj.TYPE_MEM
   324  		p.From.Reg = arg0
   325  		p.To.Type = obj.TYPE_REG
   326  		p.To.Reg = out
   327  		// CMP
   328  		p1 := s.Prog(cmp)
   329  		p1.From.Type = obj.TYPE_REG
   330  		p1.From.Reg = out
   331  		p1.To.Type = obj.TYPE_REG
   332  		p1.To.Reg = out
   333  		// BNE
   334  		p2 := s.Prog(ppc64.ABNE)
   335  		p2.To.Type = obj.TYPE_BRANCH
   336  		// ISYNC
   337  		pisync := s.Prog(ppc64.AISYNC)
   338  		pisync.To.Type = obj.TYPE_NONE
   339  		gc.Patch(p2, pisync)
   340  
   341  	case ssa.OpPPC64LoweredAtomicStore32,
   342  		ssa.OpPPC64LoweredAtomicStore64:
   343  		// SYNC or LWSYNC
   344  		// MOVD/MOVW arg1,(arg0)
   345  		st := ppc64.AMOVD
   346  		if v.Op == ssa.OpPPC64LoweredAtomicStore32 {
   347  			st = ppc64.AMOVW
   348  		}
   349  		arg0 := v.Args[0].Reg()
   350  		arg1 := v.Args[1].Reg()
    351  		// If AuxInt == 0, LWSYNC (store-release) is sufficient;
    352  		// otherwise a full SYNC is required.
   353  		syncOp := ppc64.ASYNC
   354  		if v.AuxInt == 0 {
   355  			syncOp = ppc64.ALWSYNC
   356  		}
   357  		psync := s.Prog(syncOp)
   358  		psync.To.Type = obj.TYPE_NONE
   359  		// Store
   360  		p := s.Prog(st)
   361  		p.To.Type = obj.TYPE_MEM
   362  		p.To.Reg = arg0
   363  		p.From.Type = obj.TYPE_REG
   364  		p.From.Reg = arg1
   365  
   366  	case ssa.OpPPC64LoweredAtomicCas64,
   367  		ssa.OpPPC64LoweredAtomicCas32:
   368  		// LWSYNC
   369  		// loop:
   370  		// LDAR        (Rarg0), MutexHint, Rtmp
   371  		// CMP         Rarg1, Rtmp
   372  		// BNE         fail
   373  		// STDCCC      Rarg2, (Rarg0)
   374  		// BNE         loop
   375  		// LWSYNC      // Only for sequential consistency; not required in CasRel.
   376  		// MOVD        $1, Rout
   377  		// BR          end
   378  		// fail:
   379  		// MOVD        $0, Rout
   380  		// end:
   381  		ld := ppc64.ALDAR
   382  		st := ppc64.ASTDCCC
   383  		cmp := ppc64.ACMP
   384  		if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
   385  			ld = ppc64.ALWAR
   386  			st = ppc64.ASTWCCC
   387  			cmp = ppc64.ACMPW
   388  		}
   389  		r0 := v.Args[0].Reg()
   390  		r1 := v.Args[1].Reg()
   391  		r2 := v.Args[2].Reg()
   392  		out := v.Reg0()
   393  		// LWSYNC - Assuming shared data not write-through-required nor
   394  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   395  		plwsync1 := s.Prog(ppc64.ALWSYNC)
   396  		plwsync1.To.Type = obj.TYPE_NONE
   397  		// LDAR or LWAR
   398  		p := s.Prog(ld)
   399  		p.From.Type = obj.TYPE_MEM
   400  		p.From.Reg = r0
   401  		p.To.Type = obj.TYPE_REG
   402  		p.To.Reg = ppc64.REGTMP
   403  		// If it is a Compare-and-Swap-Release operation, set the EH field with
   404  		// the release hint.
   405  		if v.AuxInt == 0 {
   406  			p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
   407  		}
   408  		// CMP reg1,reg2
   409  		p1 := s.Prog(cmp)
   410  		p1.From.Type = obj.TYPE_REG
   411  		p1.From.Reg = r1
   412  		p1.To.Reg = ppc64.REGTMP
   413  		p1.To.Type = obj.TYPE_REG
   414  		// BNE cas_fail
   415  		p2 := s.Prog(ppc64.ABNE)
   416  		p2.To.Type = obj.TYPE_BRANCH
   417  		// STDCCC or STWCCC
   418  		p3 := s.Prog(st)
   419  		p3.From.Type = obj.TYPE_REG
   420  		p3.From.Reg = r2
   421  		p3.To.Type = obj.TYPE_MEM
   422  		p3.To.Reg = r0
   423  		// BNE retry
   424  		p4 := s.Prog(ppc64.ABNE)
   425  		p4.To.Type = obj.TYPE_BRANCH
   426  		gc.Patch(p4, p)
   427  		// LWSYNC - Assuming shared data not write-through-required nor
   428  		// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
   429  		// If the operation is a CAS-Release, then synchronization is not necessary.
   430  		if v.AuxInt != 0 {
   431  			plwsync2 := s.Prog(ppc64.ALWSYNC)
   432  			plwsync2.To.Type = obj.TYPE_NONE
   433  		}
   434  		// return true
   435  		p5 := s.Prog(ppc64.AMOVD)
   436  		p5.From.Type = obj.TYPE_CONST
   437  		p5.From.Offset = 1
   438  		p5.To.Type = obj.TYPE_REG
   439  		p5.To.Reg = out
   440  		// BR done
   441  		p6 := s.Prog(obj.AJMP)
   442  		p6.To.Type = obj.TYPE_BRANCH
   443  		// return false
   444  		p7 := s.Prog(ppc64.AMOVD)
   445  		p7.From.Type = obj.TYPE_CONST
   446  		p7.From.Offset = 0
   447  		p7.To.Type = obj.TYPE_REG
   448  		p7.To.Reg = out
   449  		gc.Patch(p2, p7)
   450  		// done (label)
   451  		p8 := s.Prog(obj.ANOP)
   452  		gc.Patch(p6, p8)
   453  
   454  	case ssa.OpPPC64LoweredGetClosurePtr:
   455  		// Closure pointer is R11 (already)
   456  		gc.CheckLoweredGetClosurePtr(v)
   457  
   458  	case ssa.OpPPC64LoweredGetCallerSP:
   459  		// caller's SP is FixedFrameSize below the address of the first arg
   460  		p := s.Prog(ppc64.AMOVD)
   461  		p.From.Type = obj.TYPE_ADDR
   462  		p.From.Offset = -gc.Ctxt.FixedFrameSize()
   463  		p.From.Name = obj.NAME_PARAM
   464  		p.To.Type = obj.TYPE_REG
   465  		p.To.Reg = v.Reg()
   466  
   467  	case ssa.OpPPC64LoweredGetCallerPC:
   468  		p := s.Prog(obj.AGETCALLERPC)
   469  		p.To.Type = obj.TYPE_REG
   470  		p.To.Reg = v.Reg()
   471  
   472  	case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
   473  		// input is already rounded
   474  
   475  	case ssa.OpLoadReg:
   476  		loadOp := loadByType(v.Type)
   477  		p := s.Prog(loadOp)
   478  		gc.AddrAuto(&p.From, v.Args[0])
   479  		p.To.Type = obj.TYPE_REG
   480  		p.To.Reg = v.Reg()
   481  
   482  	case ssa.OpStoreReg:
   483  		storeOp := storeByType(v.Type)
   484  		p := s.Prog(storeOp)
   485  		p.From.Type = obj.TYPE_REG
   486  		p.From.Reg = v.Args[0].Reg()
   487  		gc.AddrAuto(&p.To, v)
   488  
   489  	case ssa.OpPPC64DIVD:
   490  		// For now,
   491  		//
   492  		// cmp arg1, -1
   493  		// be  ahead
   494  		// v = arg0 / arg1
   495  		// b over
   496  		// ahead: v = - arg0
   497  		// over: nop
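         		//
         		// The divisor == -1 check exists because the hardware quotient for
         		// the one overflow case, MinInt64 / -1, is undefined; Go defines that
         		// result as MinInt64, which the NEG on the "ahead" path produces.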
   498  		r := v.Reg()
   499  		r0 := v.Args[0].Reg()
   500  		r1 := v.Args[1].Reg()
   501  
   502  		p := s.Prog(ppc64.ACMP)
   503  		p.From.Type = obj.TYPE_REG
   504  		p.From.Reg = r1
   505  		p.To.Type = obj.TYPE_CONST
   506  		p.To.Offset = -1
   507  
   508  		pbahead := s.Prog(ppc64.ABEQ)
   509  		pbahead.To.Type = obj.TYPE_BRANCH
   510  
   511  		p = s.Prog(v.Op.Asm())
   512  		p.From.Type = obj.TYPE_REG
   513  		p.From.Reg = r1
   514  		p.Reg = r0
   515  		p.To.Type = obj.TYPE_REG
   516  		p.To.Reg = r
   517  
   518  		pbover := s.Prog(obj.AJMP)
   519  		pbover.To.Type = obj.TYPE_BRANCH
   520  
   521  		p = s.Prog(ppc64.ANEG)
   522  		p.To.Type = obj.TYPE_REG
   523  		p.To.Reg = r
   524  		p.From.Type = obj.TYPE_REG
   525  		p.From.Reg = r0
   526  		gc.Patch(pbahead, p)
   527  
   528  		p = s.Prog(obj.ANOP)
   529  		gc.Patch(pbover, p)
   530  
   531  	case ssa.OpPPC64DIVW:
   532  		// word-width version of above
   533  		r := v.Reg()
   534  		r0 := v.Args[0].Reg()
   535  		r1 := v.Args[1].Reg()
   536  
   537  		p := s.Prog(ppc64.ACMPW)
   538  		p.From.Type = obj.TYPE_REG
   539  		p.From.Reg = r1
   540  		p.To.Type = obj.TYPE_CONST
   541  		p.To.Offset = -1
   542  
   543  		pbahead := s.Prog(ppc64.ABEQ)
   544  		pbahead.To.Type = obj.TYPE_BRANCH
   545  
   546  		p = s.Prog(v.Op.Asm())
   547  		p.From.Type = obj.TYPE_REG
   548  		p.From.Reg = r1
   549  		p.Reg = r0
   550  		p.To.Type = obj.TYPE_REG
   551  		p.To.Reg = r
   552  
   553  		pbover := s.Prog(obj.AJMP)
   554  		pbover.To.Type = obj.TYPE_BRANCH
   555  
   556  		p = s.Prog(ppc64.ANEG)
   557  		p.To.Type = obj.TYPE_REG
   558  		p.To.Reg = r
   559  		p.From.Type = obj.TYPE_REG
   560  		p.From.Reg = r0
   561  		gc.Patch(pbahead, p)
   562  
   563  		p = s.Prog(obj.ANOP)
   564  		gc.Patch(pbover, p)
   565  
   566  	case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
   567  		ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
   568  		ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
   569  		ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
   570  		ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
   571  		ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
   572  		ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:
   573  		r := v.Reg()
   574  		r1 := v.Args[0].Reg()
   575  		r2 := v.Args[1].Reg()
   576  		p := s.Prog(v.Op.Asm())
   577  		p.From.Type = obj.TYPE_REG
   578  		p.From.Reg = r2
   579  		p.Reg = r1
   580  		p.To.Type = obj.TYPE_REG
   581  		p.To.Reg = r
   582  
   583  	case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
   584  		p := s.Prog(v.Op.Asm())
   585  		p.From.Type = obj.TYPE_CONST
   586  		p.From.Offset = v.AuxInt
   587  		p.Reg = v.Args[0].Reg()
   588  		p.To.Type = obj.TYPE_REG
   589  		p.To.Reg = v.Reg()
   590  
   591  	case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
   592  		r := v.Reg()
   593  		r1 := v.Args[0].Reg()
   594  		r2 := v.Args[1].Reg()
   595  		r3 := v.Args[2].Reg()
   596  		// r = r1*r2 ± r3
   597  		p := s.Prog(v.Op.Asm())
   598  		p.From.Type = obj.TYPE_REG
   599  		p.From.Reg = r1
   600  		p.Reg = r3
   601  		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
   602  		p.To.Type = obj.TYPE_REG
   603  		p.To.Reg = r
   604  
   605  	case ssa.OpPPC64MaskIfNotCarry:
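         		// The op computes (carry bit) - 1 using R0 (always zero) as the
         		// source: 0 if the carry is set, all ones (a mask) otherwise.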
   606  		r := v.Reg()
   607  		p := s.Prog(v.Op.Asm())
   608  		p.From.Type = obj.TYPE_REG
   609  		p.From.Reg = ppc64.REGZERO
   610  		p.To.Type = obj.TYPE_REG
   611  		p.To.Reg = r
   612  
   613  	case ssa.OpPPC64ADDconstForCarry:
   614  		r1 := v.Args[0].Reg()
   615  		p := s.Prog(v.Op.Asm())
   616  		p.Reg = r1
   617  		p.From.Type = obj.TYPE_CONST
   618  		p.From.Offset = v.AuxInt
   619  		p.To.Type = obj.TYPE_REG
   620  		p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
   621  
   622  	case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS, ssa.OpPPC64FROUND:
   623  		r := v.Reg()
   624  		p := s.Prog(v.Op.Asm())
   625  		p.To.Type = obj.TYPE_REG
   626  		p.To.Reg = r
   627  		p.From.Type = obj.TYPE_REG
   628  		p.From.Reg = v.Args[0].Reg()
   629  
   630  	case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
   631  		ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
   632  		p := s.Prog(v.Op.Asm())
   633  		p.Reg = v.Args[0].Reg()
   634  		p.From.Type = obj.TYPE_CONST
   635  		p.From.Offset = v.AuxInt
   636  		p.To.Type = obj.TYPE_REG
   637  		p.To.Reg = v.Reg()
   638  
   639  	case ssa.OpPPC64ANDCCconst:
   640  		p := s.Prog(v.Op.Asm())
   641  		p.Reg = v.Args[0].Reg()
   642  
   643  		if v.Aux != nil {
   644  			p.From.Type = obj.TYPE_CONST
   645  			p.From.Offset = gc.AuxOffset(v)
   646  		} else {
   647  			p.From.Type = obj.TYPE_CONST
   648  			p.From.Offset = v.AuxInt
   649  		}
   650  
   651  		p.To.Type = obj.TYPE_REG
   652  		p.To.Reg = ppc64.REGTMP // discard result
   653  
   654  	case ssa.OpPPC64MOVDaddr:
   655  		switch v.Aux.(type) {
   656  		default:
   657  			v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
   658  		case nil:
   659  			// If aux offset and aux int are both 0, and the same
   660  			// input and output regs are used, no instruction
   661  			// needs to be generated, since it would just be
   662  			// addi rx, rx, 0.
   663  			if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
   664  				p := s.Prog(ppc64.AMOVD)
   665  				p.From.Type = obj.TYPE_ADDR
   666  				p.From.Reg = v.Args[0].Reg()
   667  				p.From.Offset = v.AuxInt
   668  				p.To.Type = obj.TYPE_REG
   669  				p.To.Reg = v.Reg()
   670  			}
   671  
   672  		case *obj.LSym, *gc.Node:
   673  			p := s.Prog(ppc64.AMOVD)
   674  			p.From.Type = obj.TYPE_ADDR
   675  			p.From.Reg = v.Args[0].Reg()
   676  			p.To.Type = obj.TYPE_REG
   677  			p.To.Reg = v.Reg()
   678  			gc.AddAux(&p.From, v)
   679  
   680  		}
   681  
   682  	case ssa.OpPPC64MOVDconst:
   683  		p := s.Prog(v.Op.Asm())
   684  		p.From.Type = obj.TYPE_CONST
   685  		p.From.Offset = v.AuxInt
   686  		p.To.Type = obj.TYPE_REG
   687  		p.To.Reg = v.Reg()
   688  
   689  	case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
   690  		p := s.Prog(v.Op.Asm())
   691  		p.From.Type = obj.TYPE_FCONST
   692  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   693  		p.To.Type = obj.TYPE_REG
   694  		p.To.Reg = v.Reg()
   695  
   696  	case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
   697  		p := s.Prog(v.Op.Asm())
   698  		p.From.Type = obj.TYPE_REG
   699  		p.From.Reg = v.Args[0].Reg()
   700  		p.To.Type = obj.TYPE_REG
   701  		p.To.Reg = v.Args[1].Reg()
   702  
   703  	case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
   704  		p := s.Prog(v.Op.Asm())
   705  		p.From.Type = obj.TYPE_REG
   706  		p.From.Reg = v.Args[0].Reg()
   707  		p.To.Type = obj.TYPE_CONST
   708  		p.To.Offset = v.AuxInt
   709  
   710  	case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
   711  		// Shift in register to required size
   712  		p := s.Prog(v.Op.Asm())
   713  		p.From.Type = obj.TYPE_REG
   714  		p.From.Reg = v.Args[0].Reg()
   715  		p.To.Reg = v.Reg()
   716  		p.To.Type = obj.TYPE_REG
   717  
   718  	case ssa.OpPPC64MOVDload:
   719  
    720  		// MOVDload uses a DS-form instruction, which requires the offset value of the data to be a multiple of 4.
    721  		// For misaligned offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
   722  		// the offset is not known until link time. If the load of a go.string uses relocation for the
   723  		// offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
   724  		// To avoid this problem, the full address of the go.string is computed and loaded into the base register,
   725  		// and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
   726  		// go.string types because other types will have proper alignment.
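         		//
         		// The emitted sequence is, in effect:
         		//	MOVD	$sym+off(Rarg0), Rout
         		//	MOVD	(Rout), Rout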
   727  
   728  		gostring := false
   729  		switch n := v.Aux.(type) {
   730  		case *obj.LSym:
   731  			gostring = strings.HasPrefix(n.Name, "go.string.")
   732  		}
   733  		if gostring {
   734  			// Generate full addr of the go.string const
   735  			// including AuxInt
   736  			p := s.Prog(ppc64.AMOVD)
   737  			p.From.Type = obj.TYPE_ADDR
   738  			p.From.Reg = v.Args[0].Reg()
   739  			gc.AddAux(&p.From, v)
   740  			p.To.Type = obj.TYPE_REG
   741  			p.To.Reg = v.Reg()
   742  			// Load go.string using 0 offset
   743  			p = s.Prog(v.Op.Asm())
   744  			p.From.Type = obj.TYPE_MEM
   745  			p.From.Reg = v.Reg()
   746  			p.To.Type = obj.TYPE_REG
   747  			p.To.Reg = v.Reg()
   748  			break
   749  		}
   750  		// Not a go.string, generate a normal load
   751  		fallthrough
   752  
   753  	case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
   754  		p := s.Prog(v.Op.Asm())
   755  		p.From.Type = obj.TYPE_MEM
   756  		p.From.Reg = v.Args[0].Reg()
   757  		gc.AddAux(&p.From, v)
   758  		p.To.Type = obj.TYPE_REG
   759  		p.To.Reg = v.Reg()
   760  
   761  	case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
   762  		p := s.Prog(v.Op.Asm())
   763  		p.From.Type = obj.TYPE_MEM
   764  		p.From.Reg = v.Args[0].Reg()
   765  		p.To.Type = obj.TYPE_REG
   766  		p.To.Reg = v.Reg()
   767  
   768  	case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
   769  		p := s.Prog(v.Op.Asm())
   770  		p.To.Type = obj.TYPE_MEM
   771  		p.To.Reg = v.Args[0].Reg()
   772  		p.From.Type = obj.TYPE_REG
   773  		p.From.Reg = v.Args[1].Reg()
   774  
   775  	case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
   776  		ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
   777  		ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
   778  		p := s.Prog(v.Op.Asm())
   779  		p.From.Type = obj.TYPE_MEM
   780  		p.From.Reg = v.Args[0].Reg()
   781  		p.From.Index = v.Args[1].Reg()
   782  		gc.AddAux(&p.From, v)
   783  		p.To.Type = obj.TYPE_REG
   784  		p.To.Reg = v.Reg()
   785  
   786  	case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
   787  		p := s.Prog(v.Op.Asm())
   788  		p.From.Type = obj.TYPE_REG
   789  		p.From.Reg = ppc64.REGZERO
   790  		p.To.Type = obj.TYPE_MEM
   791  		p.To.Reg = v.Args[0].Reg()
   792  		gc.AddAux(&p.To, v)
   793  
   794  	case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
   795  		p := s.Prog(v.Op.Asm())
   796  		p.From.Type = obj.TYPE_REG
   797  		p.From.Reg = v.Args[1].Reg()
   798  		p.To.Type = obj.TYPE_MEM
   799  		p.To.Reg = v.Args[0].Reg()
   800  		gc.AddAux(&p.To, v)
   801  
   802  	case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
   803  		ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
   804  		ssa.OpPPC64MOVHBRstoreidx:
   805  		p := s.Prog(v.Op.Asm())
   806  		p.From.Type = obj.TYPE_REG
   807  		p.From.Reg = v.Args[2].Reg()
   808  		p.To.Index = v.Args[1].Reg()
   809  		p.To.Type = obj.TYPE_MEM
   810  		p.To.Reg = v.Args[0].Reg()
   811  		gc.AddAux(&p.To, v)
   812  
   813  	case ssa.OpPPC64Equal,
   814  		ssa.OpPPC64NotEqual,
   815  		ssa.OpPPC64LessThan,
   816  		ssa.OpPPC64FLessThan,
   817  		ssa.OpPPC64LessEqual,
   818  		ssa.OpPPC64GreaterThan,
   819  		ssa.OpPPC64FGreaterThan,
   820  		ssa.OpPPC64GreaterEqual:
   821  
    822  		// On Power7 or later, the isel instruction can be used:
   823  		// for a < b, a > b, a = b:
   824  		//   rtmp := 1
   825  		//   isel rt,rtmp,r0,cond // rt is target in ppc asm
   826  
    827  		// for a >= b, a <= b, a != b:
    828  		//   rtmp := 1
    829  		//   isel rt,r0,rtmp,!cond // rt is target in ppc asm
   830  
   831  		p := s.Prog(ppc64.AMOVD)
   832  		p.From.Type = obj.TYPE_CONST
   833  		p.From.Offset = 1
   834  		p.To.Type = obj.TYPE_REG
   835  		p.To.Reg = iselRegs[1]
   836  		iop := iselOps[v.Op]
   837  		ssaGenISEL(s, v, iop.cond, iselRegs[iop.valueIfCond], iselRegs[1-iop.valueIfCond])
   838  
    839  	case ssa.OpPPC64FLessEqual, // These include a second branch for EQ -- NaN (unordered) prevents rewriting a >= b as !(a < b)
   840  		ssa.OpPPC64FGreaterEqual:
   841  
   842  		p := s.Prog(ppc64.AMOVD)
   843  		p.From.Type = obj.TYPE_CONST
   844  		p.From.Offset = 1
   845  		p.To.Type = obj.TYPE_REG
   846  		p.To.Reg = iselRegs[1]
   847  		iop := iselOps[v.Op]
   848  		ssaGenISEL(s, v, iop.cond, iselRegs[iop.valueIfCond], iselRegs[1-iop.valueIfCond])
   849  		ssaGenISEL(s, v, ppc64.C_COND_EQ, iselRegs[1], v.Reg())
   850  
   851  	case ssa.OpPPC64LoweredZero:
   852  
   853  		// unaligned data doesn't hurt performance
   854  		// for these instructions on power8 or later
   855  
   856  		// for sizes >= 64 generate a loop as follows:
   857  
   858  		// set up loop counter in CTR, used by BC
   859  		//	 MOVD len/32,REG_TMP
   860  		//	 MOVD REG_TMP,CTR
   861  		//	 loop:
   862  		//	 MOVD R0,(R3)
   863  		//	 MOVD R0,8(R3)
   864  		//	 MOVD R0,16(R3)
   865  		//	 MOVD R0,24(R3)
   866  		//	 ADD  $32,R3
   867  		//	 BC   16, 0, loop
   868  		//
   869  		// any remainder is done as described below
   870  
   871  		// for sizes < 64 bytes, first clear as many doublewords as possible,
   872  		// then handle the remainder
   873  		//	MOVD R0,(R3)
   874  		//	MOVD R0,8(R3)
   875  		// .... etc.
   876  		//
   877  		// the remainder bytes are cleared using one or more
   878  		// of the following instructions with the appropriate
    879  		// offsets, depending on which instructions are needed
   880  		//
   881  		//	MOVW R0,n1(R3)	4 bytes
   882  		//	MOVH R0,n2(R3)	2 bytes
   883  		//	MOVB R0,n3(R3)	1 byte
   884  		//
   885  		// 7 bytes: MOVW, MOVH, MOVB
   886  		// 6 bytes: MOVW, MOVH
   887  		// 5 bytes: MOVW, MOVB
   888  		// 3 bytes: MOVH, MOVB
   889  
   890  		// each loop iteration does 32 bytes
   891  		ctr := v.AuxInt / 32
   892  
   893  		// remainder bytes
   894  		rem := v.AuxInt % 32
   895  
   896  		// only generate a loop if there is more
   897  		// than 1 iteration.
   898  		if ctr > 1 {
   899  			// Set up CTR loop counter
   900  			p := s.Prog(ppc64.AMOVD)
   901  			p.From.Type = obj.TYPE_CONST
   902  			p.From.Offset = ctr
   903  			p.To.Type = obj.TYPE_REG
   904  			p.To.Reg = ppc64.REGTMP
   905  
   906  			p = s.Prog(ppc64.AMOVD)
   907  			p.From.Type = obj.TYPE_REG
   908  			p.From.Reg = ppc64.REGTMP
   909  			p.To.Type = obj.TYPE_REG
   910  			p.To.Reg = ppc64.REG_CTR
   911  
   912  			// generate 4 MOVDs
    913  			// when this is a loop, the top (first instruction) must be saved
   914  			var top *obj.Prog
   915  			for offset := int64(0); offset < 32; offset += 8 {
   916  				// This is the top of loop
   917  				p := s.Prog(ppc64.AMOVD)
   918  				p.From.Type = obj.TYPE_REG
   919  				p.From.Reg = ppc64.REG_R0
   920  				p.To.Type = obj.TYPE_MEM
   921  				p.To.Reg = v.Args[0].Reg()
   922  				p.To.Offset = offset
   923  				// Save the top of loop
   924  				if top == nil {
   925  					top = p
   926  				}
   927  			}
   928  
   929  			// Increment address for the
   930  			// 4 doublewords just zeroed.
   931  			p = s.Prog(ppc64.AADD)
   932  			p.Reg = v.Args[0].Reg()
   933  			p.From.Type = obj.TYPE_CONST
   934  			p.From.Offset = 32
   935  			p.To.Type = obj.TYPE_REG
   936  			p.To.Reg = v.Args[0].Reg()
   937  
   938  			// Branch back to top of loop
   939  			// based on CTR
   940  			// BC with BO_BCTR generates bdnz
   941  			p = s.Prog(ppc64.ABC)
   942  			p.From.Type = obj.TYPE_CONST
   943  			p.From.Offset = ppc64.BO_BCTR
   944  			p.Reg = ppc64.REG_R0
   945  			p.To.Type = obj.TYPE_BRANCH
   946  			gc.Patch(p, top)
   947  		}
   948  
   949  		// when ctr == 1 the loop was not generated but
   950  		// there are at least 32 bytes to clear, so add
   951  		// that to the remainder to generate the code
   952  		// to clear those doublewords
   953  		if ctr == 1 {
   954  			rem += 32
   955  		}
   956  
   957  		// clear the remainder starting at offset zero
   958  		offset := int64(0)
   959  
   960  		// first clear as many doublewords as possible
   961  		// then clear remaining sizes as available
   962  		for rem > 0 {
   963  			op, size := ppc64.AMOVB, int64(1)
   964  			switch {
   965  			case rem >= 8:
   966  				op, size = ppc64.AMOVD, 8
   967  			case rem >= 4:
   968  				op, size = ppc64.AMOVW, 4
   969  			case rem >= 2:
   970  				op, size = ppc64.AMOVH, 2
   971  			}
   972  			p := s.Prog(op)
   973  			p.From.Type = obj.TYPE_REG
   974  			p.From.Reg = ppc64.REG_R0
   975  			p.To.Type = obj.TYPE_MEM
   976  			p.To.Reg = v.Args[0].Reg()
   977  			p.To.Offset = offset
   978  			rem -= size
   979  			offset += size
   980  		}
   981  
   982  	case ssa.OpPPC64LoweredMove:
   983  
    984  		// This is used when moving more
    985  		// than 8 bytes. Moves start with
    986  		// as many 8 byte moves as possible, then
    987  		// 4, 2, or 1 byte(s) for the remainder.
    988  		// This is efficient on power8 and later.
    989  		// If there are 64 or more bytes, a
    990  		// loop is generated to move 32 bytes and
    991  		// update the src and dst addresses on each
    992  		// iteration. When < 64 bytes, the appropriate
    993  		// number of moves is generated based on the
    994  		// size.
   995  		// When moving >= 64 bytes a loop is used
   996  		//	MOVD len/32,REG_TMP
   997  		//	MOVD REG_TMP,CTR
   998  		// top:
   999  		//	MOVD (R4),R7
  1000  		//	MOVD 8(R4),R8
  1001  		//	MOVD 16(R4),R9
  1002  		//	MOVD 24(R4),R10
   1003  		//	ADD  $32,R4
  1004  		//	MOVD R7,(R3)
  1005  		//	MOVD R8,8(R3)
  1006  		//	MOVD R9,16(R3)
  1007  		//	MOVD R10,24(R3)
   1008  		//	ADD  $32,R3
  1009  		//	BC 16,0,top
  1010  		// Bytes not moved by this loop are moved
  1011  		// with a combination of the following instructions,
  1012  		// starting with the largest sizes and generating as
  1013  		// many as needed, using the appropriate offset value.
  1014  		//	MOVD  n(R4),R7
  1015  		//	MOVD  R7,n(R3)
  1016  		//	MOVW  n1(R4),R7
  1017  		//	MOVW  R7,n1(R3)
  1018  		//	MOVH  n2(R4),R7
  1019  		//	MOVH  R7,n2(R3)
  1020  		//	MOVB  n3(R4),R7
  1021  		//	MOVB  R7,n3(R3)
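         		//
         		// For example, moving 79 bytes emits a 2 iteration loop for the
         		// first 64 bytes (R3 and R4 advance by 32 each iteration), then a
         		// MOVD pair at offset 0, a MOVW pair at 8, a MOVH pair at 12, and
         		// a MOVB pair at 14.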
  1022  
  1023  		// Each loop iteration moves 32 bytes
  1024  		ctr := v.AuxInt / 32
  1025  
  1026  		// Remainder after the loop
  1027  		rem := v.AuxInt % 32
  1028  
  1029  		dst_reg := v.Args[0].Reg()
  1030  		src_reg := v.Args[1].Reg()
  1031  
   1032  		// The set of registers used here must match the clobbered reg list
  1033  		// in PPC64Ops.go.
  1034  		useregs := []int16{ppc64.REG_R7, ppc64.REG_R8, ppc64.REG_R9, ppc64.REG_R10}
  1035  		offset := int64(0)
  1036  
  1037  		// top of the loop
  1038  		var top *obj.Prog
   1039  		// Only generate looping code when the loop counter is > 1 (>= 64 bytes)
  1040  		if ctr > 1 {
  1041  			// Set up the CTR
  1042  			p := s.Prog(ppc64.AMOVD)
  1043  			p.From.Type = obj.TYPE_CONST
  1044  			p.From.Offset = ctr
  1045  			p.To.Type = obj.TYPE_REG
  1046  			p.To.Reg = ppc64.REGTMP
  1047  
  1048  			p = s.Prog(ppc64.AMOVD)
  1049  			p.From.Type = obj.TYPE_REG
  1050  			p.From.Reg = ppc64.REGTMP
  1051  			p.To.Type = obj.TYPE_REG
  1052  			p.To.Reg = ppc64.REG_CTR
  1053  
  1054  			// Generate all the MOVDs for loads
  1055  			// based off the same register, increasing
  1056  			// the offset by 8 for each instruction
  1057  			for _, rg := range useregs {
  1058  				p := s.Prog(ppc64.AMOVD)
  1059  				p.From.Type = obj.TYPE_MEM
  1060  				p.From.Reg = src_reg
  1061  				p.From.Offset = offset
  1062  				p.To.Type = obj.TYPE_REG
  1063  				p.To.Reg = rg
  1064  				if top == nil {
  1065  					top = p
  1066  				}
  1067  				offset += 8
  1068  			}
  1069  			// increment the src_reg for next iteration
  1070  			p = s.Prog(ppc64.AADD)
  1071  			p.Reg = src_reg
  1072  			p.From.Type = obj.TYPE_CONST
  1073  			p.From.Offset = 32
  1074  			p.To.Type = obj.TYPE_REG
  1075  			p.To.Reg = src_reg
  1076  
  1077  			// generate the MOVDs for stores, based
  1078  			// off the same register, using the same
  1079  			// offsets as in the loads.
  1080  			offset = int64(0)
  1081  			for _, rg := range useregs {
  1082  				p := s.Prog(ppc64.AMOVD)
  1083  				p.From.Type = obj.TYPE_REG
  1084  				p.From.Reg = rg
  1085  				p.To.Type = obj.TYPE_MEM
  1086  				p.To.Reg = dst_reg
  1087  				p.To.Offset = offset
  1088  				offset += 8
  1089  			}
  1090  			// increment the dst_reg for next iteration
  1091  			p = s.Prog(ppc64.AADD)
  1092  			p.Reg = dst_reg
  1093  			p.From.Type = obj.TYPE_CONST
  1094  			p.From.Offset = 32
  1095  			p.To.Type = obj.TYPE_REG
  1096  			p.To.Reg = dst_reg
  1097  
  1098  			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
  1099  			// to loop top.
  1100  			p = s.Prog(ppc64.ABC)
  1101  			p.From.Type = obj.TYPE_CONST
  1102  			p.From.Offset = ppc64.BO_BCTR
  1103  			p.Reg = ppc64.REG_R0
  1104  			p.To.Type = obj.TYPE_BRANCH
  1105  			gc.Patch(p, top)
  1106  
  1107  			// src_reg and dst_reg were incremented in the loop, so
  1108  			// later instructions start with offset 0.
  1109  			offset = int64(0)
  1110  		}
  1111  
   1112  		// When ctr == 1 no loop was generated, so add 32 bytes
   1113  		// to the remainder to move those bytes.
  1114  		if ctr == 1 {
  1115  			rem += 32
  1116  		}
  1117  
  1118  		// Generate all the remaining load and store pairs, starting with
  1119  		// as many 8 byte moves as possible, then 4, 2, 1.
  1120  		for rem > 0 {
  1121  			op, size := ppc64.AMOVB, int64(1)
  1122  			switch {
  1123  			case rem >= 8:
  1124  				op, size = ppc64.AMOVD, 8
  1125  			case rem >= 4:
  1126  				op, size = ppc64.AMOVW, 4
  1127  			case rem >= 2:
  1128  				op, size = ppc64.AMOVH, 2
  1129  			}
  1130  			// Load
  1131  			p := s.Prog(op)
  1132  			p.To.Type = obj.TYPE_REG
  1133  			p.To.Reg = ppc64.REG_R7
  1134  			p.From.Type = obj.TYPE_MEM
  1135  			p.From.Reg = src_reg
  1136  			p.From.Offset = offset
  1137  
  1138  			// Store
  1139  			p = s.Prog(op)
  1140  			p.From.Type = obj.TYPE_REG
  1141  			p.From.Reg = ppc64.REG_R7
  1142  			p.To.Type = obj.TYPE_MEM
  1143  			p.To.Reg = dst_reg
  1144  			p.To.Offset = offset
  1145  			rem -= size
  1146  			offset += size
  1147  		}
  1148  
  1149  	case ssa.OpPPC64CALLstatic:
  1150  		s.Call(v)
  1151  
  1152  	case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
  1153  		p := s.Prog(ppc64.AMOVD)
  1154  		p.From.Type = obj.TYPE_REG
  1155  		p.From.Reg = v.Args[0].Reg()
  1156  		p.To.Type = obj.TYPE_REG
  1157  		p.To.Reg = ppc64.REG_CTR
  1158  
  1159  		if v.Args[0].Reg() != ppc64.REG_R12 {
  1160  			v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
  1161  		}
  1162  
  1163  		pp := s.Call(v)
  1164  		pp.To.Reg = ppc64.REG_CTR
  1165  
  1166  		if gc.Ctxt.Flag_shared {
  1167  			// When compiling Go into PIC, the function we just
  1168  			// called via pointer might have been implemented in
  1169  			// a separate module and so overwritten the TOC
  1170  			// pointer in R2; reload it.
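         			// 24(R1) is the TOC save slot in the ELFv2 stack frame.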
  1171  			q := s.Prog(ppc64.AMOVD)
  1172  			q.From.Type = obj.TYPE_MEM
  1173  			q.From.Offset = 24
  1174  			q.From.Reg = ppc64.REGSP
  1175  			q.To.Type = obj.TYPE_REG
  1176  			q.To.Reg = ppc64.REG_R2
  1177  		}
  1178  
  1179  	case ssa.OpPPC64LoweredWB:
  1180  		p := s.Prog(obj.ACALL)
  1181  		p.To.Type = obj.TYPE_MEM
  1182  		p.To.Name = obj.NAME_EXTERN
  1183  		p.To.Sym = v.Aux.(*obj.LSym)
  1184  
  1185  	case ssa.OpPPC64LoweredNilCheck:
  1186  		// Issue a load which will fault if arg is nil.
  1187  		p := s.Prog(ppc64.AMOVBZ)
  1188  		p.From.Type = obj.TYPE_MEM
  1189  		p.From.Reg = v.Args[0].Reg()
  1190  		gc.AddAux(&p.From, v)
  1191  		p.To.Type = obj.TYPE_REG
  1192  		p.To.Reg = ppc64.REGTMP
  1193  		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
  1194  			gc.Warnl(v.Pos, "generated nil check")
  1195  		}
  1196  
  1197  	case ssa.OpPPC64InvertFlags:
  1198  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1199  	case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
  1200  		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
  1201  	case ssa.OpClobber:
  1202  		// TODO: implement for clobberdead experiment. Nop is ok for now.
  1203  	default:
  1204  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1205  	}
  1206  }
  1207  
  1208  var blockJump = [...]struct {
  1209  	asm, invasm     obj.As
  1210  	asmeq, invasmun bool
  1211  }{
  1212  	ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
  1213  	ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
  1214  
  1215  	ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
  1216  	ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
  1217  	ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
  1218  	ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
  1219  
  1220  	// TODO: need to work FP comparisons into block jumps
  1221  	ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
  1222  	ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
  1223  	ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
  1224  	ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
  1225  }
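
         // asmeq marks FP conditions that include equality (FGE, FLE): the jmp.asm
         // branch needs a following BEQ to the same successor. invasmun marks those
         // whose inverted branch must also send NaN (unordered) cases to the false
         // successor via BVS. Both are consumed in ssaGenBlock below.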
  1226  
  1227  func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
  1228  	switch b.Kind {
  1229  	case ssa.BlockDefer:
  1230  		// defer returns in R3:
  1231  		// 0 if we should continue executing
  1232  		// 1 if we should jump to deferreturn call
  1233  		p := s.Prog(ppc64.ACMP)
  1234  		p.From.Type = obj.TYPE_REG
  1235  		p.From.Reg = ppc64.REG_R3
  1236  		p.To.Type = obj.TYPE_REG
  1237  		p.To.Reg = ppc64.REG_R0
  1238  
  1239  		p = s.Prog(ppc64.ABNE)
  1240  		p.To.Type = obj.TYPE_BRANCH
  1241  		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
  1242  		if b.Succs[0].Block() != next {
  1243  			p := s.Prog(obj.AJMP)
  1244  			p.To.Type = obj.TYPE_BRANCH
  1245  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1246  		}
  1247  
  1248  	case ssa.BlockPlain:
  1249  		if b.Succs[0].Block() != next {
  1250  			p := s.Prog(obj.AJMP)
  1251  			p.To.Type = obj.TYPE_BRANCH
  1252  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1253  		}
  1254  	case ssa.BlockExit:
  1255  		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
  1256  	case ssa.BlockRet:
  1257  		s.Prog(obj.ARET)
  1258  	case ssa.BlockRetJmp:
  1259  		p := s.Prog(obj.AJMP)
  1260  		p.To.Type = obj.TYPE_MEM
  1261  		p.To.Name = obj.NAME_EXTERN
  1262  		p.To.Sym = b.Aux.(*obj.LSym)
  1263  
  1264  	case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
  1265  		ssa.BlockPPC64LT, ssa.BlockPPC64GE,
  1266  		ssa.BlockPPC64LE, ssa.BlockPPC64GT,
  1267  		ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
  1268  		ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
  1269  		jmp := blockJump[b.Kind]
  1270  		switch next {
  1271  		case b.Succs[0].Block():
  1272  			s.Br(jmp.invasm, b.Succs[1].Block())
  1273  			if jmp.invasmun {
  1274  				// TODO: The second branch is probably predict-not-taken since it is for FP unordered
  1275  				s.Br(ppc64.ABVS, b.Succs[1].Block())
  1276  			}
  1277  		case b.Succs[1].Block():
  1278  			s.Br(jmp.asm, b.Succs[0].Block())
  1279  			if jmp.asmeq {
  1280  				s.Br(ppc64.ABEQ, b.Succs[0].Block())
  1281  			}
  1282  		default:
  1283  			if b.Likely != ssa.BranchUnlikely {
  1284  				s.Br(jmp.asm, b.Succs[0].Block())
  1285  				if jmp.asmeq {
  1286  					s.Br(ppc64.ABEQ, b.Succs[0].Block())
  1287  				}
  1288  				s.Br(obj.AJMP, b.Succs[1].Block())
  1289  			} else {
  1290  				s.Br(jmp.invasm, b.Succs[1].Block())
  1291  				if jmp.invasmun {
  1292  					// TODO: The second branch is probably predict-not-taken since it is for FP unordered
  1293  					s.Br(ppc64.ABVS, b.Succs[1].Block())
  1294  				}
  1295  				s.Br(obj.AJMP, b.Succs[0].Block())
  1296  			}
  1297  		}
  1298  	default:
  1299  		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
  1300  	}
  1301  }