github.com/bir3/gocompiler@v0.9.2202/src/cmd/compile/internal/ppc64/ssa.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ppc64
     6  
     7  import (
     8  	"github.com/bir3/gocompiler/src/cmd/compile/internal/base"
     9  	"github.com/bir3/gocompiler/src/cmd/compile/internal/ir"
    10  	"github.com/bir3/gocompiler/src/cmd/compile/internal/logopt"
    11  	"github.com/bir3/gocompiler/src/cmd/compile/internal/objw"
    12  	"github.com/bir3/gocompiler/src/cmd/compile/internal/ssa"
    13  	"github.com/bir3/gocompiler/src/cmd/compile/internal/ssagen"
    14  	"github.com/bir3/gocompiler/src/cmd/compile/internal/types"
    15  	"github.com/bir3/gocompiler/src/cmd/internal/obj"
    16  	"github.com/bir3/gocompiler/src/cmd/internal/obj/ppc64"
    17  	"github.com/bir3/gocompiler/src/internal/buildcfg"
    18  	"math"
    19  	"strings"
    20  )
    21  
// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
//
// In this file the function is a no-op: its whole body is commented out, so
// neither s nor b is read or modified. The disabled code below is kept as a
// sketch of the intended backwards walk over b.Values, which would track
// whether flags are live and tag each MOVDconst seen while they are.
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
	//	flive := b.FlagsLiveAtEnd
	//	if b.Control != nil && b.Control.Type.IsFlags() {
	//		flive = true
	//	}
	//	for i := len(b.Values) - 1; i >= 0; i-- {
	//		v := b.Values[i]
	//		if flive && (v.Op == ssa.OpPPC64MOVDconst) {
	//			// The "mark" is any non-nil Aux value.
	//			v.Aux = v
	//		}
	//		if v.Type.IsFlags() {
	//			flive = false
	//		}
	//		for _, a := range v.Args {
	//			if a.Type.IsFlags() {
	//				flive = true
	//			}
	//		}
	//	}
}
    44  
    45  // loadByType returns the load instruction of the given type.
    46  func loadByType(t *types.Type) obj.As {
    47  	if t.IsFloat() {
    48  		switch t.Size() {
    49  		case 4:
    50  			return ppc64.AFMOVS
    51  		case 8:
    52  			return ppc64.AFMOVD
    53  		}
    54  	} else {
    55  		switch t.Size() {
    56  		case 1:
    57  			if t.IsSigned() {
    58  				return ppc64.AMOVB
    59  			} else {
    60  				return ppc64.AMOVBZ
    61  			}
    62  		case 2:
    63  			if t.IsSigned() {
    64  				return ppc64.AMOVH
    65  			} else {
    66  				return ppc64.AMOVHZ
    67  			}
    68  		case 4:
    69  			if t.IsSigned() {
    70  				return ppc64.AMOVW
    71  			} else {
    72  				return ppc64.AMOVWZ
    73  			}
    74  		case 8:
    75  			return ppc64.AMOVD
    76  		}
    77  	}
    78  	panic("bad load type")
    79  }
    80  
    81  // storeByType returns the store instruction of the given type.
    82  func storeByType(t *types.Type) obj.As {
    83  	if t.IsFloat() {
    84  		switch t.Size() {
    85  		case 4:
    86  			return ppc64.AFMOVS
    87  		case 8:
    88  			return ppc64.AFMOVD
    89  		}
    90  	} else {
    91  		switch t.Size() {
    92  		case 1:
    93  			return ppc64.AMOVB
    94  		case 2:
    95  			return ppc64.AMOVH
    96  		case 4:
    97  			return ppc64.AMOVW
    98  		case 8:
    99  			return ppc64.AMOVD
   100  		}
   101  	}
   102  	panic("bad store type")
   103  }
   104  
   105  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   106  	switch v.Op {
   107  	case ssa.OpCopy:
   108  		t := v.Type
   109  		if t.IsMemory() {
   110  			return
   111  		}
   112  		x := v.Args[0].Reg()
   113  		y := v.Reg()
   114  		if x != y {
   115  			rt := obj.TYPE_REG
   116  			op := ppc64.AMOVD
   117  
   118  			if t.IsFloat() {
   119  				op = ppc64.AFMOVD
   120  			}
   121  			p := s.Prog(op)
   122  			p.From.Type = rt
   123  			p.From.Reg = x
   124  			p.To.Type = rt
   125  			p.To.Reg = y
   126  		}
   127  
   128  	case ssa.OpPPC64LoweredAtomicAnd8,
   129  		ssa.OpPPC64LoweredAtomicAnd32,
   130  		ssa.OpPPC64LoweredAtomicOr8,
   131  		ssa.OpPPC64LoweredAtomicOr32:
   132  		// LWSYNC
   133  		// LBAR/LWAR	(Rarg0), Rtmp
   134  		// AND/OR	Rarg1, Rtmp
   135  		// STBCCC/STWCCC Rtmp, (Rarg0)
   136  		// BNE		-3(PC)
   137  		ld := ppc64.ALBAR
   138  		st := ppc64.ASTBCCC
   139  		if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
   140  			ld = ppc64.ALWAR
   141  			st = ppc64.ASTWCCC
   142  		}
   143  		r0 := v.Args[0].Reg()
   144  		r1 := v.Args[1].Reg()
   145  		// LWSYNC - Assuming shared data not write-through-required nor
   146  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   147  		plwsync := s.Prog(ppc64.ALWSYNC)
   148  		plwsync.To.Type = obj.TYPE_NONE
   149  		// LBAR or LWAR
   150  		p := s.Prog(ld)
   151  		p.From.Type = obj.TYPE_MEM
   152  		p.From.Reg = r0
   153  		p.To.Type = obj.TYPE_REG
   154  		p.To.Reg = ppc64.REGTMP
   155  		// AND/OR reg1,out
   156  		p1 := s.Prog(v.Op.Asm())
   157  		p1.From.Type = obj.TYPE_REG
   158  		p1.From.Reg = r1
   159  		p1.To.Type = obj.TYPE_REG
   160  		p1.To.Reg = ppc64.REGTMP
   161  		// STBCCC or STWCCC
   162  		p2 := s.Prog(st)
   163  		p2.From.Type = obj.TYPE_REG
   164  		p2.From.Reg = ppc64.REGTMP
   165  		p2.To.Type = obj.TYPE_MEM
   166  		p2.To.Reg = r0
   167  		p2.RegTo2 = ppc64.REGTMP
   168  		// BNE retry
   169  		p3 := s.Prog(ppc64.ABNE)
   170  		p3.To.Type = obj.TYPE_BRANCH
   171  		p3.To.SetTarget(p)
   172  
   173  	case ssa.OpPPC64LoweredAtomicAdd32,
   174  		ssa.OpPPC64LoweredAtomicAdd64:
   175  		// LWSYNC
   176  		// LDAR/LWAR    (Rarg0), Rout
   177  		// ADD		Rarg1, Rout
   178  		// STDCCC/STWCCC Rout, (Rarg0)
   179  		// BNE         -3(PC)
   180  		// MOVW		Rout,Rout (if Add32)
   181  		ld := ppc64.ALDAR
   182  		st := ppc64.ASTDCCC
   183  		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
   184  			ld = ppc64.ALWAR
   185  			st = ppc64.ASTWCCC
   186  		}
   187  		r0 := v.Args[0].Reg()
   188  		r1 := v.Args[1].Reg()
   189  		out := v.Reg0()
   190  		// LWSYNC - Assuming shared data not write-through-required nor
   191  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   192  		plwsync := s.Prog(ppc64.ALWSYNC)
   193  		plwsync.To.Type = obj.TYPE_NONE
   194  		// LDAR or LWAR
   195  		p := s.Prog(ld)
   196  		p.From.Type = obj.TYPE_MEM
   197  		p.From.Reg = r0
   198  		p.To.Type = obj.TYPE_REG
   199  		p.To.Reg = out
   200  		// ADD reg1,out
   201  		p1 := s.Prog(ppc64.AADD)
   202  		p1.From.Type = obj.TYPE_REG
   203  		p1.From.Reg = r1
   204  		p1.To.Reg = out
   205  		p1.To.Type = obj.TYPE_REG
   206  		// STDCCC or STWCCC
   207  		p3 := s.Prog(st)
   208  		p3.From.Type = obj.TYPE_REG
   209  		p3.From.Reg = out
   210  		p3.To.Type = obj.TYPE_MEM
   211  		p3.To.Reg = r0
   212  		// BNE retry
   213  		p4 := s.Prog(ppc64.ABNE)
   214  		p4.To.Type = obj.TYPE_BRANCH
   215  		p4.To.SetTarget(p)
   216  
   217  		// Ensure a 32 bit result
   218  		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
   219  			p5 := s.Prog(ppc64.AMOVWZ)
   220  			p5.To.Type = obj.TYPE_REG
   221  			p5.To.Reg = out
   222  			p5.From.Type = obj.TYPE_REG
   223  			p5.From.Reg = out
   224  		}
   225  
   226  	case ssa.OpPPC64LoweredAtomicExchange32,
   227  		ssa.OpPPC64LoweredAtomicExchange64:
   228  		// LWSYNC
   229  		// LDAR/LWAR    (Rarg0), Rout
   230  		// STDCCC/STWCCC Rout, (Rarg0)
   231  		// BNE         -2(PC)
   232  		// ISYNC
   233  		ld := ppc64.ALDAR
   234  		st := ppc64.ASTDCCC
   235  		if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
   236  			ld = ppc64.ALWAR
   237  			st = ppc64.ASTWCCC
   238  		}
   239  		r0 := v.Args[0].Reg()
   240  		r1 := v.Args[1].Reg()
   241  		out := v.Reg0()
   242  		// LWSYNC - Assuming shared data not write-through-required nor
   243  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   244  		plwsync := s.Prog(ppc64.ALWSYNC)
   245  		plwsync.To.Type = obj.TYPE_NONE
   246  		// LDAR or LWAR
   247  		p := s.Prog(ld)
   248  		p.From.Type = obj.TYPE_MEM
   249  		p.From.Reg = r0
   250  		p.To.Type = obj.TYPE_REG
   251  		p.To.Reg = out
   252  		// STDCCC or STWCCC
   253  		p1 := s.Prog(st)
   254  		p1.From.Type = obj.TYPE_REG
   255  		p1.From.Reg = r1
   256  		p1.To.Type = obj.TYPE_MEM
   257  		p1.To.Reg = r0
   258  		// BNE retry
   259  		p2 := s.Prog(ppc64.ABNE)
   260  		p2.To.Type = obj.TYPE_BRANCH
   261  		p2.To.SetTarget(p)
   262  		// ISYNC
   263  		pisync := s.Prog(ppc64.AISYNC)
   264  		pisync.To.Type = obj.TYPE_NONE
   265  
   266  	case ssa.OpPPC64LoweredAtomicLoad8,
   267  		ssa.OpPPC64LoweredAtomicLoad32,
   268  		ssa.OpPPC64LoweredAtomicLoad64,
   269  		ssa.OpPPC64LoweredAtomicLoadPtr:
   270  		// SYNC
   271  		// MOVB/MOVD/MOVW (Rarg0), Rout
   272  		// CMP Rout,Rout
   273  		// BNE 1(PC)
   274  		// ISYNC
   275  		ld := ppc64.AMOVD
   276  		cmp := ppc64.ACMP
   277  		switch v.Op {
   278  		case ssa.OpPPC64LoweredAtomicLoad8:
   279  			ld = ppc64.AMOVBZ
   280  		case ssa.OpPPC64LoweredAtomicLoad32:
   281  			ld = ppc64.AMOVWZ
   282  			cmp = ppc64.ACMPW
   283  		}
   284  		arg0 := v.Args[0].Reg()
   285  		out := v.Reg0()
   286  		// SYNC when AuxInt == 1; otherwise, load-acquire
   287  		if v.AuxInt == 1 {
   288  			psync := s.Prog(ppc64.ASYNC)
   289  			psync.To.Type = obj.TYPE_NONE
   290  		}
   291  		// Load
   292  		p := s.Prog(ld)
   293  		p.From.Type = obj.TYPE_MEM
   294  		p.From.Reg = arg0
   295  		p.To.Type = obj.TYPE_REG
   296  		p.To.Reg = out
   297  		// CMP
   298  		p1 := s.Prog(cmp)
   299  		p1.From.Type = obj.TYPE_REG
   300  		p1.From.Reg = out
   301  		p1.To.Type = obj.TYPE_REG
   302  		p1.To.Reg = out
   303  		// BNE
   304  		p2 := s.Prog(ppc64.ABNE)
   305  		p2.To.Type = obj.TYPE_BRANCH
   306  		// ISYNC
   307  		pisync := s.Prog(ppc64.AISYNC)
   308  		pisync.To.Type = obj.TYPE_NONE
   309  		p2.To.SetTarget(pisync)
   310  
   311  	case ssa.OpPPC64LoweredAtomicStore8,
   312  		ssa.OpPPC64LoweredAtomicStore32,
   313  		ssa.OpPPC64LoweredAtomicStore64:
   314  		// SYNC or LWSYNC
   315  		// MOVB/MOVW/MOVD arg1,(arg0)
   316  		st := ppc64.AMOVD
   317  		switch v.Op {
   318  		case ssa.OpPPC64LoweredAtomicStore8:
   319  			st = ppc64.AMOVB
   320  		case ssa.OpPPC64LoweredAtomicStore32:
   321  			st = ppc64.AMOVW
   322  		}
   323  		arg0 := v.Args[0].Reg()
   324  		arg1 := v.Args[1].Reg()
   325  		// If AuxInt == 0, LWSYNC (Store-Release), else SYNC
   326  		// SYNC
   327  		syncOp := ppc64.ASYNC
   328  		if v.AuxInt == 0 {
   329  			syncOp = ppc64.ALWSYNC
   330  		}
   331  		psync := s.Prog(syncOp)
   332  		psync.To.Type = obj.TYPE_NONE
   333  		// Store
   334  		p := s.Prog(st)
   335  		p.To.Type = obj.TYPE_MEM
   336  		p.To.Reg = arg0
   337  		p.From.Type = obj.TYPE_REG
   338  		p.From.Reg = arg1
   339  
   340  	case ssa.OpPPC64LoweredAtomicCas64,
   341  		ssa.OpPPC64LoweredAtomicCas32:
   342  		// MOVD        $0, Rout
   343  		// LWSYNC
   344  		// loop:
   345  		// LDAR        (Rarg0), MutexHint, Rtmp
   346  		// CMP         Rarg1, Rtmp
   347  		// BNE         end
   348  		// STDCCC      Rarg2, (Rarg0)
   349  		// BNE         loop
   350  		// MOVD        $1, Rout
   351  		// end:
   352  		// LWSYNC      // Only for sequential consistency; not required in CasRel.
   353  		ld := ppc64.ALDAR
   354  		st := ppc64.ASTDCCC
   355  		cmp := ppc64.ACMP
   356  		if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
   357  			ld = ppc64.ALWAR
   358  			st = ppc64.ASTWCCC
   359  			cmp = ppc64.ACMPW
   360  		}
   361  		r0 := v.Args[0].Reg()
   362  		r1 := v.Args[1].Reg()
   363  		r2 := v.Args[2].Reg()
   364  		out := v.Reg0()
   365  		// Initialize return value to false
   366  		p := s.Prog(ppc64.AMOVD)
   367  		p.From.Type = obj.TYPE_CONST
   368  		p.From.Offset = 0
   369  		p.To.Type = obj.TYPE_REG
   370  		p.To.Reg = out
   371  		// LWSYNC - Assuming shared data not write-through-required nor
   372  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   373  		plwsync1 := s.Prog(ppc64.ALWSYNC)
   374  		plwsync1.To.Type = obj.TYPE_NONE
   375  		// LDAR or LWAR
   376  		p0 := s.Prog(ld)
   377  		p0.From.Type = obj.TYPE_MEM
   378  		p0.From.Reg = r0
   379  		p0.To.Type = obj.TYPE_REG
   380  		p0.To.Reg = ppc64.REGTMP
   381  		// If it is a Compare-and-Swap-Release operation, set the EH field with
   382  		// the release hint.
   383  		if v.AuxInt == 0 {
   384  			p0.AddRestSourceConst(0)
   385  		}
   386  		// CMP reg1,reg2
   387  		p1 := s.Prog(cmp)
   388  		p1.From.Type = obj.TYPE_REG
   389  		p1.From.Reg = r1
   390  		p1.To.Reg = ppc64.REGTMP
   391  		p1.To.Type = obj.TYPE_REG
   392  		// BNE done with return value = false
   393  		p2 := s.Prog(ppc64.ABNE)
   394  		p2.To.Type = obj.TYPE_BRANCH
   395  		// STDCCC or STWCCC
   396  		p3 := s.Prog(st)
   397  		p3.From.Type = obj.TYPE_REG
   398  		p3.From.Reg = r2
   399  		p3.To.Type = obj.TYPE_MEM
   400  		p3.To.Reg = r0
   401  		// BNE retry
   402  		p4 := s.Prog(ppc64.ABNE)
   403  		p4.To.Type = obj.TYPE_BRANCH
   404  		p4.To.SetTarget(p0)
   405  		// return value true
   406  		p5 := s.Prog(ppc64.AMOVD)
   407  		p5.From.Type = obj.TYPE_CONST
   408  		p5.From.Offset = 1
   409  		p5.To.Type = obj.TYPE_REG
   410  		p5.To.Reg = out
   411  		// LWSYNC - Assuming shared data not write-through-required nor
   412  		// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
   413  		// If the operation is a CAS-Release, then synchronization is not necessary.
   414  		if v.AuxInt != 0 {
   415  			plwsync2 := s.Prog(ppc64.ALWSYNC)
   416  			plwsync2.To.Type = obj.TYPE_NONE
   417  			p2.To.SetTarget(plwsync2)
   418  		} else {
   419  			// done (label)
   420  			p6 := s.Prog(obj.ANOP)
   421  			p2.To.SetTarget(p6)
   422  		}
   423  
   424  	case ssa.OpPPC64LoweredPubBarrier:
   425  		// LWSYNC
   426  		s.Prog(v.Op.Asm())
   427  
   428  	case ssa.OpPPC64LoweredGetClosurePtr:
   429  		// Closure pointer is R11 (already)
   430  		ssagen.CheckLoweredGetClosurePtr(v)
   431  
   432  	case ssa.OpPPC64LoweredGetCallerSP:
   433  		// caller's SP is FixedFrameSize below the address of the first arg
   434  		p := s.Prog(ppc64.AMOVD)
   435  		p.From.Type = obj.TYPE_ADDR
   436  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
   437  		p.From.Name = obj.NAME_PARAM
   438  		p.To.Type = obj.TYPE_REG
   439  		p.To.Reg = v.Reg()
   440  
   441  	case ssa.OpPPC64LoweredGetCallerPC:
   442  		p := s.Prog(obj.AGETCALLERPC)
   443  		p.To.Type = obj.TYPE_REG
   444  		p.To.Reg = v.Reg()
   445  
   446  	case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
   447  		// input is already rounded
   448  
   449  	case ssa.OpLoadReg:
   450  		loadOp := loadByType(v.Type)
   451  		p := s.Prog(loadOp)
   452  		ssagen.AddrAuto(&p.From, v.Args[0])
   453  		p.To.Type = obj.TYPE_REG
   454  		p.To.Reg = v.Reg()
   455  
   456  	case ssa.OpStoreReg:
   457  		storeOp := storeByType(v.Type)
   458  		p := s.Prog(storeOp)
   459  		p.From.Type = obj.TYPE_REG
   460  		p.From.Reg = v.Args[0].Reg()
   461  		ssagen.AddrAuto(&p.To, v)
   462  
   463  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
   464  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
   465  		// The loop only runs once.
   466  		for _, a := range v.Block.Func.RegArgs {
   467  			// Pass the spill/unspill information along to the assembler, offset by size of
   468  			// the saved LR slot.
   469  			addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
   470  			s.FuncInfo().AddSpill(
   471  				obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
   472  		}
   473  		v.Block.Func.RegArgs = nil
   474  
   475  		ssagen.CheckArgReg(v)
   476  
   477  	case ssa.OpPPC64DIVD:
   478  		// For now,
   479  		//
   480  		// cmp arg1, -1
   481  		// be  ahead
   482  		// v = arg0 / arg1
   483  		// b over
   484  		// ahead: v = - arg0
   485  		// over: nop
   486  		r := v.Reg()
   487  		r0 := v.Args[0].Reg()
   488  		r1 := v.Args[1].Reg()
   489  
   490  		p := s.Prog(ppc64.ACMP)
   491  		p.From.Type = obj.TYPE_REG
   492  		p.From.Reg = r1
   493  		p.To.Type = obj.TYPE_CONST
   494  		p.To.Offset = -1
   495  
   496  		pbahead := s.Prog(ppc64.ABEQ)
   497  		pbahead.To.Type = obj.TYPE_BRANCH
   498  
   499  		p = s.Prog(v.Op.Asm())
   500  		p.From.Type = obj.TYPE_REG
   501  		p.From.Reg = r1
   502  		p.Reg = r0
   503  		p.To.Type = obj.TYPE_REG
   504  		p.To.Reg = r
   505  
   506  		pbover := s.Prog(obj.AJMP)
   507  		pbover.To.Type = obj.TYPE_BRANCH
   508  
   509  		p = s.Prog(ppc64.ANEG)
   510  		p.To.Type = obj.TYPE_REG
   511  		p.To.Reg = r
   512  		p.From.Type = obj.TYPE_REG
   513  		p.From.Reg = r0
   514  		pbahead.To.SetTarget(p)
   515  
   516  		p = s.Prog(obj.ANOP)
   517  		pbover.To.SetTarget(p)
   518  
   519  	case ssa.OpPPC64DIVW:
   520  		// word-width version of above
   521  		r := v.Reg()
   522  		r0 := v.Args[0].Reg()
   523  		r1 := v.Args[1].Reg()
   524  
   525  		p := s.Prog(ppc64.ACMPW)
   526  		p.From.Type = obj.TYPE_REG
   527  		p.From.Reg = r1
   528  		p.To.Type = obj.TYPE_CONST
   529  		p.To.Offset = -1
   530  
   531  		pbahead := s.Prog(ppc64.ABEQ)
   532  		pbahead.To.Type = obj.TYPE_BRANCH
   533  
   534  		p = s.Prog(v.Op.Asm())
   535  		p.From.Type = obj.TYPE_REG
   536  		p.From.Reg = r1
   537  		p.Reg = r0
   538  		p.To.Type = obj.TYPE_REG
   539  		p.To.Reg = r
   540  
   541  		pbover := s.Prog(obj.AJMP)
   542  		pbover.To.Type = obj.TYPE_BRANCH
   543  
   544  		p = s.Prog(ppc64.ANEG)
   545  		p.To.Type = obj.TYPE_REG
   546  		p.To.Reg = r
   547  		p.From.Type = obj.TYPE_REG
   548  		p.From.Reg = r0
   549  		pbahead.To.SetTarget(p)
   550  
   551  		p = s.Prog(obj.ANOP)
   552  		pbover.To.SetTarget(p)
   553  
   554  	case ssa.OpPPC64CLRLSLWI:
   555  		r := v.Reg()
   556  		r1 := v.Args[0].Reg()
   557  		shifts := v.AuxInt
   558  		p := s.Prog(v.Op.Asm())
   559  		// clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA
   560  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
   561  		p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
   562  		p.Reg = r1
   563  		p.To.Type = obj.TYPE_REG
   564  		p.To.Reg = r
   565  
   566  	case ssa.OpPPC64CLRLSLDI:
   567  		r := v.Reg()
   568  		r1 := v.Args[0].Reg()
   569  		shifts := v.AuxInt
   570  		p := s.Prog(v.Op.Asm())
   571  		// clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh
   572  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}
   573  		p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts))
   574  		p.Reg = r1
   575  		p.To.Type = obj.TYPE_REG
   576  		p.To.Reg = r
   577  
   578  	case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
   579  		ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
   580  		ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
   581  		ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
   582  		ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
   583  		ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
   584  		ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
   585  		ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
   586  		r := v.Reg()
   587  		r1 := v.Args[0].Reg()
   588  		r2 := v.Args[1].Reg()
   589  		p := s.Prog(v.Op.Asm())
   590  		p.From.Type = obj.TYPE_REG
   591  		p.From.Reg = r2
   592  		p.Reg = r1
   593  		p.To.Type = obj.TYPE_REG
   594  		p.To.Reg = r
   595  
   596  	case ssa.OpPPC64ADDCC, ssa.OpPPC64ANDCC, ssa.OpPPC64SUBCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC, ssa.OpPPC64NORCC,
   597  		ssa.OpPPC64ANDNCC:
   598  		r1 := v.Args[0].Reg()
   599  		r2 := v.Args[1].Reg()
   600  		p := s.Prog(v.Op.Asm())
   601  		p.From.Type = obj.TYPE_REG
   602  		p.From.Reg = r2
   603  		p.Reg = r1
   604  		p.To.Type = obj.TYPE_REG
   605  		p.To.Reg = v.Reg0()
   606  
   607  	case ssa.OpPPC64NEGCC, ssa.OpPPC64CNTLZDCC:
   608  		p := s.Prog(v.Op.Asm())
   609  		p.To.Type = obj.TYPE_REG
   610  		p.To.Reg = v.Reg0()
   611  		p.From.Type = obj.TYPE_REG
   612  		p.From.Reg = v.Args[0].Reg()
   613  
   614  	case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
   615  		p := s.Prog(v.Op.Asm())
   616  		p.From.Type = obj.TYPE_CONST
   617  		p.From.Offset = v.AuxInt
   618  		p.Reg = v.Args[0].Reg()
   619  		p.To.Type = obj.TYPE_REG
   620  		p.To.Reg = v.Reg()
   621  
   622  		// Auxint holds encoded rotate + mask
   623  	case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
   624  		sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
   625  		p := s.Prog(v.Op.Asm())
   626  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   627  		p.Reg = v.Args[0].Reg()
   628  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(sh)}
   629  		p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
   630  		// Auxint holds mask
   631  
   632  	case ssa.OpPPC64RLDICL, ssa.OpPPC64RLDICR:
   633  		sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
   634  		p := s.Prog(v.Op.Asm())
   635  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: sh}
   636  		switch v.Op {
   637  		case ssa.OpPPC64RLDICL:
   638  			p.AddRestSourceConst(mb)
   639  		case ssa.OpPPC64RLDICR:
   640  			p.AddRestSourceConst(me)
   641  		}
   642  		p.Reg = v.Args[0].Reg()
   643  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   644  
   645  	case ssa.OpPPC64RLWNM:
   646  		_, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt)
   647  		p := s.Prog(v.Op.Asm())
   648  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   649  		p.Reg = v.Args[0].Reg()
   650  		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
   651  		p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}})
   652  
   653  	case ssa.OpPPC64MADDLD:
   654  		r := v.Reg()
   655  		r1 := v.Args[0].Reg()
   656  		r2 := v.Args[1].Reg()
   657  		r3 := v.Args[2].Reg()
   658  		// r = r1*r2 ± r3
   659  		p := s.Prog(v.Op.Asm())
   660  		p.From.Type = obj.TYPE_REG
   661  		p.From.Reg = r1
   662  		p.Reg = r2
   663  		p.AddRestSourceReg(r3)
   664  		p.To.Type = obj.TYPE_REG
   665  		p.To.Reg = r
   666  
   667  	case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
   668  		r := v.Reg()
   669  		r1 := v.Args[0].Reg()
   670  		r2 := v.Args[1].Reg()
   671  		r3 := v.Args[2].Reg()
   672  		// r = r1*r2 ± r3
   673  		p := s.Prog(v.Op.Asm())
   674  		p.From.Type = obj.TYPE_REG
   675  		p.From.Reg = r1
   676  		p.Reg = r3
   677  		p.AddRestSourceReg(r2)
   678  		p.To.Type = obj.TYPE_REG
   679  		p.To.Reg = r
   680  
   681  	case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
   682  		ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
   683  		ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
   684  		ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD, ssa.OpPPC64BRH, ssa.OpPPC64BRW, ssa.OpPPC64BRD:
   685  		r := v.Reg()
   686  		p := s.Prog(v.Op.Asm())
   687  		p.To.Type = obj.TYPE_REG
   688  		p.To.Reg = r
   689  		p.From.Type = obj.TYPE_REG
   690  		p.From.Reg = v.Args[0].Reg()
   691  
   692  	case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
   693  		ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst,
   694  		ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst:
   695  		p := s.Prog(v.Op.Asm())
   696  		p.Reg = v.Args[0].Reg()
   697  		p.From.Type = obj.TYPE_CONST
   698  		p.From.Offset = v.AuxInt
   699  		p.To.Type = obj.TYPE_REG
   700  		p.To.Reg = v.Reg()
   701  
   702  	case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE:
   703  		r := v.Reg0()	// CA is the first, implied argument.
   704  		r1 := v.Args[0].Reg()
   705  		r2 := v.Args[1].Reg()
   706  		p := s.Prog(v.Op.Asm())
   707  		p.From.Type = obj.TYPE_REG
   708  		p.From.Reg = r2
   709  		p.Reg = r1
   710  		p.To.Type = obj.TYPE_REG
   711  		p.To.Reg = r
   712  
   713  	case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero:
   714  		p := s.Prog(v.Op.Asm())
   715  		p.From.Type = obj.TYPE_REG
   716  		p.From.Reg = ppc64.REG_R0
   717  		p.To.Type = obj.TYPE_REG
   718  		p.To.Reg = v.Reg()
   719  
   720  	case ssa.OpPPC64ADDCconst:
   721  		p := s.Prog(v.Op.Asm())
   722  		p.Reg = v.Args[0].Reg()
   723  		p.From.Type = obj.TYPE_CONST
   724  		p.From.Offset = v.AuxInt
   725  		p.To.Type = obj.TYPE_REG
   726  		// Output is a pair, the second is the CA, which is implied.
   727  		p.To.Reg = v.Reg0()
   728  
   729  	case ssa.OpPPC64SUBCconst:
   730  		p := s.Prog(v.Op.Asm())
   731  		p.AddRestSourceConst(v.AuxInt)
   732  		p.From.Type = obj.TYPE_REG
   733  		p.From.Reg = v.Args[0].Reg()
   734  		p.To.Type = obj.TYPE_REG
   735  		p.To.Reg = v.Reg0()
   736  
   737  	case ssa.OpPPC64SUBFCconst:
   738  		p := s.Prog(v.Op.Asm())
   739  		p.AddRestSourceConst(v.AuxInt)
   740  		p.From.Type = obj.TYPE_REG
   741  		p.From.Reg = v.Args[0].Reg()
   742  		p.To.Type = obj.TYPE_REG
   743  		p.To.Reg = v.Reg()
   744  
   745  	case ssa.OpPPC64ADDCCconst, ssa.OpPPC64ANDCCconst:
   746  		p := s.Prog(v.Op.Asm())
   747  		p.Reg = v.Args[0].Reg()
   748  		p.From.Type = obj.TYPE_CONST
   749  		p.From.Offset = v.AuxInt
   750  		p.To.Type = obj.TYPE_REG
   751  		p.To.Reg = v.Reg0()
   752  
   753  	case ssa.OpPPC64MOVDaddr:
   754  		switch v.Aux.(type) {
   755  		default:
   756  			v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
   757  		case nil:
   758  			// If aux offset and aux int are both 0, and the same
   759  			// input and output regs are used, no instruction
   760  			// needs to be generated, since it would just be
   761  			// addi rx, rx, 0.
   762  			if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
   763  				p := s.Prog(ppc64.AMOVD)
   764  				p.From.Type = obj.TYPE_ADDR
   765  				p.From.Reg = v.Args[0].Reg()
   766  				p.From.Offset = v.AuxInt
   767  				p.To.Type = obj.TYPE_REG
   768  				p.To.Reg = v.Reg()
   769  			}
   770  
   771  		case *obj.LSym, ir.Node:
   772  			p := s.Prog(ppc64.AMOVD)
   773  			p.From.Type = obj.TYPE_ADDR
   774  			p.From.Reg = v.Args[0].Reg()
   775  			p.To.Type = obj.TYPE_REG
   776  			p.To.Reg = v.Reg()
   777  			ssagen.AddAux(&p.From, v)
   778  
   779  		}
   780  
   781  	case ssa.OpPPC64MOVDconst:
   782  		p := s.Prog(v.Op.Asm())
   783  		p.From.Type = obj.TYPE_CONST
   784  		p.From.Offset = v.AuxInt
   785  		p.To.Type = obj.TYPE_REG
   786  		p.To.Reg = v.Reg()
   787  
   788  	case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
   789  		p := s.Prog(v.Op.Asm())
   790  		p.From.Type = obj.TYPE_FCONST
   791  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   792  		p.To.Type = obj.TYPE_REG
   793  		p.To.Reg = v.Reg()
   794  
   795  	case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
   796  		p := s.Prog(v.Op.Asm())
   797  		p.From.Type = obj.TYPE_REG
   798  		p.From.Reg = v.Args[0].Reg()
   799  		p.To.Type = obj.TYPE_REG
   800  		p.To.Reg = v.Args[1].Reg()
   801  
   802  	case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
   803  		p := s.Prog(v.Op.Asm())
   804  		p.From.Type = obj.TYPE_REG
   805  		p.From.Reg = v.Args[0].Reg()
   806  		p.To.Type = obj.TYPE_CONST
   807  		p.To.Offset = v.AuxInt
   808  
   809  	case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
   810  		// Shift in register to required size
   811  		p := s.Prog(v.Op.Asm())
   812  		p.From.Type = obj.TYPE_REG
   813  		p.From.Reg = v.Args[0].Reg()
   814  		p.To.Reg = v.Reg()
   815  		p.To.Type = obj.TYPE_REG
   816  
   817  	case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
   818  
   819  		// MOVDload and MOVWload are DS form instructions that are restricted to
   820  		// offsets that are a multiple of 4. If the offset is not a multiple of 4,
   821  		// then the address of the symbol to be loaded is computed (base + offset)
   822  		// and used as the new base register and the offset field in the instruction
   823  		// can be set to zero.
   824  
   825  		// This same problem can happen with gostrings since the final offset is not
   826  		// known yet, but could be unaligned after the relocation is resolved.
   827  		// So gostrings are handled the same way.
   828  
   829  		// This allows the MOVDload and MOVWload to be generated in more cases and
   830  		// eliminates some offset and alignment checking in the rules file.
   831  
   832  		fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
   833  		ssagen.AddAux(&fromAddr, v)
   834  
   835  		genAddr := false
   836  
   837  		switch fromAddr.Name {
   838  		case obj.NAME_EXTERN, obj.NAME_STATIC:
   839  			// Special case for a rule combines the bytes of gostring.
   840  			// The v alignment might seem OK, but we don't want to load it
   841  			// using an offset because relocation comes later.
   842  			genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
   843  		default:
   844  			genAddr = fromAddr.Offset%4 != 0
   845  		}
   846  		if genAddr {
   847  			// Load full address into the temp register.
   848  			p := s.Prog(ppc64.AMOVD)
   849  			p.From.Type = obj.TYPE_ADDR
   850  			p.From.Reg = v.Args[0].Reg()
   851  			ssagen.AddAux(&p.From, v)
   852  			// Load target using temp as base register
   853  			// and offset zero. Setting NAME_NONE
   854  			// prevents any extra offsets from being
   855  			// added.
   856  			p.To.Type = obj.TYPE_REG
   857  			p.To.Reg = ppc64.REGTMP
   858  			fromAddr.Reg = ppc64.REGTMP
   859  			// Clear the offset field and other
   860  			// information that might be used
   861  			// by the assembler to add to the
   862  			// final offset value.
   863  			fromAddr.Offset = 0
   864  			fromAddr.Name = obj.NAME_NONE
   865  			fromAddr.Sym = nil
   866  		}
   867  		p := s.Prog(v.Op.Asm())
   868  		p.From = fromAddr
   869  		p.To.Type = obj.TYPE_REG
   870  		p.To.Reg = v.Reg()
   871  
   872  	case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
   873  		p := s.Prog(v.Op.Asm())
   874  		p.From.Type = obj.TYPE_MEM
   875  		p.From.Reg = v.Args[0].Reg()
   876  		ssagen.AddAux(&p.From, v)
   877  		p.To.Type = obj.TYPE_REG
   878  		p.To.Reg = v.Reg()
   879  
   880  	case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
   881  		p := s.Prog(v.Op.Asm())
   882  		p.From.Type = obj.TYPE_MEM
   883  		p.From.Reg = v.Args[0].Reg()
   884  		p.To.Type = obj.TYPE_REG
   885  		p.To.Reg = v.Reg()
   886  
   887  	case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
   888  		p := s.Prog(v.Op.Asm())
   889  		p.To.Type = obj.TYPE_MEM
   890  		p.To.Reg = v.Args[0].Reg()
   891  		p.From.Type = obj.TYPE_REG
   892  		p.From.Reg = v.Args[1].Reg()
   893  
   894  	case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
   895  		ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
   896  		ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
   897  		p := s.Prog(v.Op.Asm())
   898  		p.From.Type = obj.TYPE_MEM
   899  		p.From.Reg = v.Args[0].Reg()
   900  		p.From.Index = v.Args[1].Reg()
   901  		p.To.Type = obj.TYPE_REG
   902  		p.To.Reg = v.Reg()
   903  
   904  	case ssa.OpPPC64DCBT:
   905  		p := s.Prog(v.Op.Asm())
   906  		p.From.Type = obj.TYPE_MEM
   907  		p.From.Reg = v.Args[0].Reg()
   908  		p.To.Type = obj.TYPE_CONST
   909  		p.To.Offset = v.AuxInt
   910  
   911  	case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
   912  		p := s.Prog(v.Op.Asm())
   913  		p.From.Type = obj.TYPE_REG
   914  		p.From.Reg = ppc64.REGZERO
   915  		p.To.Type = obj.TYPE_MEM
   916  		p.To.Reg = v.Args[0].Reg()
   917  		ssagen.AddAux(&p.To, v)
   918  
   919  	case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
   920  
   921  		// MOVDstore and MOVDstorezero become DS form instructions that are restricted
   922  		// to offset values that are a multiple of 4. If the offset field is not a
   923  		// multiple of 4, then the full address of the store target is computed (base +
   924  		// offset) and used as the new base register and the offset in the instruction
   925  		// is set to 0.
   926  
   927  		// This allows the MOVDstore and MOVDstorezero to be generated in more cases,
   928  		// and prevents checking of the offset value and alignment in the rules.
   929  
   930  		toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
   931  		ssagen.AddAux(&toAddr, v)
   932  
   933  		if toAddr.Offset%4 != 0 {
   934  			p := s.Prog(ppc64.AMOVD)
   935  			p.From.Type = obj.TYPE_ADDR
   936  			p.From.Reg = v.Args[0].Reg()
   937  			ssagen.AddAux(&p.From, v)
   938  			p.To.Type = obj.TYPE_REG
   939  			p.To.Reg = ppc64.REGTMP
   940  			toAddr.Reg = ppc64.REGTMP
   941  			// Clear the offset field and other
   942  			// information that might be used
   943  			// by the assembler to add to the
   944  			// final offset value.
   945  			toAddr.Offset = 0
   946  			toAddr.Name = obj.NAME_NONE
   947  			toAddr.Sym = nil
   948  		}
   949  		p := s.Prog(v.Op.Asm())
   950  		p.To = toAddr
   951  		p.From.Type = obj.TYPE_REG
   952  		if v.Op == ssa.OpPPC64MOVDstorezero {
   953  			p.From.Reg = ppc64.REGZERO
   954  		} else {
   955  			p.From.Reg = v.Args[1].Reg()
   956  		}
   957  
   958  	case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
   959  		p := s.Prog(v.Op.Asm())
   960  		p.From.Type = obj.TYPE_REG
   961  		p.From.Reg = v.Args[1].Reg()
   962  		p.To.Type = obj.TYPE_MEM
   963  		p.To.Reg = v.Args[0].Reg()
   964  		ssagen.AddAux(&p.To, v)
   965  
   966  	case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
   967  		ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
   968  		ssa.OpPPC64MOVHBRstoreidx:
   969  		p := s.Prog(v.Op.Asm())
   970  		p.From.Type = obj.TYPE_REG
   971  		p.From.Reg = v.Args[2].Reg()
   972  		p.To.Index = v.Args[1].Reg()
   973  		p.To.Type = obj.TYPE_MEM
   974  		p.To.Reg = v.Args[0].Reg()
   975  
   976  	case ssa.OpPPC64ISEL, ssa.OpPPC64ISELZ:
   977  		// ISEL  AuxInt ? arg0 : arg1
   978  		// ISELZ is a special case of ISEL where arg1 is implicitly $0.
   979  		//
   980  		// AuxInt value indicates conditions 0=LT 1=GT 2=EQ 3=SO 4=GE 5=LE 6=NE 7=NSO.
   981  		// ISEL accepts a CR bit argument, not a condition as expressed by AuxInt.
   982  		// Convert the condition to a CR bit argument by the following conversion:
   983  		//
   984  		// AuxInt&3 ? arg0 : arg1 for conditions LT, GT, EQ, SO
   985  		// AuxInt&3 ? arg1 : arg0 for conditions GE, LE, NE, NSO
   986  		p := s.Prog(v.Op.Asm())
   987  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   988  		p.Reg = v.Args[0].Reg()
   989  		if v.Op == ssa.OpPPC64ISEL {
   990  			p.AddRestSourceReg(v.Args[1].Reg())
   991  		} else {
   992  			p.AddRestSourceReg(ppc64.REG_R0)
   993  		}
   994  		// AuxInt values 4,5,6,7 implemented with reverse operand order from 0,1,2,3
   995  		if v.AuxInt > 3 {
   996  			p.Reg, p.GetFrom3().Reg = p.GetFrom3().Reg, p.Reg
   997  		}
   998  		p.From.SetConst(v.AuxInt & 3)
   999  
  1000  	case ssa.OpPPC64SETBC, ssa.OpPPC64SETBCR:
  1001  		p := s.Prog(v.Op.Asm())
  1002  		p.To.Type = obj.TYPE_REG
  1003  		p.To.Reg = v.Reg()
  1004  		p.From.Type = obj.TYPE_REG
  1005  		p.From.Reg = int16(ppc64.REG_CR0LT + v.AuxInt)
  1006  
  1007  	case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
  1008  		// The LoweredQuad code generation
  1009  		// generates STXV instructions on
  1010  		// power9. The Short variation is used
  1011  		// if no loop is generated.
  1012  
  1013  		// sizes >= 128 (more than one 64-byte iteration) generate a loop as follows:
  1014  
  1015  		// Set up loop counter in CTR, used by BC
  1016  		// XXLXOR clears VS32
  1017  		//       XXLXOR VS32,VS32,VS32
  1018  		//       MOVD len/64,REG_TMP
  1019  		//       MOVD REG_TMP,CTR
  1020  		//       loop:
  1021  		//       STXV VS32,0(R20)
  1022  		//       STXV VS32,16(R20)
  1023  		//       STXV VS32,32(R20)
  1024  		//       STXV VS32,48(R20)
  1025  		//       ADD  $64,R20
  1026  		//       BC   16, 0, loop
  1027  
  1028  		// Number of 64-byte loop iterations
  1029  		ctr := v.AuxInt / 64
  1030  
  1031  		// Remainder bytes
  1032  		rem := v.AuxInt % 64
  1033  
  1034  		// Only generate a loop if there is more
  1035  		// than 1 iteration.
  1036  		if ctr > 1 {
  1037  			// Set up VS32 (V0) to hold 0s
  1038  			p := s.Prog(ppc64.AXXLXOR)
  1039  			p.From.Type = obj.TYPE_REG
  1040  			p.From.Reg = ppc64.REG_VS32
  1041  			p.To.Type = obj.TYPE_REG
  1042  			p.To.Reg = ppc64.REG_VS32
  1043  			p.Reg = ppc64.REG_VS32
  1044  
  1045  			// Set up CTR loop counter
  1046  			p = s.Prog(ppc64.AMOVD)
  1047  			p.From.Type = obj.TYPE_CONST
  1048  			p.From.Offset = ctr
  1049  			p.To.Type = obj.TYPE_REG
  1050  			p.To.Reg = ppc64.REGTMP
  1051  
  1052  			p = s.Prog(ppc64.AMOVD)
  1053  			p.From.Type = obj.TYPE_REG
  1054  			p.From.Reg = ppc64.REGTMP
  1055  			p.To.Type = obj.TYPE_REG
  1056  			p.To.Reg = ppc64.REG_CTR
  1057  
  1058  			// Don't generate padding for
  1059  			// loops with few iterations.
  1060  			if ctr > 3 {
  1061  				p = s.Prog(obj.APCALIGN)
  1062  				p.From.Type = obj.TYPE_CONST
  1063  				p.From.Offset = 16
  1064  			}
  1065  
  1066  			// generate 4 STXVs to zero 64 bytes
  1067  			var top *obj.Prog
  1068  
  1069  			p = s.Prog(ppc64.ASTXV)
  1070  			p.From.Type = obj.TYPE_REG
  1071  			p.From.Reg = ppc64.REG_VS32
  1072  			p.To.Type = obj.TYPE_MEM
  1073  			p.To.Reg = v.Args[0].Reg()
  1074  
  1075  			//  Save the top of loop
  1076  			if top == nil {
  1077  				top = p
  1078  			}
  1079  			p = s.Prog(ppc64.ASTXV)
  1080  			p.From.Type = obj.TYPE_REG
  1081  			p.From.Reg = ppc64.REG_VS32
  1082  			p.To.Type = obj.TYPE_MEM
  1083  			p.To.Reg = v.Args[0].Reg()
  1084  			p.To.Offset = 16
  1085  
  1086  			p = s.Prog(ppc64.ASTXV)
  1087  			p.From.Type = obj.TYPE_REG
  1088  			p.From.Reg = ppc64.REG_VS32
  1089  			p.To.Type = obj.TYPE_MEM
  1090  			p.To.Reg = v.Args[0].Reg()
  1091  			p.To.Offset = 32
  1092  
  1093  			p = s.Prog(ppc64.ASTXV)
  1094  			p.From.Type = obj.TYPE_REG
  1095  			p.From.Reg = ppc64.REG_VS32
  1096  			p.To.Type = obj.TYPE_MEM
  1097  			p.To.Reg = v.Args[0].Reg()
  1098  			p.To.Offset = 48
  1099  
  1100  			// Increment address for the
  1101  			// 64 bytes just zeroed.
  1102  			p = s.Prog(ppc64.AADD)
  1103  			p.Reg = v.Args[0].Reg()
  1104  			p.From.Type = obj.TYPE_CONST
  1105  			p.From.Offset = 64
  1106  			p.To.Type = obj.TYPE_REG
  1107  			p.To.Reg = v.Args[0].Reg()
  1108  
  1109  			// Branch back to top of loop
  1110  			// based on CTR
  1111  			// BC with BO_BCTR generates bdnz
  1112  			p = s.Prog(ppc64.ABC)
  1113  			p.From.Type = obj.TYPE_CONST
  1114  			p.From.Offset = ppc64.BO_BCTR
  1115  			p.Reg = ppc64.REG_CR0LT
  1116  			p.To.Type = obj.TYPE_BRANCH
  1117  			p.To.SetTarget(top)
  1118  		}
  1119  		// When ctr == 1 the loop was not generated but
  1120  		// there are at least 64 bytes to clear, so add
  1121  		// that to the remainder to generate the code
  1122  		// to clear those doublewords
  1123  		if ctr == 1 {
  1124  			rem += 64
  1125  		}
  1126  
  1127  		// Clear the remainder starting at offset zero
  1128  		offset := int64(0)
  1129  
  1130  		if rem >= 16 && ctr <= 1 {
  1131  			// If the XXLXOR hasn't already been
  1132  			// generated, do it here to initialize
  1133  			// VS32 (V0) to 0.
  1134  			p := s.Prog(ppc64.AXXLXOR)
  1135  			p.From.Type = obj.TYPE_REG
  1136  			p.From.Reg = ppc64.REG_VS32
  1137  			p.To.Type = obj.TYPE_REG
  1138  			p.To.Reg = ppc64.REG_VS32
  1139  			p.Reg = ppc64.REG_VS32
  1140  		}
  1141  		// Generate STXV for 32 or 64
  1142  		// bytes.
  1143  		for rem >= 32 {
  1144  			p := s.Prog(ppc64.ASTXV)
  1145  			p.From.Type = obj.TYPE_REG
  1146  			p.From.Reg = ppc64.REG_VS32
  1147  			p.To.Type = obj.TYPE_MEM
  1148  			p.To.Reg = v.Args[0].Reg()
  1149  			p.To.Offset = offset
  1150  
  1151  			p = s.Prog(ppc64.ASTXV)
  1152  			p.From.Type = obj.TYPE_REG
  1153  			p.From.Reg = ppc64.REG_VS32
  1154  			p.To.Type = obj.TYPE_MEM
  1155  			p.To.Reg = v.Args[0].Reg()
  1156  			p.To.Offset = offset + 16
  1157  			offset += 32
  1158  			rem -= 32
  1159  		}
  1160  		// Generate 16 bytes
  1161  		if rem >= 16 {
  1162  			p := s.Prog(ppc64.ASTXV)
  1163  			p.From.Type = obj.TYPE_REG
  1164  			p.From.Reg = ppc64.REG_VS32
  1165  			p.To.Type = obj.TYPE_MEM
  1166  			p.To.Reg = v.Args[0].Reg()
  1167  			p.To.Offset = offset
  1168  			offset += 16
  1169  			rem -= 16
  1170  		}
  1171  
  1172  		// first clear as many doublewords as possible
  1173  		// then clear remaining sizes as available
  1174  		for rem > 0 {
  1175  			op, size := ppc64.AMOVB, int64(1)
  1176  			switch {
  1177  			case rem >= 8:
  1178  				op, size = ppc64.AMOVD, 8
  1179  			case rem >= 4:
  1180  				op, size = ppc64.AMOVW, 4
  1181  			case rem >= 2:
  1182  				op, size = ppc64.AMOVH, 2
  1183  			}
  1184  			p := s.Prog(op)
  1185  			p.From.Type = obj.TYPE_REG
  1186  			p.From.Reg = ppc64.REG_R0
  1187  			p.To.Type = obj.TYPE_MEM
  1188  			p.To.Reg = v.Args[0].Reg()
  1189  			p.To.Offset = offset
  1190  			rem -= size
  1191  			offset += size
  1192  		}
  1193  
  1194  	case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
  1195  
  1196  		// Unaligned data doesn't hurt performance
  1197  		// for these instructions on power8.
  1198  
  1199  		// For sizes >= 64 generate a loop as follows:
  1200  
  1201  		// Set up loop counter in CTR, used by BC
  1202  		//       XXLXOR VS32,VS32,VS32
  1203  		//	 MOVD len/32,REG_TMP
  1204  		//	 MOVD REG_TMP,CTR
  1205  		//       MOVD $16,REG_TMP
  1206  		//	 loop:
  1207  		//	 STXVD2X VS32,(R0)(R20)
  1208  		//	 STXVD2X VS32,(R31)(R20)
  1209  		//	 ADD  $32,R20
  1210  		//	 BC   16, 0, loop
  1211  		//
  1212  		// any remainder is done as described below
  1213  
  1214  		// for sizes < 64 bytes, first clear as many doublewords as possible,
  1215  		// then handle the remainder
  1216  		//	MOVD R0,(R20)
  1217  		//	MOVD R0,8(R20)
  1218  		// .... etc.
  1219  		//
  1220  		// the remainder bytes are cleared using one or more
  1221  		// of the following instructions with the appropriate
  1222  		// offsets depending which instructions are needed
  1223  		//
  1224  		//	MOVW R0,n1(R20)	4 bytes
  1225  		//	MOVH R0,n2(R20)	2 bytes
  1226  		//	MOVB R0,n3(R20)	1 byte
  1227  		//
  1228  		// 7 bytes: MOVW, MOVH, MOVB
  1229  		// 6 bytes: MOVW, MOVH
  1230  		// 5 bytes: MOVW, MOVB
  1231  		// 3 bytes: MOVH, MOVB
  1232  
  1233  		// each loop iteration does 32 bytes
  1234  		ctr := v.AuxInt / 32
  1235  
  1236  		// remainder bytes
  1237  		rem := v.AuxInt % 32
  1238  
  1239  		// only generate a loop if there is more
  1240  		// than 1 iteration.
  1241  		if ctr > 1 {
  1242  			// Set up VS32 (V0) to hold 0s
  1243  			p := s.Prog(ppc64.AXXLXOR)
  1244  			p.From.Type = obj.TYPE_REG
  1245  			p.From.Reg = ppc64.REG_VS32
  1246  			p.To.Type = obj.TYPE_REG
  1247  			p.To.Reg = ppc64.REG_VS32
  1248  			p.Reg = ppc64.REG_VS32
  1249  
  1250  			// Set up CTR loop counter
  1251  			p = s.Prog(ppc64.AMOVD)
  1252  			p.From.Type = obj.TYPE_CONST
  1253  			p.From.Offset = ctr
  1254  			p.To.Type = obj.TYPE_REG
  1255  			p.To.Reg = ppc64.REGTMP
  1256  
  1257  			p = s.Prog(ppc64.AMOVD)
  1258  			p.From.Type = obj.TYPE_REG
  1259  			p.From.Reg = ppc64.REGTMP
  1260  			p.To.Type = obj.TYPE_REG
  1261  			p.To.Reg = ppc64.REG_CTR
  1262  
  1263  			// Set up R31 to hold index value 16
  1264  			p = s.Prog(ppc64.AMOVD)
  1265  			p.From.Type = obj.TYPE_CONST
  1266  			p.From.Offset = 16
  1267  			p.To.Type = obj.TYPE_REG
  1268  			p.To.Reg = ppc64.REGTMP
  1269  
  1270  			// Don't add padding for alignment
  1271  			// with few loop iterations.
  1272  			if ctr > 3 {
  1273  				p = s.Prog(obj.APCALIGN)
  1274  				p.From.Type = obj.TYPE_CONST
  1275  				p.From.Offset = 16
  1276  			}
  1277  
  1278  			// generate 2 STXVD2Xs to store 32 bytes (16 bytes each)
  1279  			// when this is a loop then the top must be saved
  1280  			var top *obj.Prog
  1281  			// This is the top of loop
  1282  
  1283  			p = s.Prog(ppc64.ASTXVD2X)
  1284  			p.From.Type = obj.TYPE_REG
  1285  			p.From.Reg = ppc64.REG_VS32
  1286  			p.To.Type = obj.TYPE_MEM
  1287  			p.To.Reg = v.Args[0].Reg()
  1288  			p.To.Index = ppc64.REGZERO
  1289  			// Save the top of loop
  1290  			if top == nil {
  1291  				top = p
  1292  			}
  1293  			p = s.Prog(ppc64.ASTXVD2X)
  1294  			p.From.Type = obj.TYPE_REG
  1295  			p.From.Reg = ppc64.REG_VS32
  1296  			p.To.Type = obj.TYPE_MEM
  1297  			p.To.Reg = v.Args[0].Reg()
  1298  			p.To.Index = ppc64.REGTMP
  1299  
  1300  			// Increment address for the
  1301  			// 4 doublewords just zeroed.
  1302  			p = s.Prog(ppc64.AADD)
  1303  			p.Reg = v.Args[0].Reg()
  1304  			p.From.Type = obj.TYPE_CONST
  1305  			p.From.Offset = 32
  1306  			p.To.Type = obj.TYPE_REG
  1307  			p.To.Reg = v.Args[0].Reg()
  1308  
  1309  			// Branch back to top of loop
  1310  			// based on CTR
  1311  			// BC with BO_BCTR generates bdnz
  1312  			p = s.Prog(ppc64.ABC)
  1313  			p.From.Type = obj.TYPE_CONST
  1314  			p.From.Offset = ppc64.BO_BCTR
  1315  			p.Reg = ppc64.REG_CR0LT
  1316  			p.To.Type = obj.TYPE_BRANCH
  1317  			p.To.SetTarget(top)
  1318  		}
  1319  
  1320  		// when ctr == 1 the loop was not generated but
  1321  		// there are at least 32 bytes to clear, so add
  1322  		// that to the remainder to generate the code
  1323  		// to clear those doublewords
  1324  		if ctr == 1 {
  1325  			rem += 32
  1326  		}
  1327  
  1328  		// clear the remainder starting at offset zero
  1329  		offset := int64(0)
  1330  
  1331  		// first clear as many doublewords as possible
  1332  		// then clear remaining sizes as available
  1333  		for rem > 0 {
  1334  			op, size := ppc64.AMOVB, int64(1)
  1335  			switch {
  1336  			case rem >= 8:
  1337  				op, size = ppc64.AMOVD, 8
  1338  			case rem >= 4:
  1339  				op, size = ppc64.AMOVW, 4
  1340  			case rem >= 2:
  1341  				op, size = ppc64.AMOVH, 2
  1342  			}
  1343  			p := s.Prog(op)
  1344  			p.From.Type = obj.TYPE_REG
  1345  			p.From.Reg = ppc64.REG_R0
  1346  			p.To.Type = obj.TYPE_MEM
  1347  			p.To.Reg = v.Args[0].Reg()
  1348  			p.To.Offset = offset
  1349  			rem -= size
  1350  			offset += size
  1351  		}
  1352  
  1353  	case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
  1354  
  1355  		bytesPerLoop := int64(32)
  1356  		// This will be used when moving more
  1357  		// than 8 bytes.  Moves start with
  1358  		// as many 8 byte moves as possible, then
  1359  		// 4, 2, or 1 byte(s) as remaining.  This will
  1360  		// work and be efficient for power8 or later.
  1361  		// If there are 64 or more bytes, then a
  1362  		// loop is generated to move 32 bytes and
  1363  		// update the src and dst addresses on each
  1364  		// iteration. When < 64 bytes, the appropriate
  1365  		// number of moves are generated based on the
  1366  		// size.
  1367  		// When moving >= 64 bytes a loop is used
  1368  		//	MOVD len/32,REG_TMP
  1369  		//	MOVD REG_TMP,CTR
  1370  		//	MOVD $16,REG_TMP
  1371  		// top:
  1372  		//	LXVD2X (R0)(R21),VS32
  1373  		//	LXVD2X (R31)(R21),VS33
  1374  		//	ADD $32,R21
  1375  		//	STXVD2X VS32,(R0)(R20)
  1376  		//	STXVD2X VS33,(R31)(R20)
  1377  		//	ADD $32,R20
  1378  		//	BC 16,0,top
  1379  		// Bytes not moved by this loop are moved
  1380  		// with a combination of the following instructions,
  1381  		// starting with the largest sizes and generating as
  1382  		// many as needed, using the appropriate offset value.
  1383  		//	MOVD  n(R21),R31
  1384  		//	MOVD  R31,n(R20)
  1385  		//	MOVW  n1(R21),R31
  1386  		//	MOVW  R31,n1(R20)
  1387  		//	MOVH  n2(R21),R31
  1388  		//	MOVH  R31,n2(R20)
  1389  		//	MOVB  n3(R21),R31
  1390  		//	MOVB  R31,n3(R20)
  1391  
  1392  		// Each loop iteration moves 32 bytes
  1393  		ctr := v.AuxInt / bytesPerLoop
  1394  
  1395  		// Remainder after the loop
  1396  		rem := v.AuxInt % bytesPerLoop
  1397  
  1398  		dstReg := v.Args[0].Reg()
  1399  		srcReg := v.Args[1].Reg()
  1400  
  1401  		// The set of registers used here, must match the clobbered reg list
  1402  		// in PPC64Ops.go.
  1403  		offset := int64(0)
  1404  
  1405  		// top of the loop
  1406  		var top *obj.Prog
  1407  		// Only generate looping code when loop counter is > 1 for >= 64 bytes
  1408  		if ctr > 1 {
  1409  			// Set up the CTR
  1410  			p := s.Prog(ppc64.AMOVD)
  1411  			p.From.Type = obj.TYPE_CONST
  1412  			p.From.Offset = ctr
  1413  			p.To.Type = obj.TYPE_REG
  1414  			p.To.Reg = ppc64.REGTMP
  1415  
  1416  			p = s.Prog(ppc64.AMOVD)
  1417  			p.From.Type = obj.TYPE_REG
  1418  			p.From.Reg = ppc64.REGTMP
  1419  			p.To.Type = obj.TYPE_REG
  1420  			p.To.Reg = ppc64.REG_CTR
  1421  
  1422  			// Use REGTMP as index reg
  1423  			p = s.Prog(ppc64.AMOVD)
  1424  			p.From.Type = obj.TYPE_CONST
  1425  			p.From.Offset = 16
  1426  			p.To.Type = obj.TYPE_REG
  1427  			p.To.Reg = ppc64.REGTMP
  1428  
  1429  			// Don't add padding for
  1430  			// alignment with small iteration
  1431  			// counts.
  1432  			if ctr > 3 {
  1433  				p = s.Prog(obj.APCALIGN)
  1434  				p.From.Type = obj.TYPE_CONST
  1435  				p.From.Offset = 16
  1436  			}
  1437  
  1438  			// Generate 16 byte loads and stores.
  1439  			// Use temp register for index (16)
  1440  			// on the second one.
  1441  
  1442  			p = s.Prog(ppc64.ALXVD2X)
  1443  			p.From.Type = obj.TYPE_MEM
  1444  			p.From.Reg = srcReg
  1445  			p.From.Index = ppc64.REGZERO
  1446  			p.To.Type = obj.TYPE_REG
  1447  			p.To.Reg = ppc64.REG_VS32
  1448  			if top == nil {
  1449  				top = p
  1450  			}
  1451  			p = s.Prog(ppc64.ALXVD2X)
  1452  			p.From.Type = obj.TYPE_MEM
  1453  			p.From.Reg = srcReg
  1454  			p.From.Index = ppc64.REGTMP
  1455  			p.To.Type = obj.TYPE_REG
  1456  			p.To.Reg = ppc64.REG_VS33
  1457  
  1458  			// increment the src reg for next iteration
  1459  			p = s.Prog(ppc64.AADD)
  1460  			p.Reg = srcReg
  1461  			p.From.Type = obj.TYPE_CONST
  1462  			p.From.Offset = bytesPerLoop
  1463  			p.To.Type = obj.TYPE_REG
  1464  			p.To.Reg = srcReg
  1465  
  1466  			// generate 16 byte stores
  1467  			p = s.Prog(ppc64.ASTXVD2X)
  1468  			p.From.Type = obj.TYPE_REG
  1469  			p.From.Reg = ppc64.REG_VS32
  1470  			p.To.Type = obj.TYPE_MEM
  1471  			p.To.Reg = dstReg
  1472  			p.To.Index = ppc64.REGZERO
  1473  
  1474  			p = s.Prog(ppc64.ASTXVD2X)
  1475  			p.From.Type = obj.TYPE_REG
  1476  			p.From.Reg = ppc64.REG_VS33
  1477  			p.To.Type = obj.TYPE_MEM
  1478  			p.To.Reg = dstReg
  1479  			p.To.Index = ppc64.REGTMP
  1480  
  1481  			// increment the dst reg for next iteration
  1482  			p = s.Prog(ppc64.AADD)
  1483  			p.Reg = dstReg
  1484  			p.From.Type = obj.TYPE_CONST
  1485  			p.From.Offset = bytesPerLoop
  1486  			p.To.Type = obj.TYPE_REG
  1487  			p.To.Reg = dstReg
  1488  
  1489  			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
  1490  			// to loop top.
  1491  			p = s.Prog(ppc64.ABC)
  1492  			p.From.Type = obj.TYPE_CONST
  1493  			p.From.Offset = ppc64.BO_BCTR
  1494  			p.Reg = ppc64.REG_CR0LT
  1495  			p.To.Type = obj.TYPE_BRANCH
  1496  			p.To.SetTarget(top)
  1497  
  1498  			// srcReg and dstReg were incremented in the loop, so
  1499  			// later instructions start with offset 0.
  1500  			offset = int64(0)
  1501  		}
  1502  
  1503  		// No loop was generated for one iteration, so
  1504  		// add 32 bytes to the remainder to move those bytes.
  1505  		if ctr == 1 {
  1506  			rem += bytesPerLoop
  1507  		}
  1508  
  1509  		if rem >= 16 {
  1510  			// Generate 16 byte loads and stores.
  1511  			// Use temp register for index (value 16)
  1512  			// on the second one.
  1513  			p := s.Prog(ppc64.ALXVD2X)
  1514  			p.From.Type = obj.TYPE_MEM
  1515  			p.From.Reg = srcReg
  1516  			p.From.Index = ppc64.REGZERO
  1517  			p.To.Type = obj.TYPE_REG
  1518  			p.To.Reg = ppc64.REG_VS32
  1519  
  1520  			p = s.Prog(ppc64.ASTXVD2X)
  1521  			p.From.Type = obj.TYPE_REG
  1522  			p.From.Reg = ppc64.REG_VS32
  1523  			p.To.Type = obj.TYPE_MEM
  1524  			p.To.Reg = dstReg
  1525  			p.To.Index = ppc64.REGZERO
  1526  
  1527  			offset = 16
  1528  			rem -= 16
  1529  
  1530  			if rem >= 16 {
  1531  				// Use REGTMP as index reg
  1532  				p := s.Prog(ppc64.AMOVD)
  1533  				p.From.Type = obj.TYPE_CONST
  1534  				p.From.Offset = 16
  1535  				p.To.Type = obj.TYPE_REG
  1536  				p.To.Reg = ppc64.REGTMP
  1537  
  1538  				p = s.Prog(ppc64.ALXVD2X)
  1539  				p.From.Type = obj.TYPE_MEM
  1540  				p.From.Reg = srcReg
  1541  				p.From.Index = ppc64.REGTMP
  1542  				p.To.Type = obj.TYPE_REG
  1543  				p.To.Reg = ppc64.REG_VS32
  1544  
  1545  				p = s.Prog(ppc64.ASTXVD2X)
  1546  				p.From.Type = obj.TYPE_REG
  1547  				p.From.Reg = ppc64.REG_VS32
  1548  				p.To.Type = obj.TYPE_MEM
  1549  				p.To.Reg = dstReg
  1550  				p.To.Index = ppc64.REGTMP
  1551  
  1552  				offset = 32
  1553  				rem -= 16
  1554  			}
  1555  		}
  1556  
  1557  		// Generate all the remaining load and store pairs, starting with
  1558  		// as many 8 byte moves as possible, then 4, 2, 1.
  1559  		for rem > 0 {
  1560  			op, size := ppc64.AMOVB, int64(1)
  1561  			switch {
  1562  			case rem >= 8:
  1563  				op, size = ppc64.AMOVD, 8
  1564  			case rem >= 4:
  1565  				op, size = ppc64.AMOVWZ, 4
  1566  			case rem >= 2:
  1567  				op, size = ppc64.AMOVH, 2
  1568  			}
  1569  			// Load
  1570  			p := s.Prog(op)
  1571  			p.To.Type = obj.TYPE_REG
  1572  			p.To.Reg = ppc64.REGTMP
  1573  			p.From.Type = obj.TYPE_MEM
  1574  			p.From.Reg = srcReg
  1575  			p.From.Offset = offset
  1576  
  1577  			// Store
  1578  			p = s.Prog(op)
  1579  			p.From.Type = obj.TYPE_REG
  1580  			p.From.Reg = ppc64.REGTMP
  1581  			p.To.Type = obj.TYPE_MEM
  1582  			p.To.Reg = dstReg
  1583  			p.To.Offset = offset
  1584  			rem -= size
  1585  			offset += size
  1586  		}
  1587  
  1588  	case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
  1589  		bytesPerLoop := int64(64)
  1590  		// This is used when moving more
  1591  		// than 8 bytes on power9.  Moves start with
  1592  		// as many 8 byte moves as possible, then
  1593  		// 4, 2, or 1 byte(s) as remaining.  This will
  1594  		// work and be efficient for power8 or later.
  1595  		// If there are 128 or more bytes, a loop is generated that
  1596  		// moves 64 bytes and updates src and dst on each iteration.
  1597  		// When < 128 bytes, moves are generated based on the size.
  1598  		//      MOVD len/64,REG_TMP
  1599  		//      MOVD REG_TMP,CTR
  1600  		// top:
  1601  		//      LXV 0(R21),VS32
  1602  		//      LXV 16(R21),VS33
  1603  		//      STXV VS32,0(R20)
  1604  		//      STXV VS33,16(R20)
  1605  		//      LXV 32(R21),VS32
  1606  		//      LXV 48(R21),VS33
  1607  		//      STXV VS32,32(R20)
  1608  		//      STXV VS33,48(R20)
  1609  		//      ADD $64,R21
  1610  		//      ADD $64,R20
  1611  		//      BC 16,0,top
  1612  		// Bytes not moved by this loop are moved
  1613  		// with a combination of the following instructions,
  1614  		// starting with the largest sizes and generating as
  1615  		// many as needed, using the appropriate offset value.
  1616  		//      MOVD  n(R21),R31
  1617  		//      MOVD  R31,n(R20)
  1618  		//      MOVW  n1(R21),R31
  1619  		//      MOVW  R31,n1(R20)
  1620  		//      MOVH  n2(R21),R31
  1621  		//      MOVH  R31,n2(R20)
  1622  		//      MOVB  n3(R21),R31
  1623  		//      MOVB  R31,n3(R20)
  1624  
  1625  		// Each loop iteration moves 64 bytes
  1626  		ctr := v.AuxInt / bytesPerLoop
  1627  
  1628  		// Remainder after the loop
  1629  		rem := v.AuxInt % bytesPerLoop
  1630  
  1631  		dstReg := v.Args[0].Reg()
  1632  		srcReg := v.Args[1].Reg()
  1633  
  1634  		offset := int64(0)
  1635  
  1636  		// top of the loop
  1637  		var top *obj.Prog
  1638  
  1639  		// Only generate looping code when loop counter is > 1 for >= 128 bytes
  1640  		if ctr > 1 {
  1641  			// Set up the CTR
  1642  			p := s.Prog(ppc64.AMOVD)
  1643  			p.From.Type = obj.TYPE_CONST
  1644  			p.From.Offset = ctr
  1645  			p.To.Type = obj.TYPE_REG
  1646  			p.To.Reg = ppc64.REGTMP
  1647  
  1648  			p = s.Prog(ppc64.AMOVD)
  1649  			p.From.Type = obj.TYPE_REG
  1650  			p.From.Reg = ppc64.REGTMP
  1651  			p.To.Type = obj.TYPE_REG
  1652  			p.To.Reg = ppc64.REG_CTR
  1653  
  1654  			p = s.Prog(obj.APCALIGN)
  1655  			p.From.Type = obj.TYPE_CONST
  1656  			p.From.Offset = 16
  1657  
  1658  			// Generate 16 byte loads and stores.
  1659  			p = s.Prog(ppc64.ALXV)
  1660  			p.From.Type = obj.TYPE_MEM
  1661  			p.From.Reg = srcReg
  1662  			p.From.Offset = offset
  1663  			p.To.Type = obj.TYPE_REG
  1664  			p.To.Reg = ppc64.REG_VS32
  1665  			if top == nil {
  1666  				top = p
  1667  			}
  1668  			p = s.Prog(ppc64.ALXV)
  1669  			p.From.Type = obj.TYPE_MEM
  1670  			p.From.Reg = srcReg
  1671  			p.From.Offset = offset + 16
  1672  			p.To.Type = obj.TYPE_REG
  1673  			p.To.Reg = ppc64.REG_VS33
  1674  
  1675  			// generate 16 byte stores
  1676  			p = s.Prog(ppc64.ASTXV)
  1677  			p.From.Type = obj.TYPE_REG
  1678  			p.From.Reg = ppc64.REG_VS32
  1679  			p.To.Type = obj.TYPE_MEM
  1680  			p.To.Reg = dstReg
  1681  			p.To.Offset = offset
  1682  
  1683  			p = s.Prog(ppc64.ASTXV)
  1684  			p.From.Type = obj.TYPE_REG
  1685  			p.From.Reg = ppc64.REG_VS33
  1686  			p.To.Type = obj.TYPE_MEM
  1687  			p.To.Reg = dstReg
  1688  			p.To.Offset = offset + 16
  1689  
  1690  			// Generate 16 byte loads and stores.
  1691  			p = s.Prog(ppc64.ALXV)
  1692  			p.From.Type = obj.TYPE_MEM
  1693  			p.From.Reg = srcReg
  1694  			p.From.Offset = offset + 32
  1695  			p.To.Type = obj.TYPE_REG
  1696  			p.To.Reg = ppc64.REG_VS32
  1697  
  1698  			p = s.Prog(ppc64.ALXV)
  1699  			p.From.Type = obj.TYPE_MEM
  1700  			p.From.Reg = srcReg
  1701  			p.From.Offset = offset + 48
  1702  			p.To.Type = obj.TYPE_REG
  1703  			p.To.Reg = ppc64.REG_VS33
  1704  
  1705  			// generate 16 byte stores
  1706  			p = s.Prog(ppc64.ASTXV)
  1707  			p.From.Type = obj.TYPE_REG
  1708  			p.From.Reg = ppc64.REG_VS32
  1709  			p.To.Type = obj.TYPE_MEM
  1710  			p.To.Reg = dstReg
  1711  			p.To.Offset = offset + 32
  1712  
  1713  			p = s.Prog(ppc64.ASTXV)
  1714  			p.From.Type = obj.TYPE_REG
  1715  			p.From.Reg = ppc64.REG_VS33
  1716  			p.To.Type = obj.TYPE_MEM
  1717  			p.To.Reg = dstReg
  1718  			p.To.Offset = offset + 48
  1719  
  1720  			// increment the src reg for next iteration
  1721  			p = s.Prog(ppc64.AADD)
  1722  			p.Reg = srcReg
  1723  			p.From.Type = obj.TYPE_CONST
  1724  			p.From.Offset = bytesPerLoop
  1725  			p.To.Type = obj.TYPE_REG
  1726  			p.To.Reg = srcReg
  1727  
  1728  			// increment the dst reg for next iteration
  1729  			p = s.Prog(ppc64.AADD)
  1730  			p.Reg = dstReg
  1731  			p.From.Type = obj.TYPE_CONST
  1732  			p.From.Offset = bytesPerLoop
  1733  			p.To.Type = obj.TYPE_REG
  1734  			p.To.Reg = dstReg
  1735  
  1736  			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
  1737  			// to loop top.
  1738  			p = s.Prog(ppc64.ABC)
  1739  			p.From.Type = obj.TYPE_CONST
  1740  			p.From.Offset = ppc64.BO_BCTR
  1741  			p.Reg = ppc64.REG_CR0LT
  1742  			p.To.Type = obj.TYPE_BRANCH
  1743  			p.To.SetTarget(top)
  1744  
  1745  			// srcReg and dstReg were incremented in the loop, so
  1746  			// later instructions start with offset 0.
  1747  			offset = int64(0)
  1748  		}
  1749  
  1750  		// No loop was generated for one iteration, so
  1751  		// add 64 bytes to the remainder to move those bytes.
  1752  		if ctr == 1 {
  1753  			rem += bytesPerLoop
  1754  		}
  1755  		if rem >= 32 {
  1756  			p := s.Prog(ppc64.ALXV)
  1757  			p.From.Type = obj.TYPE_MEM
  1758  			p.From.Reg = srcReg
  1759  			p.To.Type = obj.TYPE_REG
  1760  			p.To.Reg = ppc64.REG_VS32
  1761  
  1762  			p = s.Prog(ppc64.ALXV)
  1763  			p.From.Type = obj.TYPE_MEM
  1764  			p.From.Reg = srcReg
  1765  			p.From.Offset = 16
  1766  			p.To.Type = obj.TYPE_REG
  1767  			p.To.Reg = ppc64.REG_VS33
  1768  
  1769  			p = s.Prog(ppc64.ASTXV)
  1770  			p.From.Type = obj.TYPE_REG
  1771  			p.From.Reg = ppc64.REG_VS32
  1772  			p.To.Type = obj.TYPE_MEM
  1773  			p.To.Reg = dstReg
  1774  
  1775  			p = s.Prog(ppc64.ASTXV)
  1776  			p.From.Type = obj.TYPE_REG
  1777  			p.From.Reg = ppc64.REG_VS33
  1778  			p.To.Type = obj.TYPE_MEM
  1779  			p.To.Reg = dstReg
  1780  			p.To.Offset = 16
  1781  
  1782  			offset = 32
  1783  			rem -= 32
  1784  		}
  1785  
  1786  		if rem >= 16 {
  1787  			// Generate 16 byte loads and stores.
  1788  			p := s.Prog(ppc64.ALXV)
  1789  			p.From.Type = obj.TYPE_MEM
  1790  			p.From.Reg = srcReg
  1791  			p.From.Offset = offset
  1792  			p.To.Type = obj.TYPE_REG
  1793  			p.To.Reg = ppc64.REG_VS32
  1794  
  1795  			p = s.Prog(ppc64.ASTXV)
  1796  			p.From.Type = obj.TYPE_REG
  1797  			p.From.Reg = ppc64.REG_VS32
  1798  			p.To.Type = obj.TYPE_MEM
  1799  			p.To.Reg = dstReg
  1800  			p.To.Offset = offset
  1801  
  1802  			offset += 16
  1803  			rem -= 16
  1804  
  1805  			if rem >= 16 {
  1806  				p := s.Prog(ppc64.ALXV)
  1807  				p.From.Type = obj.TYPE_MEM
  1808  				p.From.Reg = srcReg
  1809  				p.From.Offset = offset
  1810  				p.To.Type = obj.TYPE_REG
  1811  				p.To.Reg = ppc64.REG_VS32
  1812  
  1813  				p = s.Prog(ppc64.ASTXV)
  1814  				p.From.Type = obj.TYPE_REG
  1815  				p.From.Reg = ppc64.REG_VS32
  1816  				p.To.Type = obj.TYPE_MEM
  1817  				p.To.Reg = dstReg
  1818  				p.To.Offset = offset
  1819  
  1820  				offset += 16
  1821  				rem -= 16
  1822  			}
  1823  		}
  1824  		// Generate all the remaining load and store pairs, starting with
  1825  		// as many 8 byte moves as possible, then 4, 2, 1.
  1826  		for rem > 0 {
  1827  			op, size := ppc64.AMOVB, int64(1)
  1828  			switch {
  1829  			case rem >= 8:
  1830  				op, size = ppc64.AMOVD, 8
  1831  			case rem >= 4:
  1832  				op, size = ppc64.AMOVWZ, 4
  1833  			case rem >= 2:
  1834  				op, size = ppc64.AMOVH, 2
  1835  			}
  1836  			// Load
  1837  			p := s.Prog(op)
  1838  			p.To.Type = obj.TYPE_REG
  1839  			p.To.Reg = ppc64.REGTMP
  1840  			p.From.Type = obj.TYPE_MEM
  1841  			p.From.Reg = srcReg
  1842  			p.From.Offset = offset
  1843  
  1844  			// Store
  1845  			p = s.Prog(op)
  1846  			p.From.Type = obj.TYPE_REG
  1847  			p.From.Reg = ppc64.REGTMP
  1848  			p.To.Type = obj.TYPE_MEM
  1849  			p.To.Reg = dstReg
  1850  			p.To.Offset = offset
  1851  			rem -= size
  1852  			offset += size
  1853  		}
  1854  
  1855  	case ssa.OpPPC64CALLstatic:
  1856  		s.Call(v)
  1857  
  1858  	case ssa.OpPPC64CALLtail:
  1859  		s.TailCall(v)
  1860  
  1861  	case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
  1862  		p := s.Prog(ppc64.AMOVD)
  1863  		p.From.Type = obj.TYPE_REG
  1864  		p.From.Reg = v.Args[0].Reg()
  1865  		p.To.Type = obj.TYPE_REG
  1866  		p.To.Reg = ppc64.REG_LR
  1867  
  1868  		if v.Args[0].Reg() != ppc64.REG_R12 {
  1869  			v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
  1870  		}
  1871  
  1872  		pp := s.Call(v)
  1873  
  1874  		// Convert the call into a blrl with hint this is not a subroutine return.
  1875  		// The full bclrl opcode must be specified when passing a hint.
  1876  		pp.As = ppc64.ABCL
  1877  		pp.From.Type = obj.TYPE_CONST
  1878  		pp.From.Offset = ppc64.BO_ALWAYS
  1879  		pp.Reg = ppc64.REG_CR0LT	// The preferred value if BI is ignored.
  1880  		pp.To.Reg = ppc64.REG_LR
  1881  		pp.AddRestSourceConst(1)
  1882  
  1883  		if ppc64.NeedTOCpointer(base.Ctxt) {
  1884  			// When compiling Go into PIC, the function we just
  1885  			// called via pointer might have been implemented in
  1886  			// a separate module and so overwritten the TOC
  1887  			// pointer in R2; reload it.
  1888  			q := s.Prog(ppc64.AMOVD)
  1889  			q.From.Type = obj.TYPE_MEM
  1890  			q.From.Offset = 24
  1891  			q.From.Reg = ppc64.REGSP
  1892  			q.To.Type = obj.TYPE_REG
  1893  			q.To.Reg = ppc64.REG_R2
  1894  		}
  1895  
  1896  	case ssa.OpPPC64LoweredWB:
  1897  		p := s.Prog(obj.ACALL)
  1898  		p.To.Type = obj.TYPE_MEM
  1899  		p.To.Name = obj.NAME_EXTERN
  1900  		// AuxInt encodes how many buffer entries we need.
  1901  		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
  1902  
  1903  	case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
  1904  		p := s.Prog(obj.ACALL)
  1905  		p.To.Type = obj.TYPE_MEM
  1906  		p.To.Name = obj.NAME_EXTERN
  1907  		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
  1908  		s.UseArgs(16)	// space used in callee args area by assembly stubs
  1909  
  1910  	case ssa.OpPPC64LoweredNilCheck:
  1911  		if buildcfg.GOOS == "aix" {
  1912  			// CMP Rarg0, R0
  1913  			// BNE 2(PC)
  1914  			// STW R0, 0(R0)
  1915  			// NOP (so the BNE has somewhere to land)
  1916  
  1917  			// CMP Rarg0, R0
  1918  			p := s.Prog(ppc64.ACMP)
  1919  			p.From.Type = obj.TYPE_REG
  1920  			p.From.Reg = v.Args[0].Reg()
  1921  			p.To.Type = obj.TYPE_REG
  1922  			p.To.Reg = ppc64.REG_R0
  1923  
  1924  			// BNE 2(PC)
  1925  			p2 := s.Prog(ppc64.ABNE)
  1926  			p2.To.Type = obj.TYPE_BRANCH
  1927  
  1928  			// STW R0, 0(R0)
  1929  			// Write at 0 is forbidden and will trigger a SIGSEGV
  1930  			p = s.Prog(ppc64.AMOVW)
  1931  			p.From.Type = obj.TYPE_REG
  1932  			p.From.Reg = ppc64.REG_R0
  1933  			p.To.Type = obj.TYPE_MEM
  1934  			p.To.Reg = ppc64.REG_R0
  1935  
  1936  			// NOP (so the BNE has somewhere to land)
  1937  			nop := s.Prog(obj.ANOP)
  1938  			p2.To.SetTarget(nop)
  1939  
  1940  		} else {
  1941  			// Issue a load which will fault if arg is nil.
  1942  			p := s.Prog(ppc64.AMOVBZ)
  1943  			p.From.Type = obj.TYPE_MEM
  1944  			p.From.Reg = v.Args[0].Reg()
  1945  			ssagen.AddAux(&p.From, v)
  1946  			p.To.Type = obj.TYPE_REG
  1947  			p.To.Reg = ppc64.REGTMP
  1948  		}
  1949  		if logopt.Enabled() {
  1950  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1951  		}
  1952  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 {	// v.Pos.Line()==1 in generated wrappers
  1953  			base.WarnfAt(v.Pos, "generated nil check")
  1954  		}
  1955  
  1956  	// These should be resolved by rules and not make it here.
  1957  	case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
  1958  		ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
  1959  		ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
  1960  		v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
  1961  	case ssa.OpPPC64InvertFlags:
  1962  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1963  	case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
  1964  		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
  1965  	case ssa.OpClobber, ssa.OpClobberReg:
  1966  		// TODO: implement for clobberdead experiment. Nop is ok for now.
  1967  	default:
  1968  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1969  	}
  1970  }
  1971  
  1972  var blockJump = [...]struct {
  1973  	asm, invasm	obj.As
  1974  	asmeq, invasmun	bool
  1975  }{
  1976  	ssa.BlockPPC64EQ:	{ppc64.ABEQ, ppc64.ABNE, false, false},
  1977  	ssa.BlockPPC64NE:	{ppc64.ABNE, ppc64.ABEQ, false, false},
  1978  
  1979  	ssa.BlockPPC64LT:	{ppc64.ABLT, ppc64.ABGE, false, false},
  1980  	ssa.BlockPPC64GE:	{ppc64.ABGE, ppc64.ABLT, false, false},
  1981  	ssa.BlockPPC64LE:	{ppc64.ABLE, ppc64.ABGT, false, false},
  1982  	ssa.BlockPPC64GT:	{ppc64.ABGT, ppc64.ABLE, false, false},
  1983  
  1984  	// TODO: need to work FP comparisons into block jumps
  1985  	ssa.BlockPPC64FLT:	{ppc64.ABLT, ppc64.ABGE, false, false},
  1986  	ssa.BlockPPC64FGE:	{ppc64.ABGT, ppc64.ABLT, true, true},	// GE = GT or EQ; !GE = LT or UN
  1987  	ssa.BlockPPC64FLE:	{ppc64.ABLT, ppc64.ABGT, true, true},	// LE = LT or EQ; !LE = GT or UN
  1988  	ssa.BlockPPC64FGT:	{ppc64.ABGT, ppc64.ABLE, false, false},
  1989  }
  1990  
  1991  func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
  1992  	switch b.Kind {
  1993  	case ssa.BlockDefer:
  1994  		// defer returns in R3:
  1995  		// 0 if we should continue executing
  1996  		// 1 if we should jump to deferreturn call
  1997  		p := s.Prog(ppc64.ACMP)
  1998  		p.From.Type = obj.TYPE_REG
  1999  		p.From.Reg = ppc64.REG_R3
  2000  		p.To.Type = obj.TYPE_REG
  2001  		p.To.Reg = ppc64.REG_R0
  2002  
  2003  		p = s.Prog(ppc64.ABNE)
  2004  		p.To.Type = obj.TYPE_BRANCH
  2005  		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
  2006  		if b.Succs[0].Block() != next {
  2007  			p := s.Prog(obj.AJMP)
  2008  			p.To.Type = obj.TYPE_BRANCH
  2009  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  2010  		}
  2011  
  2012  	case ssa.BlockPlain:
  2013  		if b.Succs[0].Block() != next {
  2014  			p := s.Prog(obj.AJMP)
  2015  			p.To.Type = obj.TYPE_BRANCH
  2016  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  2017  		}
  2018  	case ssa.BlockExit, ssa.BlockRetJmp:
  2019  	case ssa.BlockRet:
  2020  		s.Prog(obj.ARET)
  2021  
  2022  	case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
  2023  		ssa.BlockPPC64LT, ssa.BlockPPC64GE,
  2024  		ssa.BlockPPC64LE, ssa.BlockPPC64GT,
  2025  		ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
  2026  		ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
  2027  		jmp := blockJump[b.Kind]
  2028  		switch next {
  2029  		case b.Succs[0].Block():
  2030  			s.Br(jmp.invasm, b.Succs[1].Block())
  2031  			if jmp.invasmun {
  2032  				// TODO: The second branch is probably predict-not-taken since it is for FP unordered
  2033  				s.Br(ppc64.ABVS, b.Succs[1].Block())
  2034  			}
  2035  		case b.Succs[1].Block():
  2036  			s.Br(jmp.asm, b.Succs[0].Block())
  2037  			if jmp.asmeq {
  2038  				s.Br(ppc64.ABEQ, b.Succs[0].Block())
  2039  			}
  2040  		default:
  2041  			if b.Likely != ssa.BranchUnlikely {
  2042  				s.Br(jmp.asm, b.Succs[0].Block())
  2043  				if jmp.asmeq {
  2044  					s.Br(ppc64.ABEQ, b.Succs[0].Block())
  2045  				}
  2046  				s.Br(obj.AJMP, b.Succs[1].Block())
  2047  			} else {
  2048  				s.Br(jmp.invasm, b.Succs[1].Block())
  2049  				if jmp.invasmun {
  2050  					// TODO: The second branch is probably predict-not-taken since it is for FP unordered
  2051  					s.Br(ppc64.ABVS, b.Succs[1].Block())
  2052  				}
  2053  				s.Br(obj.AJMP, b.Succs[0].Block())
  2054  			}
  2055  		}
  2056  	default:
  2057  		b.Fatalf("branch not implemented: %s", b.LongString())
  2058  	}
  2059  }
  2060  
  2061  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  2062  	p := s.Prog(loadByType(t))
  2063  	p.From.Type = obj.TYPE_MEM
  2064  	p.From.Name = obj.NAME_AUTO
  2065  	p.From.Sym = n.Linksym()
  2066  	p.From.Offset = n.FrameOffset() + off
  2067  	p.To.Type = obj.TYPE_REG
  2068  	p.To.Reg = reg
  2069  	return p
  2070  }
  2071  
  2072  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  2073  	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  2074  	p.To.Name = obj.NAME_PARAM
  2075  	p.To.Sym = n.Linksym()
  2076  	p.Pos = p.Pos.WithNotStmt()
  2077  	return p
  2078  }