github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/compile/internal/ppc64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ppc64
     6  
     7  import (
     8  	"github.com/gagliardetto/golang-go/cmd/compile/internal/gc"
     9  	"github.com/gagliardetto/golang-go/cmd/compile/internal/logopt"
    10  	"github.com/gagliardetto/golang-go/cmd/compile/internal/ssa"
    11  	"github.com/gagliardetto/golang-go/cmd/compile/internal/types"
    12  	"github.com/gagliardetto/golang-go/cmd/internal/obj"
    13  	"github.com/gagliardetto/golang-go/cmd/internal/obj/ppc64"
    14  	"github.com/gagliardetto/golang-go/cmd/internal/objabi"
    15  	"math"
    16  	"strings"
    17  )
    18  
    19  // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
    20  func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
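        	// Currently a no-op on ppc64: the flag-liveness scan below is
        	// kept commented out for reference.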
    21  	//	flive := b.FlagsLiveAtEnd
    22  	//	if b.Control != nil && b.Control.Type.IsFlags() {
    23  	//		flive = true
    24  	//	}
    25  	//	for i := len(b.Values) - 1; i >= 0; i-- {
    26  	//		v := b.Values[i]
    27  	//		if flive && (v.Op == ssa.OpPPC64MOVDconst) {
    28  	//			// The "mark" is any non-nil Aux value.
    29  	//			v.Aux = v
    30  	//		}
    31  	//		if v.Type.IsFlags() {
    32  	//			flive = false
    33  	//		}
    34  	//		for _, a := range v.Args {
    35  	//			if a.Type.IsFlags() {
    36  	//				flive = true
    37  	//			}
    38  	//		}
    39  	//	}
    40  }
    41  
    42  // loadByType returns the load instruction of the given type.
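        // For example, a signed 16-bit load uses MOVH (sign-extending),
        // while an unsigned one uses MOVHZ (zero-extending).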
    43  func loadByType(t *types.Type) obj.As {
    44  	if t.IsFloat() {
    45  		switch t.Size() {
    46  		case 4:
    47  			return ppc64.AFMOVS
    48  		case 8:
    49  			return ppc64.AFMOVD
    50  		}
    51  	} else {
    52  		switch t.Size() {
    53  		case 1:
    54  			if t.IsSigned() {
    55  				return ppc64.AMOVB
    56  			} else {
    57  				return ppc64.AMOVBZ
    58  			}
    59  		case 2:
    60  			if t.IsSigned() {
    61  				return ppc64.AMOVH
    62  			} else {
    63  				return ppc64.AMOVHZ
    64  			}
    65  		case 4:
    66  			if t.IsSigned() {
    67  				return ppc64.AMOVW
    68  			} else {
    69  				return ppc64.AMOVWZ
    70  			}
    71  		case 8:
    72  			return ppc64.AMOVD
    73  		}
    74  	}
    75  	panic("bad load type")
    76  }
    77  
    78  // storeByType returns the store instruction of the given type.
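        // Stores need no signed/unsigned variants: a store of either kind
        // writes the same low-order bytes.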
    79  func storeByType(t *types.Type) obj.As {
    80  	if t.IsFloat() {
    81  		switch t.Size() {
    82  		case 4:
    83  			return ppc64.AFMOVS
    84  		case 8:
    85  			return ppc64.AFMOVD
    86  		}
    87  	} else {
    88  		switch t.Size() {
    89  		case 1:
    90  			return ppc64.AMOVB
    91  		case 2:
    92  			return ppc64.AMOVH
    93  		case 4:
    94  			return ppc64.AMOVW
    95  		case 8:
    96  			return ppc64.AMOVD
    97  		}
    98  	}
    99  	panic("bad store type")
   100  }
   101  
   102  func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
   103  	switch v.Op {
   104  	case ssa.OpCopy:
   105  		t := v.Type
   106  		if t.IsMemory() {
   107  			return
   108  		}
   109  		x := v.Args[0].Reg()
   110  		y := v.Reg()
   111  		if x != y {
   112  			rt := obj.TYPE_REG
   113  			op := ppc64.AMOVD
   114  
   115  			if t.IsFloat() {
   116  				op = ppc64.AFMOVD
   117  			}
   118  			p := s.Prog(op)
   119  			p.From.Type = rt
   120  			p.From.Reg = x
   121  			p.To.Type = rt
   122  			p.To.Reg = y
   123  		}
   124  
   125  	case ssa.OpPPC64LoweredMuluhilo:
   126  		// MULHDU	Rarg1, Rarg0, Reg0
   127  		// MULLD	Rarg1, Rarg0, Reg1
   128  		r0 := v.Args[0].Reg()
   129  		r1 := v.Args[1].Reg()
   130  		p := s.Prog(ppc64.AMULHDU)
   131  		p.From.Type = obj.TYPE_REG
   132  		p.From.Reg = r1
   133  		p.Reg = r0
   134  		p.To.Type = obj.TYPE_REG
   135  		p.To.Reg = v.Reg0()
   136  		p1 := s.Prog(ppc64.AMULLD)
   137  		p1.From.Type = obj.TYPE_REG
   138  		p1.From.Reg = r1
   139  		p1.Reg = r0
   140  		p1.To.Type = obj.TYPE_REG
   141  		p1.To.Reg = v.Reg1()
   142  
   143  	case ssa.OpPPC64LoweredAdd64Carry:
   144  		// ADDC		Rarg2, -1, Rtmp
   145  		// ADDE		Rarg1, Rarg0, Reg0
   146  		// ADDZE	Rzero, Reg1
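        	// ADDC $-1, Rarg2 sets CA exactly when the carry-in argument
        	// (Rarg2) is nonzero; ADDE then computes arg0+arg1+CA, and
        	// ADDZE captures the resulting carry-out in Reg1.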
   147  		r0 := v.Args[0].Reg()
   148  		r1 := v.Args[1].Reg()
   149  		r2 := v.Args[2].Reg()
   150  		p := s.Prog(ppc64.AADDC)
   151  		p.From.Type = obj.TYPE_CONST
   152  		p.From.Offset = -1
   153  		p.Reg = r2
   154  		p.To.Type = obj.TYPE_REG
   155  		p.To.Reg = ppc64.REGTMP
   156  		p1 := s.Prog(ppc64.AADDE)
   157  		p1.From.Type = obj.TYPE_REG
   158  		p1.From.Reg = r1
   159  		p1.Reg = r0
   160  		p1.To.Type = obj.TYPE_REG
   161  		p1.To.Reg = v.Reg0()
   162  		p2 := s.Prog(ppc64.AADDZE)
   163  		p2.From.Type = obj.TYPE_REG
   164  		p2.From.Reg = ppc64.REGZERO
   165  		p2.To.Type = obj.TYPE_REG
   166  		p2.To.Reg = v.Reg1()
   167  
   168  	case ssa.OpPPC64LoweredAtomicAnd8,
   169  		ssa.OpPPC64LoweredAtomicOr8:
   170  		// LWSYNC
   171  		// LBAR		(Rarg0), Rtmp
   172  		// AND/OR	Rarg1, Rtmp
   173  		// STBCCC	Rtmp, (Rarg0)
   174  		// BNE		-3(PC)
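        		// STBCCC is a store-conditional on the reservation taken by
        		// LBAR; the BNE loops back to retry if the reservation was lost.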
   175  		r0 := v.Args[0].Reg()
   176  		r1 := v.Args[1].Reg()
   177  		// LWSYNC - Assuming shared data not write-through-required nor
   178  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   179  		plwsync := s.Prog(ppc64.ALWSYNC)
   180  		plwsync.To.Type = obj.TYPE_NONE
   181  		p := s.Prog(ppc64.ALBAR)
   182  		p.From.Type = obj.TYPE_MEM
   183  		p.From.Reg = r0
   184  		p.To.Type = obj.TYPE_REG
   185  		p.To.Reg = ppc64.REGTMP
   186  		p1 := s.Prog(v.Op.Asm())
   187  		p1.From.Type = obj.TYPE_REG
   188  		p1.From.Reg = r1
   189  		p1.To.Type = obj.TYPE_REG
   190  		p1.To.Reg = ppc64.REGTMP
   191  		p2 := s.Prog(ppc64.ASTBCCC)
   192  		p2.From.Type = obj.TYPE_REG
   193  		p2.From.Reg = ppc64.REGTMP
   194  		p2.To.Type = obj.TYPE_MEM
   195  		p2.To.Reg = r0
   196  		p2.RegTo2 = ppc64.REGTMP
   197  		p3 := s.Prog(ppc64.ABNE)
   198  		p3.To.Type = obj.TYPE_BRANCH
   199  		gc.Patch(p3, p)
   200  
   201  	case ssa.OpPPC64LoweredAtomicAdd32,
   202  		ssa.OpPPC64LoweredAtomicAdd64:
   203  		// LWSYNC
   204  		// LDAR/LWAR    (Rarg0), Rout
   205  		// ADD		Rarg1, Rout
   206  		// STDCCC/STWCCC Rout, (Rarg0)
   207  		// BNE         -3(PC)
   208  		// MOVWZ	Rout,Rout (if Add32)
   209  		ld := ppc64.ALDAR
   210  		st := ppc64.ASTDCCC
   211  		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
   212  			ld = ppc64.ALWAR
   213  			st = ppc64.ASTWCCC
   214  		}
   215  		r0 := v.Args[0].Reg()
   216  		r1 := v.Args[1].Reg()
   217  		out := v.Reg0()
   218  		// LWSYNC - Assuming shared data not write-through-required nor
   219  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   220  		plwsync := s.Prog(ppc64.ALWSYNC)
   221  		plwsync.To.Type = obj.TYPE_NONE
   222  		// LDAR or LWAR
   223  		p := s.Prog(ld)
   224  		p.From.Type = obj.TYPE_MEM
   225  		p.From.Reg = r0
   226  		p.To.Type = obj.TYPE_REG
   227  		p.To.Reg = out
   228  		// ADD reg1,out
   229  		p1 := s.Prog(ppc64.AADD)
   230  		p1.From.Type = obj.TYPE_REG
   231  		p1.From.Reg = r1
   232  		p1.To.Reg = out
   233  		p1.To.Type = obj.TYPE_REG
   234  		// STDCCC or STWCCC
   235  		p3 := s.Prog(st)
   236  		p3.From.Type = obj.TYPE_REG
   237  		p3.From.Reg = out
   238  		p3.To.Type = obj.TYPE_MEM
   239  		p3.To.Reg = r0
   240  		// BNE retry
   241  		p4 := s.Prog(ppc64.ABNE)
   242  		p4.To.Type = obj.TYPE_BRANCH
   243  		gc.Patch(p4, p)
   244  
   245  		// Ensure a 32 bit result
   246  		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
   247  			p5 := s.Prog(ppc64.AMOVWZ)
   248  			p5.To.Type = obj.TYPE_REG
   249  			p5.To.Reg = out
   250  			p5.From.Type = obj.TYPE_REG
   251  			p5.From.Reg = out
   252  		}
   253  
   254  	case ssa.OpPPC64LoweredAtomicExchange32,
   255  		ssa.OpPPC64LoweredAtomicExchange64:
   256  		// LWSYNC
   257  		// LDAR/LWAR    (Rarg0), Rout
   258  		// STDCCC/STWCCC Rout, (Rarg0)
   259  		// BNE         -2(PC)
   260  		// ISYNC
   261  		ld := ppc64.ALDAR
   262  		st := ppc64.ASTDCCC
   263  		if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
   264  			ld = ppc64.ALWAR
   265  			st = ppc64.ASTWCCC
   266  		}
   267  		r0 := v.Args[0].Reg()
   268  		r1 := v.Args[1].Reg()
   269  		out := v.Reg0()
   270  		// LWSYNC - Assuming shared data not write-through-required nor
   271  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   272  		plwsync := s.Prog(ppc64.ALWSYNC)
   273  		plwsync.To.Type = obj.TYPE_NONE
   274  		// LDAR or LWAR
   275  		p := s.Prog(ld)
   276  		p.From.Type = obj.TYPE_MEM
   277  		p.From.Reg = r0
   278  		p.To.Type = obj.TYPE_REG
   279  		p.To.Reg = out
   280  		// STDCCC or STWCCC
   281  		p1 := s.Prog(st)
   282  		p1.From.Type = obj.TYPE_REG
   283  		p1.From.Reg = r1
   284  		p1.To.Type = obj.TYPE_MEM
   285  		p1.To.Reg = r0
   286  		// BNE retry
   287  		p2 := s.Prog(ppc64.ABNE)
   288  		p2.To.Type = obj.TYPE_BRANCH
   289  		gc.Patch(p2, p)
   290  		// ISYNC
   291  		pisync := s.Prog(ppc64.AISYNC)
   292  		pisync.To.Type = obj.TYPE_NONE
   293  
   294  	case ssa.OpPPC64LoweredAtomicLoad8,
   295  		ssa.OpPPC64LoweredAtomicLoad32,
   296  		ssa.OpPPC64LoweredAtomicLoad64,
   297  		ssa.OpPPC64LoweredAtomicLoadPtr:
   298  		// SYNC
   299  		// MOVBZ/MOVWZ/MOVD (Rarg0), Rout
   300  		// CMP Rout,Rout
   301  		// BNE 1(PC)
   302  		// ISYNC
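        		// The CMP/BNE pair makes the ISYNC depend on the loaded value,
        		// ordering later accesses after the load (the usual Power
        		// load-acquire idiom).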
   303  		ld := ppc64.AMOVD
   304  		cmp := ppc64.ACMP
   305  		switch v.Op {
   306  		case ssa.OpPPC64LoweredAtomicLoad8:
   307  			ld = ppc64.AMOVBZ
   308  		case ssa.OpPPC64LoweredAtomicLoad32:
   309  			ld = ppc64.AMOVWZ
   310  			cmp = ppc64.ACMPW
   311  		}
   312  		arg0 := v.Args[0].Reg()
   313  		out := v.Reg0()
   314  		// SYNC when AuxInt == 1; otherwise, load-acquire
   315  		if v.AuxInt == 1 {
   316  			psync := s.Prog(ppc64.ASYNC)
   317  			psync.To.Type = obj.TYPE_NONE
   318  		}
   319  		// Load
   320  		p := s.Prog(ld)
   321  		p.From.Type = obj.TYPE_MEM
   322  		p.From.Reg = arg0
   323  		p.To.Type = obj.TYPE_REG
   324  		p.To.Reg = out
   325  		// CMP
   326  		p1 := s.Prog(cmp)
   327  		p1.From.Type = obj.TYPE_REG
   328  		p1.From.Reg = out
   329  		p1.To.Type = obj.TYPE_REG
   330  		p1.To.Reg = out
   331  		// BNE
   332  		p2 := s.Prog(ppc64.ABNE)
   333  		p2.To.Type = obj.TYPE_BRANCH
   334  		// ISYNC
   335  		pisync := s.Prog(ppc64.AISYNC)
   336  		pisync.To.Type = obj.TYPE_NONE
   337  		gc.Patch(p2, pisync)
   338  
   339  	case ssa.OpPPC64LoweredAtomicStore8,
   340  		ssa.OpPPC64LoweredAtomicStore32,
   341  		ssa.OpPPC64LoweredAtomicStore64:
   342  		// SYNC or LWSYNC
   343  		// MOVB/MOVW/MOVD arg1,(arg0)
   344  		st := ppc64.AMOVD
   345  		switch v.Op {
   346  		case ssa.OpPPC64LoweredAtomicStore8:
   347  			st = ppc64.AMOVB
   348  		case ssa.OpPPC64LoweredAtomicStore32:
   349  			st = ppc64.AMOVW
   350  		}
   351  		arg0 := v.Args[0].Reg()
   352  		arg1 := v.Args[1].Reg()
   353  		// If AuxInt == 0, LWSYNC (Store-Release), else SYNC
   354  		// SYNC or LWSYNC
   355  		syncOp := ppc64.ASYNC
   356  		if v.AuxInt == 0 {
   357  			syncOp = ppc64.ALWSYNC
   358  		}
   359  		psync := s.Prog(syncOp)
   360  		psync.To.Type = obj.TYPE_NONE
   361  		// Store
   362  		p := s.Prog(st)
   363  		p.To.Type = obj.TYPE_MEM
   364  		p.To.Reg = arg0
   365  		p.From.Type = obj.TYPE_REG
   366  		p.From.Reg = arg1
   367  
   368  	case ssa.OpPPC64LoweredAtomicCas64,
   369  		ssa.OpPPC64LoweredAtomicCas32:
   370  		// LWSYNC
   371  		// loop:
   372  		// LDAR        (Rarg0), MutexHint, Rtmp
   373  		// CMP         Rarg1, Rtmp
   374  		// BNE         fail
   375  		// STDCCC      Rarg2, (Rarg0)
   376  		// BNE         loop
   377  		// LWSYNC      // Only for sequential consistency; not required in CasRel.
   378  		// MOVD        $1, Rout
   379  		// BR          end
   380  		// fail:
   381  		// MOVD        $0, Rout
   382  		// end:
   383  		ld := ppc64.ALDAR
   384  		st := ppc64.ASTDCCC
   385  		cmp := ppc64.ACMP
   386  		if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
   387  			ld = ppc64.ALWAR
   388  			st = ppc64.ASTWCCC
   389  			cmp = ppc64.ACMPW
   390  		}
   391  		r0 := v.Args[0].Reg()
   392  		r1 := v.Args[1].Reg()
   393  		r2 := v.Args[2].Reg()
   394  		out := v.Reg0()
   395  		// LWSYNC - Assuming shared data not write-through-required nor
   396  		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   397  		plwsync1 := s.Prog(ppc64.ALWSYNC)
   398  		plwsync1.To.Type = obj.TYPE_NONE
   399  		// LDAR or LWAR
   400  		p := s.Prog(ld)
   401  		p.From.Type = obj.TYPE_MEM
   402  		p.From.Reg = r0
   403  		p.To.Type = obj.TYPE_REG
   404  		p.To.Reg = ppc64.REGTMP
   405  		// If it is a Compare-and-Swap-Release operation, set the EH field with
   406  		// the release hint.
   407  		if v.AuxInt == 0 {
   408  			p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
   409  		}
   410  		// CMP reg1,reg2
   411  		p1 := s.Prog(cmp)
   412  		p1.From.Type = obj.TYPE_REG
   413  		p1.From.Reg = r1
   414  		p1.To.Reg = ppc64.REGTMP
   415  		p1.To.Type = obj.TYPE_REG
   416  		// BNE cas_fail
   417  		p2 := s.Prog(ppc64.ABNE)
   418  		p2.To.Type = obj.TYPE_BRANCH
   419  		// STDCCC or STWCCC
   420  		p3 := s.Prog(st)
   421  		p3.From.Type = obj.TYPE_REG
   422  		p3.From.Reg = r2
   423  		p3.To.Type = obj.TYPE_MEM
   424  		p3.To.Reg = r0
   425  		// BNE retry
   426  		p4 := s.Prog(ppc64.ABNE)
   427  		p4.To.Type = obj.TYPE_BRANCH
   428  		gc.Patch(p4, p)
   429  		// LWSYNC - Assuming shared data not write-through-required nor
   430  		// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
   431  		// If the operation is a CAS-Release, then synchronization is not necessary.
   432  		if v.AuxInt != 0 {
   433  			plwsync2 := s.Prog(ppc64.ALWSYNC)
   434  			plwsync2.To.Type = obj.TYPE_NONE
   435  		}
   436  		// return true
   437  		p5 := s.Prog(ppc64.AMOVD)
   438  		p5.From.Type = obj.TYPE_CONST
   439  		p5.From.Offset = 1
   440  		p5.To.Type = obj.TYPE_REG
   441  		p5.To.Reg = out
   442  		// BR done
   443  		p6 := s.Prog(obj.AJMP)
   444  		p6.To.Type = obj.TYPE_BRANCH
   445  		// return false
   446  		p7 := s.Prog(ppc64.AMOVD)
   447  		p7.From.Type = obj.TYPE_CONST
   448  		p7.From.Offset = 0
   449  		p7.To.Type = obj.TYPE_REG
   450  		p7.To.Reg = out
   451  		gc.Patch(p2, p7)
   452  		// done (label)
   453  		p8 := s.Prog(obj.ANOP)
   454  		gc.Patch(p6, p8)
   455  
   456  	case ssa.OpPPC64LoweredGetClosurePtr:
   457  		// Closure pointer is R11 (already)
   458  		gc.CheckLoweredGetClosurePtr(v)
   459  
   460  	case ssa.OpPPC64LoweredGetCallerSP:
   461  		// caller's SP is FixedFrameSize below the address of the first arg
   462  		p := s.Prog(ppc64.AMOVD)
   463  		p.From.Type = obj.TYPE_ADDR
   464  		p.From.Offset = -gc.Ctxt.FixedFrameSize()
   465  		p.From.Name = obj.NAME_PARAM
   466  		p.To.Type = obj.TYPE_REG
   467  		p.To.Reg = v.Reg()
   468  
   469  	case ssa.OpPPC64LoweredGetCallerPC:
   470  		p := s.Prog(obj.AGETCALLERPC)
   471  		p.To.Type = obj.TYPE_REG
   472  		p.To.Reg = v.Reg()
   473  
   474  	case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
   475  		// input is already rounded
   476  
   477  	case ssa.OpLoadReg:
   478  		loadOp := loadByType(v.Type)
   479  		p := s.Prog(loadOp)
   480  		gc.AddrAuto(&p.From, v.Args[0])
   481  		p.To.Type = obj.TYPE_REG
   482  		p.To.Reg = v.Reg()
   483  
   484  	case ssa.OpStoreReg:
   485  		storeOp := storeByType(v.Type)
   486  		p := s.Prog(storeOp)
   487  		p.From.Type = obj.TYPE_REG
   488  		p.From.Reg = v.Args[0].Reg()
   489  		gc.AddrAuto(&p.To, v)
   490  
   491  	case ssa.OpPPC64DIVD:
   492  		// For now,
   493  		//
   494  		// cmp arg1, -1
   495  		// be  ahead
   496  		// v = arg0 / arg1
   497  		// b over
   498  		// ahead: v = - arg0
   499  		// over: nop
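        		// arg1 == -1 is special-cased because the ISA leaves the
        		// quotient of MinInt64 / -1 undefined (overflow), while Go
        		// defines x / -1 == -x; NEG produces that result directly.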
   500  		r := v.Reg()
   501  		r0 := v.Args[0].Reg()
   502  		r1 := v.Args[1].Reg()
   503  
   504  		p := s.Prog(ppc64.ACMP)
   505  		p.From.Type = obj.TYPE_REG
   506  		p.From.Reg = r1
   507  		p.To.Type = obj.TYPE_CONST
   508  		p.To.Offset = -1
   509  
   510  		pbahead := s.Prog(ppc64.ABEQ)
   511  		pbahead.To.Type = obj.TYPE_BRANCH
   512  
   513  		p = s.Prog(v.Op.Asm())
   514  		p.From.Type = obj.TYPE_REG
   515  		p.From.Reg = r1
   516  		p.Reg = r0
   517  		p.To.Type = obj.TYPE_REG
   518  		p.To.Reg = r
   519  
   520  		pbover := s.Prog(obj.AJMP)
   521  		pbover.To.Type = obj.TYPE_BRANCH
   522  
   523  		p = s.Prog(ppc64.ANEG)
   524  		p.To.Type = obj.TYPE_REG
   525  		p.To.Reg = r
   526  		p.From.Type = obj.TYPE_REG
   527  		p.From.Reg = r0
   528  		gc.Patch(pbahead, p)
   529  
   530  		p = s.Prog(obj.ANOP)
   531  		gc.Patch(pbover, p)
   532  
   533  	case ssa.OpPPC64DIVW:
   534  		// word-width version of above
   535  		r := v.Reg()
   536  		r0 := v.Args[0].Reg()
   537  		r1 := v.Args[1].Reg()
   538  
   539  		p := s.Prog(ppc64.ACMPW)
   540  		p.From.Type = obj.TYPE_REG
   541  		p.From.Reg = r1
   542  		p.To.Type = obj.TYPE_CONST
   543  		p.To.Offset = -1
   544  
   545  		pbahead := s.Prog(ppc64.ABEQ)
   546  		pbahead.To.Type = obj.TYPE_BRANCH
   547  
   548  		p = s.Prog(v.Op.Asm())
   549  		p.From.Type = obj.TYPE_REG
   550  		p.From.Reg = r1
   551  		p.Reg = r0
   552  		p.To.Type = obj.TYPE_REG
   553  		p.To.Reg = r
   554  
   555  		pbover := s.Prog(obj.AJMP)
   556  		pbover.To.Type = obj.TYPE_BRANCH
   557  
   558  		p = s.Prog(ppc64.ANEG)
   559  		p.To.Type = obj.TYPE_REG
   560  		p.To.Reg = r
   561  		p.From.Type = obj.TYPE_REG
   562  		p.From.Reg = r0
   563  		gc.Patch(pbahead, p)
   564  
   565  		p = s.Prog(obj.ANOP)
   566  		gc.Patch(pbover, p)
   567  
   568  	case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
   569  		ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
   570  		ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
   571  		ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
   572  		ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
   573  		ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
   574  		ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:
   575  		r := v.Reg()
   576  		r1 := v.Args[0].Reg()
   577  		r2 := v.Args[1].Reg()
   578  		p := s.Prog(v.Op.Asm())
   579  		p.From.Type = obj.TYPE_REG
   580  		p.From.Reg = r2
   581  		p.Reg = r1
   582  		p.To.Type = obj.TYPE_REG
   583  		p.To.Reg = r
   584  
   585  	case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
   586  		r1 := v.Args[0].Reg()
   587  		r2 := v.Args[1].Reg()
   588  		p := s.Prog(v.Op.Asm())
   589  		p.From.Type = obj.TYPE_REG
   590  		p.From.Reg = r2
   591  		p.Reg = r1
   592  		p.To.Type = obj.TYPE_REG
   593  		p.To.Reg = ppc64.REGTMP // result is not needed
   594  
   595  	case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
   596  		p := s.Prog(v.Op.Asm())
   597  		p.From.Type = obj.TYPE_CONST
   598  		p.From.Offset = v.AuxInt
   599  		p.Reg = v.Args[0].Reg()
   600  		p.To.Type = obj.TYPE_REG
   601  		p.To.Reg = v.Reg()
   602  
   603  	case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
   604  		r := v.Reg()
   605  		r1 := v.Args[0].Reg()
   606  		r2 := v.Args[1].Reg()
   607  		r3 := v.Args[2].Reg()
   608  		// r = r1*r2 ± r3
   609  		p := s.Prog(v.Op.Asm())
   610  		p.From.Type = obj.TYPE_REG
   611  		p.From.Reg = r1
   612  		p.Reg = r3
   613  		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
   614  		p.To.Type = obj.TYPE_REG
   615  		p.To.Reg = r
   616  
   617  	case ssa.OpPPC64MaskIfNotCarry:
   618  		r := v.Reg()
   619  		p := s.Prog(v.Op.Asm())
   620  		p.From.Type = obj.TYPE_REG
   621  		p.From.Reg = ppc64.REGZERO
   622  		p.To.Type = obj.TYPE_REG
   623  		p.To.Reg = r
   624  
   625  	case ssa.OpPPC64ADDconstForCarry:
   626  		r1 := v.Args[0].Reg()
   627  		p := s.Prog(v.Op.Asm())
   628  		p.Reg = r1
   629  		p.From.Type = obj.TYPE_CONST
   630  		p.From.Offset = v.AuxInt
   631  		p.To.Type = obj.TYPE_REG
   632  		p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
   633  
   634  	case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
   635  		ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
   636  		ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
   637  		ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
   638  		r := v.Reg()
   639  		p := s.Prog(v.Op.Asm())
   640  		p.To.Type = obj.TYPE_REG
   641  		p.To.Reg = r
   642  		p.From.Type = obj.TYPE_REG
   643  		p.From.Reg = v.Args[0].Reg()
   644  
   645  	case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
   646  		ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
   647  		p := s.Prog(v.Op.Asm())
   648  		p.Reg = v.Args[0].Reg()
   649  		p.From.Type = obj.TYPE_CONST
   650  		p.From.Offset = v.AuxInt
   651  		p.To.Type = obj.TYPE_REG
   652  		p.To.Reg = v.Reg()
   653  
   654  	case ssa.OpPPC64ANDCCconst:
   655  		p := s.Prog(v.Op.Asm())
   656  		p.Reg = v.Args[0].Reg()
   657  
   658  		if v.Aux != nil {
   659  			p.From.Type = obj.TYPE_CONST
   660  			p.From.Offset = gc.AuxOffset(v)
   661  		} else {
   662  			p.From.Type = obj.TYPE_CONST
   663  			p.From.Offset = v.AuxInt
   664  		}
   665  
   666  		p.To.Type = obj.TYPE_REG
   667  		p.To.Reg = ppc64.REGTMP // discard result
   668  
   669  	case ssa.OpPPC64MOVDaddr:
   670  		switch v.Aux.(type) {
   671  		default:
   672  			v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
   673  		case nil:
   674  			// If aux offset and aux int are both 0, and the same
   675  			// input and output regs are used, no instruction
   676  			// needs to be generated, since it would just be
   677  			// addi rx, rx, 0.
   678  			if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
   679  				p := s.Prog(ppc64.AMOVD)
   680  				p.From.Type = obj.TYPE_ADDR
   681  				p.From.Reg = v.Args[0].Reg()
   682  				p.From.Offset = v.AuxInt
   683  				p.To.Type = obj.TYPE_REG
   684  				p.To.Reg = v.Reg()
   685  			}
   686  
   687  		case *obj.LSym, *gc.Node:
   688  			p := s.Prog(ppc64.AMOVD)
   689  			p.From.Type = obj.TYPE_ADDR
   690  			p.From.Reg = v.Args[0].Reg()
   691  			p.To.Type = obj.TYPE_REG
   692  			p.To.Reg = v.Reg()
   693  			gc.AddAux(&p.From, v)
   694  
   695  		}
   696  
   697  	case ssa.OpPPC64MOVDconst:
   698  		p := s.Prog(v.Op.Asm())
   699  		p.From.Type = obj.TYPE_CONST
   700  		p.From.Offset = v.AuxInt
   701  		p.To.Type = obj.TYPE_REG
   702  		p.To.Reg = v.Reg()
   703  
   704  	case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
   705  		p := s.Prog(v.Op.Asm())
   706  		p.From.Type = obj.TYPE_FCONST
   707  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   708  		p.To.Type = obj.TYPE_REG
   709  		p.To.Reg = v.Reg()
   710  
   711  	case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
   712  		p := s.Prog(v.Op.Asm())
   713  		p.From.Type = obj.TYPE_REG
   714  		p.From.Reg = v.Args[0].Reg()
   715  		p.To.Type = obj.TYPE_REG
   716  		p.To.Reg = v.Args[1].Reg()
   717  
   718  	case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
   719  		p := s.Prog(v.Op.Asm())
   720  		p.From.Type = obj.TYPE_REG
   721  		p.From.Reg = v.Args[0].Reg()
   722  		p.To.Type = obj.TYPE_CONST
   723  		p.To.Offset = v.AuxInt
   724  
   725  	case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
   726  		// Shift in register to required size
   727  		p := s.Prog(v.Op.Asm())
   728  		p.From.Type = obj.TYPE_REG
   729  		p.From.Reg = v.Args[0].Reg()
   730  		p.To.Reg = v.Reg()
   731  		p.To.Type = obj.TYPE_REG
   732  
   733  	case ssa.OpPPC64MOVDload:
   734  
   735  		// MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
   736  		// For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
   737  		// the offset is not known until link time. If the load of a go.string uses relocation for the
   738  		// offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
   739  		// To avoid this problem, the full address of the go.string is computed and loaded into the base register,
   740  		// and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
   741  		// go.string types because other types will have proper alignment.
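        		//
        		// Illustrative shape of the emitted pair (symbol and offset
        		// are hypothetical):
        		//	MOVD $go.string."…"+off(SB), Rx
        		//	MOVD (Rx), Rx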
   742  
   743  		gostring := false
   744  		switch n := v.Aux.(type) {
   745  		case *obj.LSym:
   746  			gostring = strings.HasPrefix(n.Name, "go.string.")
   747  		}
   748  		if gostring {
   749  			// Generate full addr of the go.string const
   750  			// including AuxInt
   751  			p := s.Prog(ppc64.AMOVD)
   752  			p.From.Type = obj.TYPE_ADDR
   753  			p.From.Reg = v.Args[0].Reg()
   754  			gc.AddAux(&p.From, v)
   755  			p.To.Type = obj.TYPE_REG
   756  			p.To.Reg = v.Reg()
   757  			// Load go.string using 0 offset
   758  			p = s.Prog(v.Op.Asm())
   759  			p.From.Type = obj.TYPE_MEM
   760  			p.From.Reg = v.Reg()
   761  			p.To.Type = obj.TYPE_REG
   762  			p.To.Reg = v.Reg()
   763  			break
   764  		}
   765  		// Not a go.string, generate a normal load
   766  		fallthrough
   767  
   768  	case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
   769  		p := s.Prog(v.Op.Asm())
   770  		p.From.Type = obj.TYPE_MEM
   771  		p.From.Reg = v.Args[0].Reg()
   772  		gc.AddAux(&p.From, v)
   773  		p.To.Type = obj.TYPE_REG
   774  		p.To.Reg = v.Reg()
   775  
   776  	case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
   777  		p := s.Prog(v.Op.Asm())
   778  		p.From.Type = obj.TYPE_MEM
   779  		p.From.Reg = v.Args[0].Reg()
   780  		p.To.Type = obj.TYPE_REG
   781  		p.To.Reg = v.Reg()
   782  
   783  	case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
   784  		p := s.Prog(v.Op.Asm())
   785  		p.To.Type = obj.TYPE_MEM
   786  		p.To.Reg = v.Args[0].Reg()
   787  		p.From.Type = obj.TYPE_REG
   788  		p.From.Reg = v.Args[1].Reg()
   789  
   790  	case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
   791  		ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
   792  		ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
   793  		p := s.Prog(v.Op.Asm())
   794  		p.From.Type = obj.TYPE_MEM
   795  		p.From.Reg = v.Args[0].Reg()
   796  		p.From.Index = v.Args[1].Reg()
   797  		gc.AddAux(&p.From, v)
   798  		p.To.Type = obj.TYPE_REG
   799  		p.To.Reg = v.Reg()
   800  
   801  	case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
   802  		p := s.Prog(v.Op.Asm())
   803  		p.From.Type = obj.TYPE_REG
   804  		p.From.Reg = ppc64.REGZERO
   805  		p.To.Type = obj.TYPE_MEM
   806  		p.To.Reg = v.Args[0].Reg()
   807  		gc.AddAux(&p.To, v)
   808  
   809  	case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
   810  		p := s.Prog(v.Op.Asm())
   811  		p.From.Type = obj.TYPE_REG
   812  		p.From.Reg = v.Args[1].Reg()
   813  		p.To.Type = obj.TYPE_MEM
   814  		p.To.Reg = v.Args[0].Reg()
   815  		gc.AddAux(&p.To, v)
   816  
   817  	case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
   818  		ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
   819  		ssa.OpPPC64MOVHBRstoreidx:
   820  		p := s.Prog(v.Op.Asm())
   821  		p.From.Type = obj.TYPE_REG
   822  		p.From.Reg = v.Args[2].Reg()
   823  		p.To.Index = v.Args[1].Reg()
   824  		p.To.Type = obj.TYPE_MEM
   825  		p.To.Reg = v.Args[0].Reg()
   826  		gc.AddAux(&p.To, v)
   827  
   828  	case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
   829  		// ISEL, ISELB
   830  		// AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
   831  		// ISEL only accepts 0, 1, 2 condition values but the others can be
   832  		// achieved by swapping operand order.
   833  		// arg0 ? arg1 : arg2 with conditions LT, GT, EQ
   834  		// arg0 ? arg2 : arg1 for conditions GE, LE, NE
   835  		// ISELB is used when a boolean result is needed, returning 0 or 1
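        		// For example, AuxInt == 6 (NE) is emitted as ISEL with
        		// condition EQ (6 & 3 == 2) and the operand order swapped,
        		// so the result comes from the EQ-false operand.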
   836  		p := s.Prog(ppc64.AISEL)
   837  		p.To.Type = obj.TYPE_REG
   838  		p.To.Reg = v.Reg()
   839  		// For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
   840  		r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
   841  		if v.Op == ssa.OpPPC64ISEL {
   842  			r.Reg = v.Args[1].Reg()
   843  		}
   844  		// AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
   845  		if v.AuxInt > 3 {
   846  			p.Reg = r.Reg
   847  			p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
   848  		} else {
   849  			p.Reg = v.Args[0].Reg()
   850  			p.SetFrom3(r)
   851  		}
   852  		p.From.Type = obj.TYPE_CONST
   853  		p.From.Offset = v.AuxInt & 3
   854  
   855  	case ssa.OpPPC64LoweredZero:
   856  
   857  		// unaligned data doesn't hurt performance
   858  		// for these instructions on power8 or later
   859  
   860  		// for sizes >= 64 generate a loop as follows:
   861  
   862  		// set up loop counter in CTR, used by BC
   863  		//       XXLXOR VS32,VS32,VS32
   864  		//	 MOVD len/32,REG_TMP
   865  		//	 MOVD REG_TMP,CTR
   866  		//       MOVD $16,REG_TMP
   867  		//	 loop:
   868  		//	 STXVD2X VS32,(R0)(R3)
   869  		//	 STXVD2X VS32,(R31)(R3)
   870  		//	 ADD  $32,R3
   871  		//	 BC   16, 0, loop
   872  		//
   873  		// any remainder is done as described below
   874  
   875  		// for sizes < 64 bytes, first clear as many doublewords as possible,
   876  		// then handle the remainder
   877  		//	MOVD R0,(R3)
   878  		//	MOVD R0,8(R3)
   879  		// .... etc.
   880  		//
   881  		// the remainder bytes are cleared using one or more
   882  		// of the following instructions with the appropriate
   883  		// offsets depending which instructions are needed
   884  		//
   885  		//	MOVW R0,n1(R3)	4 bytes
   886  		//	MOVH R0,n2(R3)	2 bytes
   887  		//	MOVB R0,n3(R3)	1 byte
   888  		//
   889  		// 7 bytes: MOVW, MOVH, MOVB
   890  		// 6 bytes: MOVW, MOVH
   891  		// 5 bytes: MOVW, MOVB
   892  		// 3 bytes: MOVH, MOVB
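        		//
        		// e.g. for a 71 byte zero: the loop clears 64 bytes (ctr == 2,
        		// advancing R3 past them), then the 7 byte remainder is cleared
        		// with
        		//	MOVW R0,0(R3)
        		//	MOVH R0,4(R3)
        		//	MOVB R0,6(R3)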
   893  
   894  		// each loop iteration does 32 bytes
   895  		ctr := v.AuxInt / 32
   896  
   897  		// remainder bytes
   898  		rem := v.AuxInt % 32
   899  
   900  		// only generate a loop if there is more
   901  		// than 1 iteration.
   902  		if ctr > 1 {
   903  			// Set up VS32 (V0) to hold 0s
   904  			p := s.Prog(ppc64.AXXLXOR)
   905  			p.From.Type = obj.TYPE_REG
   906  			p.From.Reg = ppc64.REG_VS32
   907  			p.To.Type = obj.TYPE_REG
   908  			p.To.Reg = ppc64.REG_VS32
   909  			p.Reg = ppc64.REG_VS32
   910  
   911  			// Set up CTR loop counter
   912  			p = s.Prog(ppc64.AMOVD)
   913  			p.From.Type = obj.TYPE_CONST
   914  			p.From.Offset = ctr
   915  			p.To.Type = obj.TYPE_REG
   916  			p.To.Reg = ppc64.REGTMP
   917  
   918  			p = s.Prog(ppc64.AMOVD)
   919  			p.From.Type = obj.TYPE_REG
   920  			p.From.Reg = ppc64.REGTMP
   921  			p.To.Type = obj.TYPE_REG
   922  			p.To.Reg = ppc64.REG_CTR
   923  
   924  			// Set up R31 to hold index value 16
   925  			p = s.Prog(ppc64.AMOVD)
   926  			p.From.Type = obj.TYPE_CONST
   927  			p.From.Offset = 16
   928  			p.To.Type = obj.TYPE_REG
   929  			p.To.Reg = ppc64.REGTMP
   930  
   931  			// generate 2 STXVD2Xs, each storing 16 bytes
   932  			// when this is a loop then the top must be saved
   933  			var top *obj.Prog
   934  			// This is the top of loop
   935  			p = s.Prog(ppc64.ASTXVD2X)
   936  			p.From.Type = obj.TYPE_REG
   937  			p.From.Reg = ppc64.REG_VS32
   938  			p.To.Type = obj.TYPE_MEM
   939  			p.To.Reg = v.Args[0].Reg()
   940  			p.To.Index = ppc64.REGZERO
   941  			// Save the top of loop
   942  			if top == nil {
   943  				top = p
   944  			}
   945  
   946  			p = s.Prog(ppc64.ASTXVD2X)
   947  			p.From.Type = obj.TYPE_REG
   948  			p.From.Reg = ppc64.REG_VS32
   949  			p.To.Type = obj.TYPE_MEM
   950  			p.To.Reg = v.Args[0].Reg()
   951  			p.To.Index = ppc64.REGTMP
   952  
   953  			// Increment address for the
   954  			// 4 doublewords just zeroed.
   955  			p = s.Prog(ppc64.AADD)
   956  			p.Reg = v.Args[0].Reg()
   957  			p.From.Type = obj.TYPE_CONST
   958  			p.From.Offset = 32
   959  			p.To.Type = obj.TYPE_REG
   960  			p.To.Reg = v.Args[0].Reg()
   961  
   962  			// Branch back to top of loop
   963  			// based on CTR
   964  			// BC with BO_BCTR generates bdnz
   965  			p = s.Prog(ppc64.ABC)
   966  			p.From.Type = obj.TYPE_CONST
   967  			p.From.Offset = ppc64.BO_BCTR
   968  			p.Reg = ppc64.REG_R0
   969  			p.To.Type = obj.TYPE_BRANCH
   970  			gc.Patch(p, top)
   971  		}
   972  
   973  		// when ctr == 1 the loop was not generated but
   974  		// there are at least 32 bytes to clear, so add
   975  		// that to the remainder to generate the code
   976  		// to clear those doublewords
   977  		if ctr == 1 {
   978  			rem += 32
   979  		}
   980  
   981  		// clear the remainder starting at offset zero
   982  		offset := int64(0)
   983  
   984  		// first clear as many doublewords as possible
   985  		// then clear remaining sizes as available
   986  		for rem > 0 {
   987  			op, size := ppc64.AMOVB, int64(1)
   988  			switch {
   989  			case rem >= 8:
   990  				op, size = ppc64.AMOVD, 8
   991  			case rem >= 4:
   992  				op, size = ppc64.AMOVW, 4
   993  			case rem >= 2:
   994  				op, size = ppc64.AMOVH, 2
   995  			}
   996  			p := s.Prog(op)
   997  			p.From.Type = obj.TYPE_REG
   998  			p.From.Reg = ppc64.REG_R0
   999  			p.To.Type = obj.TYPE_MEM
  1000  			p.To.Reg = v.Args[0].Reg()
  1001  			p.To.Offset = offset
  1002  			rem -= size
  1003  			offset += size
  1004  		}
  1005  
  1006  	case ssa.OpPPC64LoweredMove:
  1007  
  1008  		// This will be used when moving more
  1009  		// than 8 bytes.  Moves start with
  1010  		// as many 8 byte moves as possible, then
  1011  		// 4, 2, or 1 byte(s) as remaining.  This will
  1012  		// work and be efficient for power8 or later.
  1013  		// If there are 64 or more bytes, then a
  1014  		// loop is generated to move 32 bytes and
  1015  		// update the src and dst addresses on each
  1016  		// iteration. When < 64 bytes, the appropriate
  1017  		// number of moves are generated based on the
  1018  		// size.
  1019  		// When moving >= 64 bytes a loop is used
  1020  		//	MOVD len/32,REG_TMP
  1021  		//	MOVD REG_TMP,CTR
  1022  		//	MOVD $16,REG_TMP
  1023  		// top:
  1024  		//	LXVD2X (R0)(R4),VS32
  1025  		//	LXVD2X (R31)(R4),VS33
  1026  		//	ADD $32,R4
  1027  		//	STXVD2X VS32,(R0)(R3)
  1028  		//	STXVD2X VS33,(R31)(R3)
  1029  		//	ADD $32,R3
  1030  		//	BC 16,0,top
  1031  		// Bytes not moved by this loop are moved
  1032  		// with a combination of the following instructions,
  1033  		// starting with the largest sizes and generating as
  1034  		// many as needed, using the appropriate offset value.
  1035  		//	MOVD  n(R4),R14
  1036  		//	MOVD  R14,n(R3)
  1037  		//	MOVW  n1(R4),R14
  1038  		//	MOVW  R14,n1(R3)
  1039  		//	MOVH  n2(R4),R14
  1040  		//	MOVH  R14,n2(R3)
  1041  		//	MOVB  n3(R4),R14
  1042  		//	MOVB  R14,n3(R3)
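        		//
        		// e.g. for a 41 byte move: ctr == 1, so no loop is generated
        		// and rem becomes 41; two LXVD2X/STXVD2X pairs move 32 bytes,
        		// then a MOVD moves bytes 32-39 and a MOVB moves byte 40.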
  1043  
  1044  		// Each loop iteration moves 32 bytes
  1045  		ctr := v.AuxInt / 32
  1046  
  1047  		// Remainder after the loop
  1048  		rem := v.AuxInt % 32
  1049  
  1050  		dst_reg := v.Args[0].Reg()
  1051  		src_reg := v.Args[1].Reg()
  1052  
  1053  		// The set of registers used here must match the clobbered reg list
  1054  		// in PPC64Ops.go.
  1055  		offset := int64(0)
  1056  
  1057  		// top of the loop
  1058  		var top *obj.Prog
  1059  		// Only generate looping code when the loop counter is > 1 (i.e. >= 64 bytes)
  1060  		if ctr > 1 {
  1061  			// Set up the CTR
  1062  			p := s.Prog(ppc64.AMOVD)
  1063  			p.From.Type = obj.TYPE_CONST
  1064  			p.From.Offset = ctr
  1065  			p.To.Type = obj.TYPE_REG
  1066  			p.To.Reg = ppc64.REGTMP
  1067  
  1068  			p = s.Prog(ppc64.AMOVD)
  1069  			p.From.Type = obj.TYPE_REG
  1070  			p.From.Reg = ppc64.REGTMP
  1071  			p.To.Type = obj.TYPE_REG
  1072  			p.To.Reg = ppc64.REG_CTR
  1073  
  1074  			// Use REGTMP as index reg
  1075  			p = s.Prog(ppc64.AMOVD)
  1076  			p.From.Type = obj.TYPE_CONST
  1077  			p.From.Offset = 16
  1078  			p.To.Type = obj.TYPE_REG
  1079  			p.To.Reg = ppc64.REGTMP
  1080  
  1081  			// Generate 16 byte loads and stores.
  1082  			// Use temp register for index (16)
  1083  			// on the second one.
  1084  			p = s.Prog(ppc64.ALXVD2X)
  1085  			p.From.Type = obj.TYPE_MEM
  1086  			p.From.Reg = src_reg
  1087  			p.From.Index = ppc64.REGZERO
  1088  			p.To.Type = obj.TYPE_REG
  1089  			p.To.Reg = ppc64.REG_VS32
  1090  
  1091  			if top == nil {
  1092  				top = p
  1093  			}
  1094  
  1095  			p = s.Prog(ppc64.ALXVD2X)
  1096  			p.From.Type = obj.TYPE_MEM
  1097  			p.From.Reg = src_reg
  1098  			p.From.Index = ppc64.REGTMP
  1099  			p.To.Type = obj.TYPE_REG
  1100  			p.To.Reg = ppc64.REG_VS33
  1101  
  1102  			// increment the src reg for next iteration
  1103  			p = s.Prog(ppc64.AADD)
  1104  			p.Reg = src_reg
  1105  			p.From.Type = obj.TYPE_CONST
  1106  			p.From.Offset = 32
  1107  			p.To.Type = obj.TYPE_REG
  1108  			p.To.Reg = src_reg
  1109  
  1110  			// generate 16 byte stores
  1111  			p = s.Prog(ppc64.ASTXVD2X)
  1112  			p.From.Type = obj.TYPE_REG
  1113  			p.From.Reg = ppc64.REG_VS32
  1114  			p.To.Type = obj.TYPE_MEM
  1115  			p.To.Reg = dst_reg
  1116  			p.To.Index = ppc64.REGZERO
  1117  
  1118  			p = s.Prog(ppc64.ASTXVD2X)
  1119  			p.From.Type = obj.TYPE_REG
  1120  			p.From.Reg = ppc64.REG_VS33
  1121  			p.To.Type = obj.TYPE_MEM
  1122  			p.To.Reg = dst_reg
  1123  			p.To.Index = ppc64.REGTMP
  1124  
  1125  			// increment the dst reg for next iteration
  1126  			p = s.Prog(ppc64.AADD)
  1127  			p.Reg = dst_reg
  1128  			p.From.Type = obj.TYPE_CONST
  1129  			p.From.Offset = 32
  1130  			p.To.Type = obj.TYPE_REG
  1131  			p.To.Reg = dst_reg
  1132  
  1133  			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
  1134  			// to loop top.
  1135  			p = s.Prog(ppc64.ABC)
  1136  			p.From.Type = obj.TYPE_CONST
  1137  			p.From.Offset = ppc64.BO_BCTR
  1138  			p.Reg = ppc64.REG_R0
  1139  			p.To.Type = obj.TYPE_BRANCH
  1140  			gc.Patch(p, top)
  1141  
  1142  			// src_reg and dst_reg were incremented in the loop, so
  1143  			// later instructions start with offset 0.
  1144  			offset = int64(0)
  1145  		}
  1146  
  1147  		// When ctr == 1 the loop was not generated, so add those
  1148  		// 32 bytes to the remainder to be moved below.
  1149  		if ctr == 1 {
  1150  			rem += 32
  1151  		}
  1152  
  1153  		if rem >= 16 {
  1154  			// Generate 16 byte loads and stores.
  1155  			// Use temp register for index (value 16)
  1156  			// on the second one.
  1157  			p := s.Prog(ppc64.ALXVD2X)
  1158  			p.From.Type = obj.TYPE_MEM
  1159  			p.From.Reg = src_reg
  1160  			p.From.Index = ppc64.REGZERO
  1161  			p.To.Type = obj.TYPE_REG
  1162  			p.To.Reg = ppc64.REG_VS32
  1163  
  1164  			p = s.Prog(ppc64.ASTXVD2X)
  1165  			p.From.Type = obj.TYPE_REG
  1166  			p.From.Reg = ppc64.REG_VS32
  1167  			p.To.Type = obj.TYPE_MEM
  1168  			p.To.Reg = dst_reg
  1169  			p.To.Index = ppc64.REGZERO
  1170  
  1171  			offset = 16
  1172  			rem -= 16
  1173  
  1174  			if rem >= 16 {
  1175  				// Use REGTMP as index reg
  1176  				p = s.Prog(ppc64.AMOVD)
  1177  				p.From.Type = obj.TYPE_CONST
  1178  				p.From.Offset = 16
  1179  				p.To.Type = obj.TYPE_REG
  1180  				p.To.Reg = ppc64.REGTMP
  1181  
  1182  				// Generate 16 byte loads and stores.
  1183  				// Use temp register for index (16)
  1184  				// on the second one.
  1185  				p = s.Prog(ppc64.ALXVD2X)
  1186  				p.From.Type = obj.TYPE_MEM
  1187  				p.From.Reg = src_reg
  1188  				p.From.Index = ppc64.REGTMP
  1189  				p.To.Type = obj.TYPE_REG
  1190  				p.To.Reg = ppc64.REG_VS32
  1191  
  1192  				p = s.Prog(ppc64.ASTXVD2X)
  1193  				p.From.Type = obj.TYPE_REG
  1194  				p.From.Reg = ppc64.REG_VS32
  1195  				p.To.Type = obj.TYPE_MEM
  1196  				p.To.Reg = dst_reg
  1197  				p.To.Index = ppc64.REGTMP
  1198  
  1199  				offset = 32
  1200  				rem -= 16
  1201  			}
  1202  		}
  1203  
  1204  		// Generate all the remaining load and store pairs, starting with
  1205  		// as many 8 byte moves as possible, then 4, 2, 1.
  1206  		for rem > 0 {
  1207  			op, size := ppc64.AMOVB, int64(1)
  1208  			switch {
  1209  			case rem >= 8:
  1210  				op, size = ppc64.AMOVD, 8
  1211  			case rem >= 4:
  1212  				op, size = ppc64.AMOVW, 4
  1213  			case rem >= 2:
  1214  				op, size = ppc64.AMOVH, 2
  1215  			}
  1216  			// Load
  1217  			p := s.Prog(op)
  1218  			p.To.Type = obj.TYPE_REG
  1219  			p.To.Reg = ppc64.REG_R14
  1220  			p.From.Type = obj.TYPE_MEM
  1221  			p.From.Reg = src_reg
  1222  			p.From.Offset = offset
  1223  
  1224  			// Store
  1225  			p = s.Prog(op)
  1226  			p.From.Type = obj.TYPE_REG
  1227  			p.From.Reg = ppc64.REG_R14
  1228  			p.To.Type = obj.TYPE_MEM
  1229  			p.To.Reg = dst_reg
  1230  			p.To.Offset = offset
  1231  			rem -= size
  1232  			offset += size
  1233  		}
  1234  
  1235  	case ssa.OpPPC64CALLstatic:
  1236  		s.Call(v)
  1237  
  1238  	case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
  1239  		p := s.Prog(ppc64.AMOVD)
  1240  		p.From.Type = obj.TYPE_REG
  1241  		p.From.Reg = v.Args[0].Reg()
  1242  		p.To.Type = obj.TYPE_REG
  1243  		p.To.Reg = ppc64.REG_LR
  1244  
  1245  		if v.Args[0].Reg() != ppc64.REG_R12 {
  1246  			v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
  1247  		}
  1248  
  1249  		pp := s.Call(v)
  1250  		pp.To.Reg = ppc64.REG_LR
  1251  
  1252  		if gc.Ctxt.Flag_shared {
  1253  			// When compiling Go into PIC, the function we just
  1254  			// called via pointer might have been implemented in
  1255  			// a separate module and so overwritten the TOC
  1256  			// pointer in R2; reload it.
  1257  			q := s.Prog(ppc64.AMOVD)
  1258  			q.From.Type = obj.TYPE_MEM
  1259  			q.From.Offset = 24
  1260  			q.From.Reg = ppc64.REGSP
  1261  			q.To.Type = obj.TYPE_REG
  1262  			q.To.Reg = ppc64.REG_R2
  1263  		}
  1264  
  1265  	case ssa.OpPPC64LoweredWB:
  1266  		p := s.Prog(obj.ACALL)
  1267  		p.To.Type = obj.TYPE_MEM
  1268  		p.To.Name = obj.NAME_EXTERN
  1269  		p.To.Sym = v.Aux.(*obj.LSym)
  1270  
  1271  	case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
  1272  		p := s.Prog(obj.ACALL)
  1273  		p.To.Type = obj.TYPE_MEM
  1274  		p.To.Name = obj.NAME_EXTERN
  1275  		p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
  1276  		s.UseArgs(16) // space used in callee args area by assembly stubs
  1277  
  1278  	case ssa.OpPPC64LoweredNilCheck:
  1279  		if objabi.GOOS == "aix" {
  1280  			// CMP Rarg0, R0
  1281  			// BNE 2(PC)
  1282  			// STW R0, 0(R0)
  1283  			// NOP (so the BNE has somewhere to land)
  1284  
  1285  			// CMP Rarg0, R0
  1286  			p := s.Prog(ppc64.ACMP)
  1287  			p.From.Type = obj.TYPE_REG
  1288  			p.From.Reg = v.Args[0].Reg()
  1289  			p.To.Type = obj.TYPE_REG
  1290  			p.To.Reg = ppc64.REG_R0
  1291  
  1292  			// BNE 2(PC)
  1293  			p2 := s.Prog(ppc64.ABNE)
  1294  			p2.To.Type = obj.TYPE_BRANCH
  1295  
  1296  			// STW R0, 0(R0)
  1297  			// Write at 0 is forbidden and will trigger a SIGSEGV
  1298  			p = s.Prog(ppc64.AMOVW)
  1299  			p.From.Type = obj.TYPE_REG
  1300  			p.From.Reg = ppc64.REG_R0
  1301  			p.To.Type = obj.TYPE_MEM
  1302  			p.To.Reg = ppc64.REG_R0
  1303  
  1304  			// NOP (so the BNE has somewhere to land)
  1305  			nop := s.Prog(obj.ANOP)
  1306  			gc.Patch(p2, nop)
  1307  
  1308  		} else {
  1309  			// Issue a load which will fault if arg is nil.
  1310  			p := s.Prog(ppc64.AMOVBZ)
  1311  			p.From.Type = obj.TYPE_MEM
  1312  			p.From.Reg = v.Args[0].Reg()
  1313  			gc.AddAux(&p.From, v)
  1314  			p.To.Type = obj.TYPE_REG
  1315  			p.To.Reg = ppc64.REGTMP
  1316  		}
  1317  		if logopt.Enabled() {
  1318  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1319  		}
  1320  		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
  1321  			gc.Warnl(v.Pos, "generated nil check")
  1322  		}
  1323  
  1324  	// These should be resolved by rules and not make it here.
  1325  	case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
  1326  		ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
  1327  		ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
  1328  		v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
  1329  	case ssa.OpPPC64InvertFlags:
  1330  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1331  	case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
  1332  		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
  1333  	case ssa.OpClobber:
  1334  		// TODO: implement for clobberdead experiment. Nop is ok for now.
  1335  	default:
  1336  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1337  	}
  1338  }
  1339  
  1340  var blockJump = [...]struct {
  1341  	asm, invasm     obj.As
  1342  	asmeq, invasmun bool
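        	// asmeq: also emit a BEQ after the positive branch (e.g. FGE
        	// is BGT or BEQ); invasmun: also emit a BVS after the inverted
        	// branch so unordered (NaN) comparisons take the false successor.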
  1343  }{
  1344  	ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
  1345  	ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
  1346  
  1347  	ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
  1348  	ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
  1349  	ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
  1350  	ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
  1351  
  1352  	// TODO: need to work FP comparisons into block jumps
  1353  	ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
  1354  	ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
  1355  	ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
  1356  	ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
  1357  }
  1358  
  1359  func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
  1360  	switch b.Kind {
  1361  	case ssa.BlockDefer:
  1362  		// defer returns in R3:
  1363  		// 0 if we should continue executing
  1364  		// 1 if we should jump to deferreturn call
  1365  		p := s.Prog(ppc64.ACMP)
  1366  		p.From.Type = obj.TYPE_REG
  1367  		p.From.Reg = ppc64.REG_R3
  1368  		p.To.Type = obj.TYPE_REG
  1369  		p.To.Reg = ppc64.REG_R0
  1370  
  1371  		p = s.Prog(ppc64.ABNE)
  1372  		p.To.Type = obj.TYPE_BRANCH
  1373  		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
  1374  		if b.Succs[0].Block() != next {
  1375  			p := s.Prog(obj.AJMP)
  1376  			p.To.Type = obj.TYPE_BRANCH
  1377  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1378  		}
  1379  
  1380  	case ssa.BlockPlain:
  1381  		if b.Succs[0].Block() != next {
  1382  			p := s.Prog(obj.AJMP)
  1383  			p.To.Type = obj.TYPE_BRANCH
  1384  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1385  		}
  1386  	case ssa.BlockExit:
  1387  	case ssa.BlockRet:
  1388  		s.Prog(obj.ARET)
  1389  	case ssa.BlockRetJmp:
  1390  		p := s.Prog(obj.AJMP)
  1391  		p.To.Type = obj.TYPE_MEM
  1392  		p.To.Name = obj.NAME_EXTERN
  1393  		p.To.Sym = b.Aux.(*obj.LSym)
  1394  
  1395  	case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
  1396  		ssa.BlockPPC64LT, ssa.BlockPPC64GE,
  1397  		ssa.BlockPPC64LE, ssa.BlockPPC64GT,
  1398  		ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
  1399  		ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
  1400  		jmp := blockJump[b.Kind]
  1401  		switch next {
  1402  		case b.Succs[0].Block():
  1403  			s.Br(jmp.invasm, b.Succs[1].Block())
  1404  			if jmp.invasmun {
  1405  				// TODO: The second branch is probably predict-not-taken since it is for FP unordered
  1406  				s.Br(ppc64.ABVS, b.Succs[1].Block())
  1407  			}
  1408  		case b.Succs[1].Block():
  1409  			s.Br(jmp.asm, b.Succs[0].Block())
  1410  			if jmp.asmeq {
  1411  				s.Br(ppc64.ABEQ, b.Succs[0].Block())
  1412  			}
  1413  		default:
  1414  			if b.Likely != ssa.BranchUnlikely {
  1415  				s.Br(jmp.asm, b.Succs[0].Block())
  1416  				if jmp.asmeq {
  1417  					s.Br(ppc64.ABEQ, b.Succs[0].Block())
  1418  				}
  1419  				s.Br(obj.AJMP, b.Succs[1].Block())
  1420  			} else {
  1421  				s.Br(jmp.invasm, b.Succs[1].Block())
  1422  				if jmp.invasmun {
  1423  					// TODO: The second branch is probably predict-not-taken since it is for FP unordered
  1424  					s.Br(ppc64.ABVS, b.Succs[1].Block())
  1425  				}
  1426  				s.Br(obj.AJMP, b.Succs[0].Block())
  1427  			}
  1428  		}
  1429  	default:
  1430  		b.Fatalf("branch not implemented: %s", b.LongString())
  1431  	}
  1432  }