github.com/hikaru7719/go@v0.0.0-20181025140707-c8b2ac68906a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ignore
     6  
     7  package main
     8  
     9  import "strings"
    10  
    11  // Notes:
    12  //  - Less-than-64-bit integer types live in the low portion of registers.
    13  //    For now, the upper portion is junk; sign/zero-extension might be optimized in the future, but not yet.
    14  //  - Boolean types are zero or 1; stored in a byte, but loaded with AMOVBZ so the upper bytes of a register are zero.
    15  //  - *const instructions may use a constant larger than the instruction can encode.
    16  //    In this case the assembler expands to multiple instructions and uses tmp
    17  //    register (R31).
    18  
    19  var regNamesPPC64 = []string{ // Register names (at most 64); init maps each name to its slice index, which is its bit position in a regMask.
    20  	"R0", // REGZERO, not used, but simplifies counting in regalloc
    21  	"SP", // REGSP
    22  	"SB", // REGSB
    23  	"R3",
    24  	"R4",
    25  	"R5",
    26  	"R6",
    27  	"R7",
    28  	"R8",
    29  	"R9",
    30  	"R10",
    31  	"R11", // REGCTXT for closures
    32  	"R12", // call target for CALLclosure and CALLinter (callptr in init)
    33  	"R13", // REGTLS
    34  	"R14",
    35  	"R15",
    36  	"R16",
    37  	"R17",
    38  	"R18",
    39  	"R19",
    40  	"R20",
    41  	"R21",
    42  	"R22",
    43  	"R23",
    44  	"R24",
    45  	"R25",
    46  	"R26",
    47  	"R27",
    48  	"R28",
    49  	"R29",
    50  	"g",   // REGG.  Using name "g" and setting Config.hasGReg makes it "just happen".
    51  	"R31", // REGTMP
    52  
    53  	"F0", // named here but left out of the fp mask built in init
    54  	"F1",
    55  	"F2",
    56  	"F3",
    57  	"F4",
    58  	"F5",
    59  	"F6",
    60  	"F7",
    61  	"F8",
    62  	"F9",
    63  	"F10",
    64  	"F11",
    65  	"F12",
    66  	"F13",
    67  	"F14",
    68  	"F15",
    69  	"F16",
    70  	"F17",
    71  	"F18",
    72  	"F19",
    73  	"F20",
    74  	"F21",
    75  	"F22",
    76  	"F23",
    77  	"F24",
    78  	"F25",
    79  	"F26",
    80  	"F27", // F27 through F31 are likewise left out of the fp mask built in init
    81  	"F28",
    82  	"F29",
    83  	"F30",
    84  	"F31",
    85  
    86  	// "CR0",
    87  	// "CR1",
    88  	// "CR2",
    89  	// "CR3",
    90  	// "CR4",
    91  	// "CR5",
    92  	// "CR6",
    93  	// "CR7",
    94  
    95  	// "CR",
    96  	// "XER",
    97  	// "LR",
    98  	// "CTR",
    99  }
   100  
   101  func init() {
   102  	// Make map from reg names to reg integers.
   103  	if len(regNamesPPC64) > 64 {
   104  		panic("too many registers")
   105  	}
   106  	num := map[string]int{}
   107  	for i, name := range regNamesPPC64 {
   108  		num[name] = i
   109  	}
   110  	buildReg := func(s string) regMask {
   111  		m := regMask(0)
   112  		for _, r := range strings.Split(s, " ") {
   113  			if n, ok := num[r]; ok {
   114  				m |= regMask(1) << uint(n)
   115  				continue
   116  			}
   117  			panic("register " + r + " not found")
   118  		}
   119  		return m
   120  	}
   121  
   122  	var (
   123  		gp = buildReg("R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29")
   124  		fp = buildReg("F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26")
   125  		sp = buildReg("SP")
   126  		sb = buildReg("SB")
   127  		gr = buildReg("g")
   128  		// cr  = buildReg("CR")
   129  		// ctr = buildReg("CTR")
   130  		// lr  = buildReg("LR")
   131  		tmp     = buildReg("R31")
   132  		ctxt    = buildReg("R11")
   133  		callptr = buildReg("R12")
   134  		// tls = buildReg("R13")
   135  		gp01        = regInfo{inputs: nil, outputs: []regMask{gp}}
   136  		gp11        = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   137  		gp21        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
   138  		gp22        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
   139  		gp1cr       = regInfo{inputs: []regMask{gp | sp | sb}}
   140  		gp2cr       = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   141  		crgp        = regInfo{inputs: nil, outputs: []regMask{gp}}
   142  		gpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   143  		gploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   144  		gpstore     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   145  		gpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}}
   146  		gpstorezero = regInfo{inputs: []regMask{gp | sp | sb}} // ppc64.REGZERO is reserved zero value
   147  		gpxchg      = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   148  		gpcas       = regInfo{inputs: []regMask{gp | sp | sb, gp, gp}, outputs: []regMask{gp}}
   149  		fp01        = regInfo{inputs: nil, outputs: []regMask{fp}}
   150  		fp11        = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
   151  		fpgp        = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
   152  		gpfp        = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
   153  		fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
   154  		fp31        = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
   155  		fp2cr       = regInfo{inputs: []regMask{fp, fp}}
   156  		fpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{fp}}
   157  		fploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{fp}}
   158  		fpstore     = regInfo{inputs: []regMask{gp | sp | sb, fp}}
   159  		fpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, fp}}
   160  		callerSave  = regMask(gp | fp | gr)
   161  	)
   162  	ops := []opData{
   163  		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},     // arg0 + arg1
   164  		{name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "Int64"},     // arg0 + auxInt
   165  		{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true},   // arg0+arg1
   166  		{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0+arg1
   167  		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                        // arg0-arg1
   168  		{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"},                      // arg0-arg1
   169  		{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                    // arg0-arg1
   170  
   171  		{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
   172  		{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
   173  
   174  		{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true},   // (arg0 * arg1) >> 64, signed
   175  		{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true},   // (arg0 * arg1) >> 32, signed
   176  		{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
   177  		{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
   178  		{name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true},   // arg0 * arg1, returns (hi, lo)
   179  
   180  		{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true},   // arg0*arg1
   181  		{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
   182  
   183  		{name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD"},   // arg0*arg1 + arg2
   184  		{name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"}, // arg0*arg1 + arg2
   185  		{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB"},   // arg0*arg1 - arg2
   186  		{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS"}, // arg0*arg1 - arg2
   187  
   188  		{name: "SRAD", argLength: 2, reg: gp21, asm: "SRAD"}, // arg0 >>a arg1, 64 bits (all sign if arg1 & 64 != 0)
   189  		{name: "SRAW", argLength: 2, reg: gp21, asm: "SRAW"}, // arg0 >>a arg1, 32 bits (all sign if arg1 & 32 != 0)
   190  		{name: "SRD", argLength: 2, reg: gp21, asm: "SRD"},   // arg0 >> arg1, 64 bits  (0 if arg1 & 64 != 0)
   191  		{name: "SRW", argLength: 2, reg: gp21, asm: "SRW"},   // arg0 >> arg1, 32 bits  (0 if arg1 & 32 != 0)
   192  		{name: "SLD", argLength: 2, reg: gp21, asm: "SLD"},   // arg0 << arg1, 64 bits  (0 if arg1 & 64 != 0)
   193  		{name: "SLW", argLength: 2, reg: gp21, asm: "SLW"},   // arg0 << arg1, 32 bits  (0 if arg1 & 32 != 0)
   194  
   195  		{name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"},   // arg0 rotate left by arg1 mod 64
   196  		{name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32
   197  
   198  		{name: "ADDconstForCarry", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, aux: "Int16", asm: "ADDC", typ: "Flags"}, // _, carry := arg0 + aux
   199  		{name: "MaskIfNotCarry", argLength: 1, reg: crgp, asm: "ADDME", typ: "Int64"},                                                                   // carry - 1 (if carry then 0 else -1)
   200  
   201  		{name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "Int64"}, // arg0 >>a aux, 64 bits
   202  		{name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "Int64"}, // arg0 >>a aux, 32 bits
   203  		{name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"},   // arg0 >> aux, 64 bits
   204  		{name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int64"},   // arg0 >> aux, 32 bits
   205  		{name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"},   // arg0 << aux, 64 bits
   206  		{name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"},   // arg0 << aux, 32 bits
   207  
   208  		{name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"},   // arg0 rotate left by auxInt bits
   209  		{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
   210  
   211  		{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros
   212  		{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit)
   213  
   214  		{name: "POPCNTD", argLength: 1, reg: gp11, asm: "POPCNTD"}, // number of set bits in arg0
   215  		{name: "POPCNTW", argLength: 1, reg: gp11, asm: "POPCNTW"}, // number of set bits in each word of arg0 placed in corresponding word
   216  		{name: "POPCNTB", argLength: 1, reg: gp11, asm: "POPCNTB"}, // number of set bits in each byte of arg0 placed in corresponding byte
   217  
   218  		{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"},   // arg0/arg1
   219  		{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
   220  
   221  		{name: "DIVD", argLength: 2, reg: gp21, asm: "DIVD", typ: "Int64"},   // arg0/arg1 (signed 64-bit)
   222  		{name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"},   // arg0/arg1 (signed 32-bit)
   223  		{name: "DIVDU", argLength: 2, reg: gp21, asm: "DIVDU", typ: "Int64"}, // arg0/arg1 (unsigned 64-bit)
   224  		{name: "DIVWU", argLength: 2, reg: gp21, asm: "DIVWU", typ: "Int32"}, // arg0/arg1 (unsigned 32-bit)
   225  
   226  		// MOD is implemented as rem := arg0 - (arg0/arg1) * arg1
   227  
   228  		// Conversions are all float-to-float register operations.  "Integer" refers to encoding in the FP register.
   229  		{name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero
   230  		{name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero
   231  		{name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"},   // convert 64-bit integer to float
   232  		{name: "FCFIDS", argLength: 1, reg: fp11, asm: "FCFIDS", typ: "Float32"}, // convert 32-bit integer to float
   233  		{name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"},     // round float to 32-bit value
   234  
   235  		// Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC.
   236  		// Because the 32-bit load-literal-bits instructions have impoverished addressability, always widen the
   237  		// data and use FMOVDload and FMOVDstore instead (this will also dodge endianness issues).
   238  		// There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use
   239  		// the word-load instructions.  (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr)
   240  
   241  		{name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"},   // move 64 bits of F register into G register
   242  		{name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register
   243  
   244  		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true},               // arg0&arg1
   245  		{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                                // arg0&^arg1
   246  		{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true},                 // arg0|arg1
   247  		{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"},                                  // arg0|^arg1
   248  		{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true},               // ^(arg0|arg1)
   249  		{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true}, // arg0^arg1
   250  		{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true}, // arg0^^arg1
   251  		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                  // -arg0 (integer)
   252  		{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"},                                // -arg0 (floating point)
   253  		{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},                              // sqrt(arg0) (floating point)
   254  		{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"},                            // sqrt(arg0) (floating point, single precision)
   255  		{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"},                              // floor(arg0), float64
   256  		{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"},                               // ceil(arg0), float64
   257  		{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"},                              // trunc(arg0), float64
   258  		{name: "FROUND", argLength: 1, reg: fp11, asm: "FRIN"},                              // round(arg0), float64
   259  		{name: "FABS", argLength: 1, reg: fp11, asm: "FABS"},                                // abs(arg0), float64
   260  		{name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"},                              // -abs(arg0), float64
   261  		{name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"},                            // copysign arg0 -> arg1, float64
   262  
   263  		{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"},                                                                                     // arg0|aux
   264  		{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"},                                                                                   // arg0^aux
   265  		{name: "ANDconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, asm: "ANDCC", aux: "Int64", clobberFlags: true}, // arg0&aux // and-immediate sets CC on PPC, always.
   266  		{name: "ANDCCconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}}, asm: "ANDCC", aux: "Int64", typ: "Flags"},                             // arg0&aux == 0 // and-immediate sets CC on PPC, always.
   267  
   268  		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB", typ: "Int64"},   // sign extend int8 to int64
   269  		{name: "MOVBZreg", argLength: 1, reg: gp11, asm: "MOVBZ", typ: "Int64"}, // zero extend uint8 to uint64
   270  		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH", typ: "Int64"},   // sign extend int16 to int64
   271  		{name: "MOVHZreg", argLength: 1, reg: gp11, asm: "MOVHZ", typ: "Int64"}, // zero extend uint16 to uint64
   272  		{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW", typ: "Int64"},   // sign extend int32 to int64
   273  		{name: "MOVWZreg", argLength: 1, reg: gp11, asm: "MOVWZ", typ: "Int64"}, // zero extend uint32 to uint64
   274  
   275  		// Load bytes in the endian order of the arch from arg0+aux+auxint into a 64 bit register.
   276  		{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"},  // load byte zero extend
   277  		{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"},    // load 2 bytes sign extend
   278  		{name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes zero extend
   279  		{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"},    // load 4 bytes sign extend
   280  		{name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes zero extend
   281  		{name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"},    // load 8 bytes
   282  
   283  		// Load bytes in reverse endian order of the arch from arg0 into a 64 bit register, all zero extend.
   284  		// The generated instructions are indexed loads with no offset field in the instruction so the aux fields are not used.
   285  		// In these cases the index register field is set to 0 and the full address is in the base register.
   286  		{name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"}, // load 8 bytes reverse order
   287  		{name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes zero extend reverse order
   288  		{name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes zero extend reverse order
   289  
   290  		// In these cases an index register is used in addition to a base register
   291  		{name: "MOVBZloadidx", argLength: 3, reg: gploadidx, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"},  // zero extend uint8 to uint64
   292  		{name: "MOVHloadidx", argLength: 3, reg: gploadidx, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"},    // sign extend int16 to int64
   293  		{name: "MOVHZloadidx", argLength: 3, reg: gploadidx, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // zero extend uint16 to uint64
   294  		{name: "MOVWloadidx", argLength: 3, reg: gploadidx, asm: "MOVW", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"},    // sign extend int32 to int64
   295  		{name: "MOVWZloadidx", argLength: 3, reg: gploadidx, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // zero extend uint32 to uint64
   296  		{name: "MOVDloadidx", argLength: 3, reg: gploadidx, asm: "MOVD", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"},
   297  		{name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVHBR", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"}, // sign extend int16 to int64
   298  		{name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVWBR", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"}, // sign extend int32 to int64
   299  		{name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVDBR", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"},
   300  		{name: "FMOVDloadidx", argLength: 3, reg: fploadidx, asm: "FMOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"},
   301  		{name: "FMOVSloadidx", argLength: 3, reg: fploadidx, asm: "FMOVS", aux: "SymOff", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"},
   302  
   303  		// Store bytes in the reverse endian order of the arch into arg0.
   304  		// These are indexed stores with no offset field in the instruction so the aux fields are not used.
   305  		{name: "MOVDBRstore", argLength: 3, reg: gpstore, asm: "MOVDBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes reverse order
   306  		{name: "MOVWBRstore", argLength: 3, reg: gpstore, asm: "MOVWBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes reverse order
   307  		{name: "MOVHBRstore", argLength: 3, reg: gpstore, asm: "MOVHBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes reverse order
   308  
   309  		// Floating point loads from arg0+aux+auxint
   310  		{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load double float
   311  		{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load single float
   312  
   313  		// Store bytes in the endian order of the arch into arg0+aux+auxint
   314  		{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store byte
   315  		{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes
   316  		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes
   317  		{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes
   318  
   319  		// Store floating point value into arg0+aux+auxint
   320  		{name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store double float
   321  		{name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store single float
   322  
   323  		// Stores using index and base registers
   324  		{name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},     // store byte
   325  		{name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},     // store half word
   326  		{name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},     // store word
   327  		{name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},     // store double word
   328  		{name: "FMOVDstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},   // store double float
   329  		{name: "FMOVSstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},   // store single float
   330  		{name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVHBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store half word reversed byte using index reg
   331  		{name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVWBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store word reversed byte using index reg
   332  		{name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVDBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store double word reversed byte using index reg
   333  
   334  		// The following ops store 0 into arg0+aux+auxint arg1=mem
   335  		{name: "MOVBstorezero", argLength: 2, reg: gpstorezero, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 1 byte
   336  		{name: "MOVHstorezero", argLength: 2, reg: gpstorezero, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 2 bytes
   337  		{name: "MOVWstorezero", argLength: 2, reg: gpstorezero, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 4 bytes
   338  		{name: "MOVDstorezero", argLength: 2, reg: gpstorezero, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 8 bytes
   339  
   340  		{name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{sp | sb | gp}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB/GP
   341  
   342  		{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "Int64", rematerializeable: true}, //
   343  		{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true},           //
   344  		{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true},           //
   345  		{name: "FCMPU", argLength: 2, reg: fp2cr, asm: "FCMPU", typ: "Flags"},
   346  
   347  		{name: "CMP", argLength: 2, reg: gp2cr, asm: "CMP", typ: "Flags"},     // arg0 compare to arg1
   348  		{name: "CMPU", argLength: 2, reg: gp2cr, asm: "CMPU", typ: "Flags"},   // arg0 compare to arg1
   349  		{name: "CMPW", argLength: 2, reg: gp2cr, asm: "CMPW", typ: "Flags"},   // arg0 compare to arg1
   350  		{name: "CMPWU", argLength: 2, reg: gp2cr, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1
   351  		{name: "CMPconst", argLength: 1, reg: gp1cr, asm: "CMP", aux: "Int64", typ: "Flags"},
   352  		{name: "CMPUconst", argLength: 1, reg: gp1cr, asm: "CMPU", aux: "Int64", typ: "Flags"},
   353  		{name: "CMPWconst", argLength: 1, reg: gp1cr, asm: "CMPW", aux: "Int32", typ: "Flags"},
   354  		{name: "CMPWUconst", argLength: 1, reg: gp1cr, asm: "CMPWU", aux: "Int32", typ: "Flags"},
   355  
   356  		// pseudo-ops
   357  		{name: "Equal", argLength: 1, reg: crgp},         // bool, true flags encode x==y false otherwise.
   358  		{name: "NotEqual", argLength: 1, reg: crgp},      // bool, true flags encode x!=y false otherwise.
   359  		{name: "LessThan", argLength: 1, reg: crgp},      // bool, true flags encode  x<y false otherwise.
   360  		{name: "FLessThan", argLength: 1, reg: crgp},     // bool, true flags encode  x<y false otherwise.
   361  		{name: "LessEqual", argLength: 1, reg: crgp},     // bool, true flags encode  x<=y false otherwise.
   362  		{name: "FLessEqual", argLength: 1, reg: crgp},    // bool, true flags encode  x<=y false otherwise; PPC <= === !> which is wrong for NaN
   363  		{name: "GreaterThan", argLength: 1, reg: crgp},   // bool, true flags encode  x>y false otherwise.
   364  		{name: "FGreaterThan", argLength: 1, reg: crgp},  // bool, true flags encode  x>y false otherwise.
   365  		{name: "GreaterEqual", argLength: 1, reg: crgp},  // bool, true flags encode  x>=y false otherwise.
   366  		{name: "FGreaterEqual", argLength: 1, reg: crgp}, // bool, true flags encode  x>=y false otherwise.; PPC >= === !< which is wrong for NaN
   367  
   368  		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
   369  		// and sorts it to the very beginning of the block to prevent other
   370  		// use of the closure pointer.
   371  		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{ctxt}}, zeroWidth: true},
   372  
   373  		// LoweredGetCallerSP returns the SP of the caller of the current function.
   374  		{name: "LoweredGetCallerSP", reg: gp01, rematerializeable: true},
   375  
   376  		// LoweredGetCallerPC evaluates to the PC to which its "caller" will return.
   377  		// I.e., if f calls g "calls" getcallerpc,
   378  		// the result should be the PC within f that g will return to.
   379  		// See runtime/stubs.go for a more detailed discussion.
   380  		{name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true},
   381  
   382  		// arg0=ptr, arg1=mem, returns void.  Faults if ptr is nil.
   383  		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
   384  		// Round ops to block fused-multiply-add extraction.
   385  		{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   386  		{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   387  
   388  		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true, call: true, symEffect: "None"},                   // call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   389  		{name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{callptr, ctxt, 0}, clobbers: callerSave}, aux: "Int64", clobberFlags: true, call: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
   390  		{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{callptr}, clobbers: callerSave}, aux: "Int64", clobberFlags: true, call: true},            // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
   391  
   392  		// large or unaligned zeroing
   393  		// arg0 = address of memory to zero (in R3, changed as side effect)
   394  		// returns mem
   395  		//
   396  		// a loop is generated when there is more than one iteration
   397  		// needed to clear 4 doublewords
   398  		//
   399  		// 	MOVD	$len/32,R31
   400  		//	MOVD	R31,CTR
   401  		//	loop:
   402  		//	MOVD	R0,(R3)
   403  		//	MOVD	R0,8(R3)
   404  		//	MOVD	R0,16(R3)
   405  		//	MOVD	R0,24(R3)
   406  		//	ADD	R3,32
   407  		//	BC	loop
   408  
   409  		// remaining doubleword clears generated as needed
   410  		//	MOVD	R0,(R3)
   411  		//	MOVD	R0,8(R3)
   412  		//	MOVD	R0,16(R3)
   413  		//	MOVD	R0,24(R3)
   414  
   415  		// one or more of these to clear remainder < 8 bytes
   416  		//	MOVW	R0,n1(R3)
   417  		//	MOVH	R0,n2(R3)
   418  		//	MOVB	R0,n3(R3)
   419  		{
   420  			name:      "LoweredZero",
   421  			aux:       "Int64",
   422  			argLength: 2,
   423  			reg: regInfo{
   424  				inputs:   []regMask{buildReg("R3")},
   425  				clobbers: buildReg("R3"),
   426  			},
   427  			clobberFlags:   true,
   428  			typ:            "Mem",
   429  			faultOnNilArg0: true,
   430  		},
   431  		// Loop code:
   432  		//	MOVD len/32,REG_TMP  only for loop
   433  		//	MOVD REG_TMP,CTR     only for loop
   434  		// loop:
   435  		//	MOVD (R4),R7
   436  		//	MOVD 8(R4),R8
   437  		//	MOVD 16(R4),R9
   438  		//	MOVD 24(R4),R10
   439  		//	ADD  R4,$32          only with loop
   440  		//	MOVD R7,(R3)
   441  		//	MOVD R8,8(R3)
   442  		//	MOVD R9,16(R3)
   443  		//	MOVD R10,24(R3)
   444  		//	ADD  R3,$32          only with loop
   445  		//	BC 16,0,loop         only with loop
   446  		// Bytes not moved by this loop are moved
   447  		// with a combination of the following instructions,
   448  		// starting with the largest sizes and generating as
   449  		// many as needed, using the appropriate offset value.
   450  		//	MOVD  n(R4),R7
   451  		//	MOVD  R7,n(R3)
   452  		//	MOVW  n1(R4),R7
   453  		//	MOVW  R7,n1(R3)
   454  		//	MOVH  n2(R4),R7
   455  		//	MOVH  R7,n2(R3)
   456  		//	MOVB  n3(R4),R7
   457  		//	MOVB  R7,n3(R3)
   458  
   459  		{
   460  			name:      "LoweredMove",
   461  			aux:       "Int64",
   462  			argLength: 3,
   463  			reg: regInfo{
   464  				inputs:   []regMask{buildReg("R3"), buildReg("R4")},
   465  				clobbers: buildReg("R3 R4 R7 R8 R9 R10"),
   466  			},
   467  			clobberFlags:   true,
   468  			typ:            "Mem",
   469  			faultOnNilArg0: true,
   470  			faultOnNilArg1: true,
   471  		},
   472  
   473  		{name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   474  		{name: "LoweredAtomicStore64", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   475  
   476  		{name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, typ: "UInt32", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   477  		{name: "LoweredAtomicLoad64", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   478  		{name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   479  
   480  		// atomic add32, 64
   481  		// SYNC
   482  		// LDAR         (Rarg0), Rout
   483  		// ADD		Rarg1, Rout
   484  		// STDCCC       Rout, (Rarg0)
   485  		// BNE          -3(PC)
   486  		// ISYNC
   487  		// return new sum
   488  
   489  		{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   490  		{name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   491  
   492  		// atomic exchange32, 64
   493  		// SYNC
   494  		// LDAR         (Rarg0), Rout
   495  		// STDCCC       Rarg1, (Rarg0)
   496  		// BNE          -2(PC)
   497  		// ISYNC
   498  		// return old val
   499  
   500  		{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   501  		{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   502  
   503  		// atomic compare and swap.
   504  		// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
   505  		// if *arg0 == arg1 {
   506  		//   *arg0 = arg2
   507  		//   return (true, memory)
   508  		// } else {
   509  		//   return (false, memory)
   510  		// }
   511  		// SYNC
   512  		// LDAR		(Rarg0), Rtmp
   513  		// CMP		Rarg1, Rtmp
   514  		// BNE		3(PC)
   515  		// STDCCC	Rarg2, (Rarg0)
   516  		// BNE		-4(PC)
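   517  		// CBNZ         Rtmp, -4(PC)	NOTE(review): CBNZ and CSET look like ARM64 mnemonics carried over from another port;
   518  		// CSET         EQ, Rout	confirm the actual PPC64 tail of this sequence against the ppc64 code generator.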
   519  		{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   520  		{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   521  
   522  		// atomic 8 and/or.
   523  		// *arg0 &= (|=) arg1. arg2=mem. returns memory. auxint must be zero.
   524  		// LBAR		(Rarg0), Rtmp
   525  		// AND/OR	Rarg1, Rtmp
   526  		// STBCCC	Rtmp, (Rarg0)
   527  		// BNE		-3(PC)
   528  
   529  		{name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
   530  		{name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
   531  
   532  		// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
   533  		// It preserves R0 through R15, g, and its arguments R20 and R21,
   534  		// but may clobber anything else, including R31 (REGTMP).
   535  		{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("R20"), buildReg("R21")}, clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R20 R21 g")) | buildReg("R31")}, clobberFlags: true, aux: "Sym", symEffect: "None"},
   536  
   537  		// (InvertFlags (CMP a b)) == (CMP b a)
   538  		// So if we want (LessThan (CMP a b)) but we can't do that because a is a constant,
   539  		// then we do (LessThan (InvertFlags (CMP b a))) instead.
   540  		// Rewrites will convert this to (GreaterThan (CMP b a)).
   541  		// InvertFlags is a pseudo-op which can't appear in assembly output.
   542  		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
   543  
   544  		// Constant flag values. For any comparison, there are 3 possible
   545  		// outcomes: either the three from the signed total order (<,==,>)
   546  		// or the three from the unsigned total order, depending on which
   547  		// comparison operation was used (CMP or CMPU -- PPC is different from
   548  		// the other architectures, which have a single comparison producing
   549  		// both signed and unsigned comparison results.)
   550  
   551  		// These ops are for temporary use by rewrite rules. They
   552  		// cannot appear in the generated assembly.
   553  		{name: "FlagEQ"}, // equal
   554  		{name: "FlagLT"}, // signed < or unsigned <
   555  		{name: "FlagGT"}, // signed > or unsigned >
   556  
   557  	}
   558  
   559  	blocks := []blockData{
   560  		{name: "EQ"},
   561  		{name: "NE"},
   562  		{name: "LT"},
   563  		{name: "LE"},
   564  		{name: "GT"},
   565  		{name: "GE"},
   566  		{name: "FLT"},
   567  		{name: "FLE"},
   568  		{name: "FGT"},
   569  		{name: "FGE"},
   570  	}
   571  
   572  	archs = append(archs, arch{
   573  		name:            "PPC64",
   574  		pkg:             "cmd/internal/obj/ppc64",
   575  		genfile:         "../../ppc64/ssa.go",
   576  		ops:             ops,
   577  		blocks:          blocks,
   578  		regnames:        regNamesPPC64,
   579  		gpregmask:       gp,
   580  		fpregmask:       fp,
   581  		framepointerreg: int8(num["SP"]),
   582  		linkreg:         -1, // not used
   583  	})
   584  }