github.com/Filosottile/go@v0.0.0-20170906193555-dbed9972d994/src/cmd/compile/internal/ssa/gen/PPC64Ops.go

github.com/Filosottile/go@v0.0.0-20170906193555-dbed9972d994/src/cmd/compile/internal/ssa/gen/PPC64Ops.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ignore
     6  
     7  package main
     8  
     9  import "strings"
    10  
    11  // Notes:
    12  //  - Less-than-64-bit integer types live in the low portion of registers.
    13  //    For now, the upper portion is junk; sign/zero-extension might be optimized in the future, but not yet.
    14  //  - Boolean types are zero or 1; stored in a byte, but loaded with AMOVBZ so the upper bytes of a register are zero.
    15  //  - *const instructions may use a constant larger than the instruction can encode.
    16  //    In this case the assembler expands to multiple instructions and uses tmp
    17  //    register (R31).
    18  
    19  var regNamesPPC64 = []string{
    20  	"R0", // REGZERO, not used, but simplifies counting in regalloc
    21  	"SP", // REGSP
    22  	"SB", // REGSB
    23  	"R3",
    24  	"R4",
    25  	"R5",
    26  	"R6",
    27  	"R7",
    28  	"R8",
    29  	"R9",
    30  	"R10",
    31  	"R11", // REGCTXT for closures
    32  	"R12",
    33  	"R13", // REGTLS
    34  	"R14",
    35  	"R15",
    36  	"R16",
    37  	"R17",
    38  	"R18",
    39  	"R19",
    40  	"R20",
    41  	"R21",
    42  	"R22",
    43  	"R23",
    44  	"R24",
    45  	"R25",
    46  	"R26",
    47  	"R27",
    48  	"R28",
    49  	"R29",
    50  	"g",   // REGG.  Using name "g" and setting Config.hasGReg makes it "just happen".
    51  	"R31", // REGTMP
    52  
    53  	"F0",
    54  	"F1",
    55  	"F2",
    56  	"F3",
    57  	"F4",
    58  	"F5",
    59  	"F6",
    60  	"F7",
    61  	"F8",
    62  	"F9",
    63  	"F10",
    64  	"F11",
    65  	"F12",
    66  	"F13",
    67  	"F14",
    68  	"F15",
    69  	"F16",
    70  	"F17",
    71  	"F18",
    72  	"F19",
    73  	"F20",
    74  	"F21",
    75  	"F22",
    76  	"F23",
    77  	"F24",
    78  	"F25",
    79  	"F26",
    80  	"F27",
    81  	"F28",
    82  	"F29",
    83  	"F30",
    84  	"F31",
    85  
    86  	// "CR0",
    87  	// "CR1",
    88  	// "CR2",
    89  	// "CR3",
    90  	// "CR4",
    91  	// "CR5",
    92  	// "CR6",
    93  	// "CR7",
    94  
    95  	// "CR",
    96  	// "XER",
    97  	// "LR",
    98  	// "CTR",
    99  }
   100  
   101  func init() {
   102  	// Make map from reg names to reg integers.
   103  	if len(regNamesPPC64) > 64 {
   104  		panic("too many registers")
   105  	}
   106  	num := map[string]int{}
   107  	for i, name := range regNamesPPC64 {
   108  		num[name] = i
   109  	}
   110  	buildReg := func(s string) regMask {
   111  		m := regMask(0)
   112  		for _, r := range strings.Split(s, " ") {
   113  			if n, ok := num[r]; ok {
   114  				m |= regMask(1) << uint(n)
   115  				continue
   116  			}
   117  			panic("register " + r + " not found")
   118  		}
   119  		return m
   120  	}
   121  
   122  	var (
   123  		gp = buildReg("R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29")
   124  		fp = buildReg("F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26")
   125  		sp = buildReg("SP")
   126  		sb = buildReg("SB")
   127  		gr = buildReg("g")
   128  		// cr  = buildReg("CR")
   129  		// ctr = buildReg("CTR")
   130  		// lr  = buildReg("LR")
   131  		tmp  = buildReg("R31")
   132  		ctxt = buildReg("R11")
   133  		// tls = buildReg("R13")
   134  		gp01        = regInfo{inputs: nil, outputs: []regMask{gp}}
   135  		gp11        = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   136  		gp21        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
   137  		gp1cr       = regInfo{inputs: []regMask{gp | sp | sb}}
   138  		gp2cr       = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   139  		crgp        = regInfo{inputs: nil, outputs: []regMask{gp}}
   140  		gpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   141  		gpstore     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   142  		gpstorezero = regInfo{inputs: []regMask{gp | sp | sb}} // ppc64.REGZERO is reserved zero value
   143  		gpxchg      = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   144  		gpcas       = regInfo{inputs: []regMask{gp | sp | sb, gp, gp}, outputs: []regMask{gp}}
   145  		fp01        = regInfo{inputs: nil, outputs: []regMask{fp}}
   146  		fp11        = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
   147  		fpgp        = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
   148  		gpfp        = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
   149  		fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
   150  		fp31        = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
   151  		fp2cr       = regInfo{inputs: []regMask{fp, fp}}
   152  		fpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{fp}}
   153  		fpstore     = regInfo{inputs: []regMask{gp | sp | sb, fp}}
   154  		callerSave  = regMask(gp | fp | gr)
   155  	)
   156  	ops := []opData{
   157  		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},                     // arg0 + arg1
   158  		{name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "SymOff", symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym)
   159  		{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true},                   // arg0+arg1
   160  		{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true},                 // arg0+arg1
   161  		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                                        // arg0-arg1
   162  		{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"},                                      // arg0-arg1
   163  		{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                                    // arg0-arg1
   164  
   165  		{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
   166  		{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
   167  
   168  		{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true},   // (arg0 * arg1) >> 64, signed
   169  		{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true},   // (arg0 * arg1) >> 32, signed
   170  		{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
   171  		{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
   172  
   173  		{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true},   // arg0*arg1
   174  		{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
   175  
   176  		{name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD"},   // arg0*arg1 + arg2
   177  		{name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"}, // arg0*arg1 + arg2
   178  		{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB"},   // arg0*arg1 - arg2
   179  		{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS"}, // arg0*arg1 - arg2
   180  
   181  		{name: "SRAD", argLength: 2, reg: gp21, asm: "SRAD"}, // arg0 >>a arg1, 64 bits (all sign if arg1 & 64 != 0)
   182  		{name: "SRAW", argLength: 2, reg: gp21, asm: "SRAW"}, // arg0 >>a arg1, 32 bits (all sign if arg1 & 32 != 0)
   183  		{name: "SRD", argLength: 2, reg: gp21, asm: "SRD"},   // arg0 >> arg1, 64 bits  (0 if arg1 & 64 != 0)
   184  		{name: "SRW", argLength: 2, reg: gp21, asm: "SRW"},   // arg0 >> arg1, 32 bits  (0 if arg1 & 32 != 0)
   185  		{name: "SLD", argLength: 2, reg: gp21, asm: "SLD"},   // arg0 << arg1, 64 bits  (0 if arg1 & 64 != 0)
   186  		{name: "SLW", argLength: 2, reg: gp21, asm: "SLW"},   // arg0 << arg1, 32 bits  (0 if arg1 & 32 != 0)
   187  
   188  		{name: "ADDconstForCarry", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, aux: "Int16", asm: "ADDC", typ: "Flags"}, // _, carry := arg0 + aux
   189  		{name: "MaskIfNotCarry", argLength: 1, reg: crgp, asm: "ADDME", typ: "Int64"},                                                                   // carry - 1 (if carry then 0 else -1)
   190  
   191  		{name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "Int64"}, // arg0 >>a aux, 64 bits
   192  		{name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "Int64"}, // arg0 >>a aux, 32 bits
   193  		{name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"},   // arg0 >> aux, 64 bits
   194  		{name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int64"},   // arg0 >> aux, 32 bits
   195  		{name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"},   // arg0 << aux, 64 bits
   196  		{name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"},   // arg0 << aux, 32 bits
   197  
   198  		{name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"},   // arg0 rotate left by auxInt bits
   199  		{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
   200  
   201  		{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros
   202  		{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit)
   203  
   204  		{name: "POPCNTD", argLength: 1, reg: gp11, asm: "POPCNTD"}, // number of set bits in arg0
   205  		{name: "POPCNTW", argLength: 1, reg: gp11, asm: "POPCNTW"}, // number of set bits in each word of arg0 placed in corresponding word
   206  		{name: "POPCNTB", argLength: 1, reg: gp11, asm: "POPCNTB"}, // number of set bits in each byte of arg0 placed in corresonding byte
   207  
   208  		{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"},   // arg0/arg1
   209  		{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
   210  
   211  		{name: "DIVD", argLength: 2, reg: gp21, asm: "DIVD", typ: "Int64"},   // arg0/arg1 (signed 64-bit)
   212  		{name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"},   // arg0/arg1 (signed 32-bit)
   213  		{name: "DIVDU", argLength: 2, reg: gp21, asm: "DIVDU", typ: "Int64"}, // arg0/arg1 (unsigned 64-bit)
   214  		{name: "DIVWU", argLength: 2, reg: gp21, asm: "DIVWU", typ: "Int32"}, // arg0/arg1 (unsigned 32-bit)
   215  
   216  		// MOD is implemented as rem := arg0 - (arg0/arg1) * arg1
   217  
   218  		// Conversions are all float-to-float register operations.  "Integer" refers to encoding in the FP register.
   219  		{name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero
   220  		{name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero
   221  		{name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"},   // convert 64-bit integer to float
   222  		{name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"},     // round float to 32-bit value
   223  
   224  		// Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC.
   225  		// Because the 32-bit load-literal-bits instructions have impoverished addressability, always widen the
   226  		// data instead and use FMOVDload and FMOVDstore instead (this will also dodge endianess issues).
   227  		// There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use
   228  		// the word-load instructions.  (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr)
   229  
   230  		{name: "Xf2i64", argLength: 1, reg: fpgp, typ: "Int64"},   // move 64 bits of F register into G register
   231  		{name: "Xi2f64", argLength: 1, reg: gpfp, typ: "Float64"}, // move 64 bits of G register into F register
   232  
   233  		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true},               // arg0&arg1
   234  		{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                                // arg0&^arg1
   235  		{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true},                 // arg0|arg1
   236  		{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"},                                  // arg0|^arg1
   237  		{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true},               // ^(arg0|arg1)
   238  		{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true}, // arg0^arg1
   239  		{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true}, // arg0^^arg1
   240  		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                  // -arg0 (integer)
   241  		{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"},                                // -arg0 (floating point)
   242  		{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},                              // sqrt(arg0) (floating point)
   243  		{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"},                            // sqrt(arg0) (floating point, single precision)
   244  		{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"},                              // floor(arg0), float64
   245  		{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"},                               // ceil(arg0), float64
   246  		{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"},                              // trunc(arg0), float64
   247  
   248  		{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"},                                                                                     // arg0|aux
   249  		{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"},                                                                                   // arg0^aux
   250  		{name: "ANDconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, asm: "ANDCC", aux: "Int64", clobberFlags: true}, // arg0&aux // and-immediate sets CC on PPC, always.
   251  		{name: "ANDCCconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}}, asm: "ANDCC", aux: "Int64", typ: "Flags"},                             // arg0&aux == 0 // and-immediate sets CC on PPC, always.
   252  
   253  		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB", typ: "Int64"},                                                               // sign extend int8 to int64
   254  		{name: "MOVBZreg", argLength: 1, reg: gp11, asm: "MOVBZ", typ: "Int64"},                                                             // zero extend uint8 to uint64
   255  		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH", typ: "Int64"},                                                               // sign extend int16 to int64
   256  		{name: "MOVHZreg", argLength: 1, reg: gp11, asm: "MOVHZ", typ: "Int64"},                                                             // zero extend uint16 to uint64
   257  		{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW", typ: "Int64"},                                                               // sign extend int32 to int64
   258  		{name: "MOVWZreg", argLength: 1, reg: gp11, asm: "MOVWZ", typ: "Int64"},                                                             // zero extend uint32 to uint64
   259  		{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"},  // zero extend uint8 to uint64
   260  		{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"},    // sign extend int16 to int64
   261  		{name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // zero extend uint16 to uint64
   262  		{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"},    // sign extend int32 to int64
   263  		{name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // zero extend uint32 to uint64
   264  		{name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"},
   265  
   266  		{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"},
   267  		{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"},
   268  		{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},
   269  		{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},
   270  		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},
   271  		{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},
   272  		{name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},
   273  		{name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},
   274  
   275  		{name: "MOVBstorezero", argLength: 2, reg: gpstorezero, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero byte to arg0+aux.  arg1=mem
   276  		{name: "MOVHstorezero", argLength: 2, reg: gpstorezero, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 2 bytes to ...
   277  		{name: "MOVWstorezero", argLength: 2, reg: gpstorezero, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 4 bytes to ...
   278  		{name: "MOVDstorezero", argLength: 2, reg: gpstorezero, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 8 bytes to ...
   279  
   280  		{name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{sp | sb}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB
   281  
   282  		{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "Int64", rematerializeable: true}, //
   283  		{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true},           //
   284  		{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true},           //
   285  		{name: "FCMPU", argLength: 2, reg: fp2cr, asm: "FCMPU", typ: "Flags"},
   286  
   287  		{name: "CMP", argLength: 2, reg: gp2cr, asm: "CMP", typ: "Flags"},     // arg0 compare to arg1
   288  		{name: "CMPU", argLength: 2, reg: gp2cr, asm: "CMPU", typ: "Flags"},   // arg0 compare to arg1
   289  		{name: "CMPW", argLength: 2, reg: gp2cr, asm: "CMPW", typ: "Flags"},   // arg0 compare to arg1
   290  		{name: "CMPWU", argLength: 2, reg: gp2cr, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1
   291  		{name: "CMPconst", argLength: 1, reg: gp1cr, asm: "CMP", aux: "Int64", typ: "Flags"},
   292  		{name: "CMPUconst", argLength: 1, reg: gp1cr, asm: "CMPU", aux: "Int64", typ: "Flags"},
   293  		{name: "CMPWconst", argLength: 1, reg: gp1cr, asm: "CMPW", aux: "Int32", typ: "Flags"},
   294  		{name: "CMPWUconst", argLength: 1, reg: gp1cr, asm: "CMPWU", aux: "Int32", typ: "Flags"},
   295  
   296  		// pseudo-ops
   297  		{name: "Equal", argLength: 1, reg: crgp},         // bool, true flags encode x==y false otherwise.
   298  		{name: "NotEqual", argLength: 1, reg: crgp},      // bool, true flags encode x!=y false otherwise.
   299  		{name: "LessThan", argLength: 1, reg: crgp},      // bool, true flags encode  x<y false otherwise.
   300  		{name: "FLessThan", argLength: 1, reg: crgp},     // bool, true flags encode  x<y false otherwise.
   301  		{name: "LessEqual", argLength: 1, reg: crgp},     // bool, true flags encode  x<=y false otherwise.
   302  		{name: "FLessEqual", argLength: 1, reg: crgp},    // bool, true flags encode  x<=y false otherwise; PPC <= === !> which is wrong for NaN
   303  		{name: "GreaterThan", argLength: 1, reg: crgp},   // bool, true flags encode  x>y false otherwise.
   304  		{name: "FGreaterThan", argLength: 1, reg: crgp},  // bool, true flags encode  x>y false otherwise.
   305  		{name: "GreaterEqual", argLength: 1, reg: crgp},  // bool, true flags encode  x>=y false otherwise.
   306  		{name: "FGreaterEqual", argLength: 1, reg: crgp}, // bool, true flags encode  x>=y false otherwise.; PPC >= === !< which is wrong for NaN
   307  
   308  		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
   309  		// and sorts it to the very beginning of the block to prevent other
   310  		// use of the closure pointer.
   311  		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{ctxt}}},
   312  
   313  		//arg0=ptr,arg1=mem, returns void.  Faults if ptr is nil.
   314  		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
   315  		// Round ops to block fused-multiply-add extraction.
   316  		{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true},
   317  		{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true},
   318  
   319  		// Convert pointer to integer, takes a memory operand for ordering.
   320  		{name: "MOVDconvert", argLength: 2, reg: gp11, asm: "MOVD"},
   321  
   322  		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true, call: true, symEffect: "None"},                   // call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   323  		{name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{gp | sp, ctxt, 0}, clobbers: callerSave}, aux: "Int64", clobberFlags: true, call: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
   324  		{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "Int64", clobberFlags: true, call: true},                 // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
   325  
   326  		// large or unaligned zeroing
   327  		// arg0 = address of memory to zero (in R3, changed as side effect)
   328  		// returns mem
   329  		//
   330  		// a loop is generated when there is more than one iteration
   331  		// needed to clear 4 doublewords
   332  		//
   333  		// 	MOVD	$len/32,R31
   334  		//	MOVD	R31,CTR
   335  		//	loop:
   336  		//	MOVD	R0,(R3)
   337  		//	MOVD	R0,8(R3)
   338  		//	MOVD	R0,16(R3)
   339  		//	MOVD	R0,24(R3)
   340  		//	ADD	R3,32
   341  		//	BC	loop
   342  
   343  		// remaining doubleword clears generated as needed
   344  		//	MOVD	R0,(R3)
   345  		//	MOVD	R0,8(R3)
   346  		//	MOVD	R0,16(R3)
   347  		//	MOVD	R0,24(R3)
   348  
   349  		// one or more of these to clear remainder < 8 bytes
   350  		//	MOVW	R0,n1(R3)
   351  		//	MOVH	R0,n2(R3)
   352  		//	MOVB	R0,n3(R3)
   353  		{
   354  			name:      "LoweredZero",
   355  			aux:       "Int64",
   356  			argLength: 2,
   357  			reg: regInfo{
   358  				inputs:   []regMask{buildReg("R3")},
   359  				clobbers: buildReg("R3"),
   360  			},
   361  			clobberFlags:   true,
   362  			typ:            "Mem",
   363  			faultOnNilArg0: true,
   364  		},
   365  		// Loop code:
   366  		//	MOVD len/32,REG_TMP  only for loop
   367  		//	MOVD REG_TMP,CTR     only for loop
   368  		// loop:
   369  		//	MOVD (R4),R7
   370  		//	MOVD 8(R4),R8
   371  		//	MOVD 16(R4),R9
   372  		//	MOVD 24(R4),R10
   373  		//	ADD  R4,$32          only with loop
   374  		//	MOVD R7,(R3)
   375  		//	MOVD R8,8(R3)
   376  		//	MOVD R9,16(R3)
   377  		//	MOVD R10,24(R3)
   378  		//	ADD  R3,$32          only with loop
   379  		//	BC 16,0,loop         only with loop
   380  		// Bytes not moved by this loop are moved
   381  		// with a combination of the following instructions,
   382  		// starting with the largest sizes and generating as
   383  		// many as needed, using the appropriate offset value.
   384  		//	MOVD  n(R4),R7
   385  		//	MOVD  R7,n(R3)
   386  		//	MOVW  n1(R4),R7
   387  		//	MOVW  R7,n1(R3)
   388  		//	MOVH  n2(R4),R7
   389  		//	MOVH  R7,n2(R3)
   390  		//	MOVB  n3(R4),R7
   391  		//	MOVB  R7,n3(R3)
   392  
   393  		{
   394  			name:      "LoweredMove",
   395  			aux:       "Int64",
   396  			argLength: 3,
   397  			reg: regInfo{
   398  				inputs:   []regMask{buildReg("R3"), buildReg("R4")},
   399  				clobbers: buildReg("R3 R4 R7 R8 R9 R10"),
   400  			},
   401  			clobberFlags:   true,
   402  			typ:            "Mem",
   403  			faultOnNilArg0: true,
   404  			faultOnNilArg1: true,
   405  		},
   406  
   407  		{name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, typ: "Mem", faultOnNilArg0: true, hasSideEffects: true},
   408  		{name: "LoweredAtomicStore64", argLength: 3, reg: gpstore, typ: "Mem", faultOnNilArg0: true, hasSideEffects: true},
   409  
   410  		{name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, typ: "UInt32", clobberFlags: true, faultOnNilArg0: true},
   411  		{name: "LoweredAtomicLoad64", argLength: 2, reg: gpload, typ: "Int64", clobberFlags: true, faultOnNilArg0: true},
   412  		{name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", clobberFlags: true, faultOnNilArg0: true},
   413  
   414  		// atomic add32, 64
   415  		// SYNC
   416  		// LDAR         (Rarg0), Rout
   417  		// ADD		Rarg1, Rout
   418  		// STDCCC       Rout, (Rarg0)
   419  		// BNE          -3(PC)
   420  		// ISYNC
   421  		// return new sum
   422  
   423  		{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   424  		{name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   425  
   426  		// atomic exchange32, 64
   427  		// SYNC
   428  		// LDAR         (Rarg0), Rout
   429  		// STDCCC       Rarg1, (Rarg0)
   430  		// BNE          -2(PC)
   431  		// ISYNC
   432  		// return old val
   433  
   434  		{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   435  		{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   436  
   437  		// atomic compare and swap.
   438  		// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
   439  		// if *arg0 == arg1 {
   440  		//   *arg0 = arg2
   441  		//   return (true, memory)
   442  		// } else {
   443  		//   return (false, memory)
   444  		// }
   445  		// SYNC
   446  		// LDAR		(Rarg0), Rtmp
   447  		// CMP		Rarg1, Rtmp
   448  		// BNE		3(PC)
   449  		// STDCCC	Rarg2, (Rarg0)
   450  		// BNE		-4(PC)
   451  		// CBNZ         Rtmp, -4(PC)
   452  		// CSET         EQ, Rout
   453  		{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   454  		{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   455  
   456  		// atomic 8 and/or.
   457  		// *arg0 &= (|=) arg1. arg2=mem. returns memory. auxint must be zero.
   458  		// LBAR		(Rarg0), Rtmp
   459  		// AND/OR	Rarg1, Rtmp
   460  		// STBCCC	Rtmp, (Rarg0), Rtmp
   461  		// BNE		Rtmp, -3(PC)
   462  
   463  		{name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
   464  		{name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
   465  
   466  		// (InvertFlags (CMP a b)) == (CMP b a)
   467  		// So if we want (LessThan (CMP a b)) but we can't do that because a is a constant,
   468  		// then we do (LessThan (InvertFlags (CMP b a))) instead.
   469  		// Rewrites will convert this to (GreaterThan (CMP b a)).
   470  		// InvertFlags is a pseudo-op which can't appear in assembly output.
   471  		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
   472  
   473  		// Constant flag values. For any comparison, there are 3 possible
   474  		// outcomes: either the three from the signed total order (<,==,>)
   475  		// or the three from the unsigned total order, depending on which
   476  		// comparison operation was used (CMP or CMPU -- PPC is different from
   477  		// the other architectures, which have a single comparison producing
   478  		// both signed and unsigned comparison results.)
   479  
   480  		// These ops are for temporary use by rewrite rules. They
   481  		// cannot appear in the generated assembly.
   482  		{name: "FlagEQ"}, // equal
   483  		{name: "FlagLT"}, // signed < or unsigned <
   484  		{name: "FlagGT"}, // signed > or unsigned >
   485  
   486  	}
   487  
   488  	blocks := []blockData{
   489  		{name: "EQ"},
   490  		{name: "NE"},
   491  		{name: "LT"},
   492  		{name: "LE"},
   493  		{name: "GT"},
   494  		{name: "GE"},
   495  		{name: "FLT"},
   496  		{name: "FLE"},
   497  		{name: "FGT"},
   498  		{name: "FGE"},
   499  	}
   500  
   501  	archs = append(archs, arch{
   502  		name:            "PPC64",
   503  		pkg:             "cmd/internal/obj/ppc64",
   504  		genfile:         "../../ppc64/ssa.go",
   505  		ops:             ops,
   506  		blocks:          blocks,
   507  		regnames:        regNamesPPC64,
   508  		gpregmask:       gp,
   509  		fpregmask:       fp,
   510  		framepointerreg: int8(num["SP"]),
   511  		linkreg:         -1, // not used
   512  	})
   513  }