github.com/bir3/gocompiler@v0.3.205/src/cmd/compile/internal/ssa/_gen/ARM64.rules (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  (Add(Ptr|64|32|16|8) ...) => (ADD ...)
     6  (Add(32F|64F) ...) => (FADD(S|D) ...)
     7  
     8  (Sub(Ptr|64|32|16|8) ...) => (SUB ...)
     9  (Sub(32F|64F) ...) => (FSUB(S|D) ...)
    10  
    11  (Mul64 ...) => (MUL ...)
    12  (Mul(32|16|8) ...) => (MULW ...)
    13  (Mul(32F|64F) ...) => (FMUL(S|D) ...)
    14  
    15  (Hmul64 ...) => (MULH ...)
    16  (Hmul64u ...) => (UMULH ...)
    17  (Hmul32 x y) => (SRAconst (MULL <typ.Int64> x y) [32])
    18  (Hmul32u x y) => (SRAconst (UMULL <typ.UInt64> x y) [32])
    19  (Select0 (Mul64uhilo x y)) => (UMULH x y)
    20  (Select1 (Mul64uhilo x y)) => (MUL x y)
    21  
    22  (Div64 [false] x y) => (DIV x y)
    23  (Div64u ...) => (UDIV ...)
    24  (Div32 [false] x y) => (DIVW x y)
    25  (Div32u ...) => (UDIVW ...)
    26  (Div16 [false] x y) => (DIVW (SignExt16to32 x) (SignExt16to32 y))
    27  (Div16u x y) => (UDIVW (ZeroExt16to32 x) (ZeroExt16to32 y))
    28  (Div8 x y) => (DIVW (SignExt8to32 x) (SignExt8to32 y))
    29  (Div8u x y) => (UDIVW (ZeroExt8to32 x) (ZeroExt8to32 y))
    30  (Div32F ...) => (FDIVS ...)
    31  (Div64F ...) => (FDIVD ...)
    32  
    33  (Mod64 x y) => (MOD x y)
    34  (Mod64u ...) => (UMOD ...)
    35  (Mod32 x y) => (MODW x y)
    36  (Mod32u ...) => (UMODW ...)
    37  (Mod16 x y) => (MODW (SignExt16to32 x) (SignExt16to32 y))
    38  (Mod16u x y) => (UMODW (ZeroExt16to32 x) (ZeroExt16to32 y))
    39  (Mod8 x y) => (MODW (SignExt8to32 x) (SignExt8to32 y))
    40  (Mod8u x y) => (UMODW (ZeroExt8to32 x) (ZeroExt8to32 y))
    41  
    42  // (x + y) / 2 with x>=y    =>    (x - y) / 2 + y
    43  (Avg64u <t> x y) => (ADD (SRLconst <t> (SUB <t> x y) [1]) y)
    44  
    45  (And(64|32|16|8) ...) => (AND ...)
    46  (Or(64|32|16|8) ...) => (OR ...)
    47  (Xor(64|32|16|8) ...) => (XOR ...)
    48  
    49  // unary ops
    50  (Neg(64|32|16|8) ...) => (NEG ...)
    51  (Neg(32F|64F) ...) => (FNEG(S|D) ...)
    52  (Com(64|32|16|8) ...) => (MVN ...)
    53  
    54  // math package intrinsics
    55  (Abs ...) => (FABSD ...)
    56  (Sqrt ...) => (FSQRTD ...)
    57  (Ceil ...) => (FRINTPD ...)
    58  (Floor ...) => (FRINTMD ...)
    59  (Round ...) => (FRINTAD ...)
    60  (RoundToEven ...) => (FRINTND ...)
    61  (Trunc ...) => (FRINTZD ...)
    62  (FMA x y z) => (FMADDD z x y)
    63  
    64  (Sqrt32 ...) => (FSQRTS ...)
    65  
    66  // lowering rotates
    67  // We do rotate detection in generic rules; if the following rules need to be changed, check the generic rules first.
    68  (RotateLeft8 <t> x (MOVDconst [c])) => (Or8 (Lsh8x64 <t> x (MOVDconst [c&7])) (Rsh8Ux64 <t> x (MOVDconst [-c&7])))
    69  (RotateLeft8 <t> x y) => (OR <t> (SLL <t> x (ANDconst <typ.Int64> [7] y)) (SRL <t> (ZeroExt8to64 x) (ANDconst <typ.Int64> [7] (NEG <typ.Int64> y))))
    70  (RotateLeft16 <t> x (MOVDconst [c])) => (Or16 (Lsh16x64 <t> x (MOVDconst [c&15])) (Rsh16Ux64 <t> x (MOVDconst [-c&15])))
    71  (RotateLeft16 <t> x y) => (RORW <t> (ORshiftLL <typ.UInt32> (ZeroExt16to32 x) (ZeroExt16to32 x) [16]) (NEG <typ.Int64> y))
    72  (RotateLeft32 x y) => (RORW x (NEG <y.Type> y))
    73  (RotateLeft64 x y) => (ROR x (NEG <y.Type> y))
    74  
    75  (Ctz(64|32|16|8)NonZero ...) => (Ctz(64|32|32|32) ...)
    76  
    77  (Ctz64 <t> x) => (CLZ (RBIT <t> x))
    78  (Ctz32 <t> x) => (CLZW (RBITW <t> x))
    79  (Ctz16 <t> x) => (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x)))
    80  (Ctz8 <t> x) => (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x100] x)))
    81  
    82  (PopCount64 <t> x) => (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> x))))
    83  (PopCount32 <t> x) => (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> (ZeroExt32to64 x)))))
    84  (PopCount16 <t> x) => (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> (ZeroExt16to64 x)))))
    85  
    86  // Load args directly into the register class where it will be used.
    87  (FMOVDgpfp <t> (Arg [off] {sym})) => @b.Func.Entry (Arg <t> [off] {sym})
    88  (FMOVDfpgp <t> (Arg [off] {sym})) => @b.Func.Entry (Arg <t> [off] {sym})
    89  
    90  // Similarly for stores, if we see a store after FPR <=> GPR move, then redirect store to use the other register set.
    91  (MOVDstore [off] {sym} ptr (FMOVDfpgp val) mem) => (FMOVDstore [off] {sym} ptr val mem)
    92  (FMOVDstore [off] {sym} ptr (FMOVDgpfp val) mem) => (MOVDstore [off] {sym} ptr val mem)
    93  (MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem) => (FMOVSstore [off] {sym} ptr val mem)
    94  (FMOVSstore [off] {sym} ptr (FMOVSgpfp val) mem) => (MOVWstore [off] {sym} ptr val mem)
    95  
    96  // float <=> int register moves, with no conversion.
    97  // These come up when compiling math.{Float64bits, Float64frombits, Float32bits, Float32frombits}.
    98  (MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr val _)) => (FMOVDfpgp val)
    99  (FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr val _)) => (FMOVDgpfp val)
   100  (MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _)) => (FMOVSfpgp val)
   101  (FMOVSload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) => (FMOVSgpfp val)
   102  
   103  (BitLen64 x) => (SUB (MOVDconst [64]) (CLZ <typ.Int> x))
   104  (BitLen32 x) => (SUB (MOVDconst [32]) (CLZW <typ.Int> x))
   105  
   106  (Bswap64 ...) => (REV ...)
   107  (Bswap32 ...) => (REVW ...)
   108  
   109  (BitRev64 ...) => (RBIT ...)
   110  (BitRev32 ...) => (RBITW ...)
   111  (BitRev16 x) => (SRLconst [48] (RBIT <typ.UInt64> x))
   112  (BitRev8 x) => (SRLconst [56] (RBIT <typ.UInt64> x))
   113  
   114  // In fact, UMOD will be translated into UREM instruction, and UREM is originally translated into
   115  // UDIV and MSUB instructions. But if there is already an identical UDIV instruction just before or
   116  // after UREM (case like quo, rem := z/y, z%y), then the second UDIV instruction becomes redundant.
   117  // The purpose of this rule is to have this extra UDIV instruction removed in CSE pass.
   118  (UMOD <typ.UInt64> x y) => (MSUB <typ.UInt64> x y (UDIV <typ.UInt64> x y))
   119  (UMODW <typ.UInt32> x y) => (MSUBW <typ.UInt32> x y (UDIVW <typ.UInt32> x y))
   120  
   121  // 64-bit addition with carry.
   122  (Select0 (Add64carry x y c)) => (Select0 <typ.UInt64> (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] c))))
   123  (Select1 (Add64carry x y c)) => (ADCzerocarry <typ.UInt64> (Select1 <types.TypeFlags> (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] c)))))
   124  
   125  // 64-bit subtraction with borrowing.
   126  (Select0 (Sub64borrow x y bo)) => (Select0 <typ.UInt64> (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags bo))))
   127  (Select1 (Sub64borrow x y bo)) => (NEG <typ.UInt64> (NGCzerocarry <typ.UInt64> (Select1 <types.TypeFlags> (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags bo))))))
   128  
   129  // boolean ops -- booleans are represented with 0=false, 1=true
   130  (AndB ...) => (AND ...)
   131  (OrB ...) => (OR ...)
   132  (EqB x y) => (XOR (MOVDconst [1]) (XOR <typ.Bool> x y))
   133  (NeqB ...) => (XOR ...)
   134  (Not x) => (XOR (MOVDconst [1]) x)
   135  
   136  // shifts
   137  // hardware instruction uses only the low 6 bits of the shift
   138  // we compare to 64 to ensure Go semantics for large shifts
   139  // Rules about rotates with non-const shift are based on the following rules,
   140  // if the following rules change, please also modify the rules based on them.
   141  
   142  // check shiftIsBounded first, if shift value is proved to be valid then we
   143  // can do the shift directly.
   144  // left shift
   145  (Lsh(64|32|16|8)x64 <t> x y) && shiftIsBounded(v) => (SLL <t> x y)
   146  (Lsh(64|32|16|8)x32 <t> x y) && shiftIsBounded(v) => (SLL <t> x y)
   147  (Lsh(64|32|16|8)x16 <t> x y) && shiftIsBounded(v) => (SLL <t> x y)
   148  (Lsh(64|32|16|8)x8 <t> x y) && shiftIsBounded(v) => (SLL <t> x y)
   149  
   150  // signed right shift
   151  (Rsh64x(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRA <t> x y)
   152  (Rsh32x(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRA <t> (SignExt32to64 x) y)
   153  (Rsh16x(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRA <t> (SignExt16to64 x) y)
   154  (Rsh8x(64|32|16|8)  <t> x y) && shiftIsBounded(v) => (SRA <t> (SignExt8to64 x) y)
   155  
   156  // unsigned right shift
   157  (Rsh64Ux(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRL <t> x y)
   158  (Rsh32Ux(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRL <t> (ZeroExt32to64 x) y)
   159  (Rsh16Ux(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRL <t> (ZeroExt16to64 x) y)
   160  (Rsh8Ux(64|32|16|8)  <t> x y) && shiftIsBounded(v) => (SRL <t> (ZeroExt8to64 x) y)
   161  
   162  // shift value may be out of range, use CMP + CSEL instead
   163  (Lsh64x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
   164  (Lsh64x32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   165  (Lsh64x16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   166  (Lsh64x8  <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   167  
   168  (Lsh32x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
   169  (Lsh32x32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   170  (Lsh32x16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   171  (Lsh32x8  <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   172  
   173  (Lsh16x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
   174  (Lsh16x32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   175  (Lsh16x16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   176  (Lsh16x8  <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   177  
   178  (Lsh8x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
   179  (Lsh8x32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   180  (Lsh8x16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   181  (Lsh8x8  <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   182  
   183  (Rsh64Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
   184  (Rsh64Ux32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   185  (Rsh64Ux16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   186  (Rsh64Ux8  <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   187  
   188  (Rsh32Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
   189  (Rsh32Ux32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   190  (Rsh32Ux16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   191  (Rsh32Ux8  <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   192  
   193  (Rsh16Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
   194  (Rsh16Ux32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   195  (Rsh16Ux16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   196  (Rsh16Ux8  <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   197  
   198  (Rsh8Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
   199  (Rsh8Ux32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   200  (Rsh8Ux16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   201  (Rsh8Ux8  <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   202  
   203  (Rsh64x64 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
   204  (Rsh64x32 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
   205  (Rsh64x16 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
   206  (Rsh64x8  x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
   207  
   208  (Rsh32x64 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
   209  (Rsh32x32 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
   210  (Rsh32x16 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
   211  (Rsh32x8  x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
   212  
   213  (Rsh16x64 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
   214  (Rsh16x32 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
   215  (Rsh16x16 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
   216  (Rsh16x8  x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
   217  
   218  (Rsh8x64 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
   219  (Rsh8x32 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
   220  (Rsh8x16 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
   221  (Rsh8x8  x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
   222  
   223  // constants
   224  (Const(64|32|16|8) [val]) => (MOVDconst [int64(val)])
   225  (Const(32F|64F) [val]) => (FMOV(S|D)const [float64(val)])
   226  (ConstNil) => (MOVDconst [0])
   227  (ConstBool [t]) => (MOVDconst [b2i(t)])
   228  
   229  (Slicemask <t> x) => (SRAconst (NEG <t> x) [63])
   230  
   231  // truncations
   232  // Because we ignore high parts of registers, truncates are just copies.
   233  (Trunc16to8 ...) => (Copy ...)
   234  (Trunc32to8 ...) => (Copy ...)
   235  (Trunc32to16 ...) => (Copy ...)
   236  (Trunc64to8 ...) => (Copy ...)
   237  (Trunc64to16 ...) => (Copy ...)
   238  (Trunc64to32 ...) => (Copy ...)
   239  
   240  // Zero-/Sign-extensions
   241  (ZeroExt8to16 ...) => (MOVBUreg ...)
   242  (ZeroExt8to32 ...) => (MOVBUreg ...)
   243  (ZeroExt16to32 ...) => (MOVHUreg ...)
   244  (ZeroExt8to64 ...) => (MOVBUreg ...)
   245  (ZeroExt16to64 ...) => (MOVHUreg ...)
   246  (ZeroExt32to64 ...) => (MOVWUreg ...)
   247  
   248  (SignExt8to16 ...) => (MOVBreg ...)
   249  (SignExt8to32 ...) => (MOVBreg ...)
   250  (SignExt16to32 ...) => (MOVHreg ...)
   251  (SignExt8to64 ...) => (MOVBreg ...)
   252  (SignExt16to64 ...) => (MOVHreg ...)
   253  (SignExt32to64 ...) => (MOVWreg ...)
   254  
   255  // float <=> int conversion
   256  (Cvt32to32F ...) => (SCVTFWS ...)
   257  (Cvt32to64F ...) => (SCVTFWD ...)
   258  (Cvt64to32F ...) => (SCVTFS ...)
   259  (Cvt64to64F ...) => (SCVTFD ...)
   260  (Cvt32Uto32F ...) => (UCVTFWS ...)
   261  (Cvt32Uto64F ...) => (UCVTFWD ...)
   262  (Cvt64Uto32F ...) => (UCVTFS ...)
   263  (Cvt64Uto64F ...) => (UCVTFD ...)
   264  (Cvt32Fto32 ...) => (FCVTZSSW ...)
   265  (Cvt64Fto32 ...) => (FCVTZSDW ...)
   266  (Cvt32Fto64 ...) => (FCVTZSS ...)
   267  (Cvt64Fto64 ...) => (FCVTZSD ...)
   268  (Cvt32Fto32U ...) => (FCVTZUSW ...)
   269  (Cvt64Fto32U ...) => (FCVTZUDW ...)
   270  (Cvt32Fto64U ...) => (FCVTZUS ...)
   271  (Cvt64Fto64U ...) => (FCVTZUD ...)
   272  (Cvt32Fto64F ...) => (FCVTSD ...)
   273  (Cvt64Fto32F ...) => (FCVTDS ...)
   274  
   275  (CvtBoolToUint8 ...) => (Copy ...)
   276  
   277  (Round32F ...) => (LoweredRound32F ...)
   278  (Round64F ...) => (LoweredRound64F ...)
   279  
   280  // comparisons
   281  (Eq8 x y)  => (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
   282  (Eq16 x y) => (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
   283  (Eq32 x y) => (Equal (CMPW x y))
   284  (Eq64 x y) => (Equal (CMP x y))
   285  (EqPtr x y) => (Equal (CMP x y))
   286  (Eq32F x y) => (Equal (FCMPS x y))
   287  (Eq64F x y) => (Equal (FCMPD x y))
   288  
   289  (Neq8 x y)  => (NotEqual (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
   290  (Neq16 x y) => (NotEqual (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
   291  (Neq32 x y) => (NotEqual (CMPW x y))
   292  (Neq64 x y) => (NotEqual (CMP x y))
   293  (NeqPtr x y) => (NotEqual (CMP x y))
   294  (Neq32F x y) => (NotEqual (FCMPS x y))
   295  (Neq64F x y) => (NotEqual (FCMPD x y))
   296  
   297  (Less8 x y)  => (LessThan (CMPW (SignExt8to32 x) (SignExt8to32 y)))
   298  (Less16 x y) => (LessThan (CMPW (SignExt16to32 x) (SignExt16to32 y)))
   299  (Less32 x y) => (LessThan (CMPW x y))
   300  (Less64 x y) => (LessThan (CMP x y))
   301  
   302  // Set condition flags for floating-point comparisons "x < y"
   303  // and "x <= y". Because if either or both of the operands are
   304  // NaNs, all three of (x < y), (x == y) and (x > y) are false,
   305  // and ARM Manual says FCMP instruction sets PSTATE.<N,Z,C,V>
   306  // of this case to (0, 0, 1, 1).
   307  (Less32F x y) => (LessThanF (FCMPS x y))
   308  (Less64F x y) => (LessThanF (FCMPD x y))
   309  
   310  // For an unsigned integer x, the following rules are useful when combined with a branch:
   311  // 0 <  x  =>  x != 0
   312  // x <= 0  =>  x == 0
   313  // x <  1  =>  x == 0
   314  // 1 <= x  =>  x != 0
   315  (Less(8U|16U|32U|64U) zero:(MOVDconst [0]) x) => (Neq(8|16|32|64) zero x)
   316  (Leq(8U|16U|32U|64U) x zero:(MOVDconst [0]))  => (Eq(8|16|32|64) x zero)
   317  (Less(8U|16U|32U|64U) x (MOVDconst [1])) => (Eq(8|16|32|64) x (MOVDconst [0]))
   318  (Leq(8U|16U|32U|64U) (MOVDconst [1]) x)  => (Neq(8|16|32|64) (MOVDconst [0]) x)
   319  
   320  (Less8U x y)  => (LessThanU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
   321  (Less16U x y) => (LessThanU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
   322  (Less32U x y) => (LessThanU (CMPW x y))
   323  (Less64U x y) => (LessThanU (CMP x y))
   324  
   325  (Leq8 x y)  => (LessEqual (CMPW (SignExt8to32 x) (SignExt8to32 y)))
   326  (Leq16 x y) => (LessEqual (CMPW (SignExt16to32 x) (SignExt16to32 y)))
   327  (Leq32 x y) => (LessEqual (CMPW x y))
   328  (Leq64 x y) => (LessEqual (CMP x y))
   329  
   330  // Refer to the comments for op Less64F above.
   331  (Leq32F x y) => (LessEqualF (FCMPS x y))
   332  (Leq64F x y) => (LessEqualF (FCMPD x y))
   333  
   334  (Leq8U x y)  => (LessEqualU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
   335  (Leq16U x y) => (LessEqualU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
   336  (Leq32U x y) => (LessEqualU (CMPW x y))
   337  (Leq64U x y) => (LessEqualU (CMP x y))
   338  
   339  // Optimize comparison between a floating-point value and 0.0 with "FCMP $(0.0), Fn"
   340  (FCMPS x (FMOVSconst [0])) => (FCMPS0 x)
   341  (FCMPS (FMOVSconst [0]) x) => (InvertFlags (FCMPS0 x))
   342  (FCMPD x (FMOVDconst [0])) => (FCMPD0 x)
   343  (FCMPD (FMOVDconst [0]) x) => (InvertFlags (FCMPD0 x))
   344  
   345  // CSEL needs a flag-generating argument. Synthesize a TSTW if necessary.
   346  (CondSelect x y boolval) && flagArg(boolval) != nil => (CSEL [boolval.Op] x y flagArg(boolval))
   347  (CondSelect x y boolval) && flagArg(boolval) == nil => (CSEL [OpARM64NotEqual] x y (TSTWconst [1] boolval))
   348  
   349  (OffPtr [off] ptr:(SP)) && is32Bit(off) => (MOVDaddr [int32(off)] ptr)
   350  (OffPtr [off] ptr) => (ADDconst [off] ptr)
   351  
   352  (Addr {sym} base) => (MOVDaddr {sym} base)
   353  (LocalAddr {sym} base _) => (MOVDaddr {sym} base)
   354  
   355  // loads
   356  (Load <t> ptr mem) && t.IsBoolean() => (MOVBUload ptr mem)
   357  (Load <t> ptr mem) && (is8BitInt(t)  && isSigned(t))  => (MOVBload ptr mem)
   358  (Load <t> ptr mem) && (is8BitInt(t)  && !isSigned(t)) => (MOVBUload ptr mem)
   359  (Load <t> ptr mem) && (is16BitInt(t) && isSigned(t))  => (MOVHload ptr mem)
   360  (Load <t> ptr mem) && (is16BitInt(t) && !isSigned(t)) => (MOVHUload ptr mem)
   361  (Load <t> ptr mem) && (is32BitInt(t) && isSigned(t))  => (MOVWload ptr mem)
   362  (Load <t> ptr mem) && (is32BitInt(t) && !isSigned(t)) => (MOVWUload ptr mem)
   363  (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) => (MOVDload ptr mem)
   364  (Load <t> ptr mem) && is32BitFloat(t) => (FMOVSload ptr mem)
   365  (Load <t> ptr mem) && is64BitFloat(t) => (FMOVDload ptr mem)
   366  
   367  // stores
   368  (Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem)
   369  (Store {t} ptr val mem) && t.Size() == 2 => (MOVHstore ptr val mem)
   370  (Store {t} ptr val mem) && t.Size() == 4 && !is32BitFloat(val.Type) => (MOVWstore ptr val mem)
   371  (Store {t} ptr val mem) && t.Size() == 8 && !is64BitFloat(val.Type) => (MOVDstore ptr val mem)
   372  (Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (FMOVSstore ptr val mem)
   373  (Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (FMOVDstore ptr val mem)
   374  
   375  // zeroing
   376  (Zero [0] _ mem) => mem
   377  (Zero [1] ptr mem) => (MOVBstore ptr (MOVDconst [0]) mem)
   378  (Zero [2] ptr mem) => (MOVHstore ptr (MOVDconst [0]) mem)
   379  (Zero [4] ptr mem) => (MOVWstore ptr (MOVDconst [0]) mem)
   380  (Zero [3] ptr mem) =>
   381  	(MOVBstore [2] ptr (MOVDconst [0])
   382  		(MOVHstore ptr (MOVDconst [0]) mem))
   383  (Zero [5] ptr mem) =>
   384  	(MOVBstore [4] ptr (MOVDconst [0])
   385  		(MOVWstore ptr (MOVDconst [0]) mem))
   386  (Zero [6] ptr mem) =>
   387  	(MOVHstore [4] ptr (MOVDconst [0])
   388  		(MOVWstore ptr (MOVDconst [0]) mem))
   389  (Zero [7] ptr mem) =>
   390  	(MOVWstore [3] ptr (MOVDconst [0])
   391  		(MOVWstore ptr (MOVDconst [0]) mem))
   392  (Zero [8] ptr mem) => (MOVDstore ptr (MOVDconst [0]) mem)
   393  (Zero [9] ptr mem) =>
   394  	(MOVBstore [8] ptr (MOVDconst [0])
   395  		(MOVDstore ptr (MOVDconst [0]) mem))
   396  (Zero [10] ptr mem) =>
   397  	(MOVHstore [8] ptr (MOVDconst [0])
   398  		(MOVDstore ptr (MOVDconst [0]) mem))
   399  (Zero [11] ptr mem) =>
   400  	(MOVDstore [3] ptr (MOVDconst [0])
   401  		(MOVDstore ptr (MOVDconst [0]) mem))
   402  (Zero [12] ptr mem) =>
   403  	(MOVWstore [8] ptr (MOVDconst [0])
   404  		(MOVDstore ptr (MOVDconst [0]) mem))
   405  (Zero [13] ptr mem) =>
   406  	(MOVDstore [5] ptr (MOVDconst [0])
   407  		(MOVDstore ptr (MOVDconst [0]) mem))
   408  (Zero [14] ptr mem) =>
   409  	(MOVDstore [6] ptr (MOVDconst [0])
   410  		(MOVDstore ptr (MOVDconst [0]) mem))
   411  (Zero [15] ptr mem) =>
   412  	(MOVDstore [7] ptr (MOVDconst [0])
   413  		(MOVDstore ptr (MOVDconst [0]) mem))
   414  (Zero [16] ptr mem) =>
   415  	(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)
   416  
   417  (Zero [32] ptr mem) =>
   418  	(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
   419  		(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))
   420  
   421  (Zero [48] ptr mem) =>
   422  	(STP [32] ptr (MOVDconst [0]) (MOVDconst [0])
   423  		(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
   424  			(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)))
   425  
   426  (Zero [64] ptr mem) =>
   427  	(STP [48] ptr (MOVDconst [0]) (MOVDconst [0])
   428  		(STP [32] ptr (MOVDconst [0]) (MOVDconst [0])
   429  			(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
   430  				(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))))
   431  
   432  // strip off fractional word zeroing
   433  (Zero [s] ptr mem) && s%16 != 0 && s%16 <= 8 && s > 16 =>
   434  	(Zero [8]
   435  		(OffPtr <ptr.Type> ptr [s-8])
   436  		(Zero [s-s%16] ptr mem))
   437  (Zero [s] ptr mem) && s%16 != 0 && s%16 > 8 && s > 16 =>
   438  	(Zero [16]
   439  		(OffPtr <ptr.Type> ptr [s-16])
   440  		(Zero [s-s%16] ptr mem))
   441  
   442  // medium zeroing uses a duff device
   443  // 4, 16, and 64 are magic constants, see runtime/mkduff.go
   444  (Zero [s] ptr mem)
   445  	&& s%16 == 0 && s > 64 && s <= 16*64
   446  	&& !config.noDuffDevice =>
   447  	(DUFFZERO [4 * (64 - s/16)] ptr mem)
   448  
   449  // large zeroing uses a loop
   450  (Zero [s] ptr mem)
   451  	&& s%16 == 0 && (s > 16*64 || config.noDuffDevice) =>
   452  	(LoweredZero
   453  		ptr
   454  		(ADDconst <ptr.Type> [s-16] ptr)
   455  		mem)
   456  
   457  // moves
   458  (Move [0] _ _ mem) => mem
   459  (Move [1] dst src mem) => (MOVBstore dst (MOVBUload src mem) mem)
   460  (Move [2] dst src mem) => (MOVHstore dst (MOVHUload src mem) mem)
   461  (Move [3] dst src mem) =>
   462  	(MOVBstore [2] dst (MOVBUload [2] src mem)
   463  		(MOVHstore dst (MOVHUload src mem) mem))
   464  (Move [4] dst src mem) => (MOVWstore dst (MOVWUload src mem) mem)
   465  (Move [5] dst src mem) =>
   466  	(MOVBstore [4] dst (MOVBUload [4] src mem)
   467  		(MOVWstore dst (MOVWUload src mem) mem))
   468  (Move [6] dst src mem) =>
   469  	(MOVHstore [4] dst (MOVHUload [4] src mem)
   470  		(MOVWstore dst (MOVWUload src mem) mem))
   471  (Move [7] dst src mem) =>
   472  	(MOVWstore [3] dst (MOVWUload [3] src mem)
   473  		(MOVWstore dst (MOVWUload src mem) mem))
   474  (Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem)
   475  (Move [9] dst src mem) =>
   476  	(MOVBstore [8] dst (MOVBUload [8] src mem)
   477  		(MOVDstore dst (MOVDload src mem) mem))
   478  (Move [10] dst src mem) =>
   479  	(MOVHstore [8] dst (MOVHUload [8] src mem)
   480  		(MOVDstore dst (MOVDload src mem) mem))
   481  (Move [11] dst src mem) =>
   482  	(MOVDstore [3] dst (MOVDload [3] src mem)
   483  		(MOVDstore dst (MOVDload src mem) mem))
   484  (Move [12] dst src mem) =>
   485  	(MOVWstore [8] dst (MOVWUload [8] src mem)
   486  		(MOVDstore dst (MOVDload src mem) mem))
   487  (Move [13] dst src mem) =>
   488  	(MOVDstore [5] dst (MOVDload [5] src mem)
   489  		(MOVDstore dst (MOVDload src mem) mem))
   490  (Move [14] dst src mem) =>
   491  	(MOVDstore [6] dst (MOVDload [6] src mem)
   492  		(MOVDstore dst (MOVDload src mem) mem))
   493  (Move [15] dst src mem) =>
   494  	(MOVDstore [7] dst (MOVDload [7] src mem)
   495  		(MOVDstore dst (MOVDload src mem) mem))
   496  (Move [16] dst src mem) =>
   497  	(STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem)
   498  (Move [32] dst src mem) =>
   499  	(STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) (Select1 <typ.UInt64> (LDP [16] src mem))
   500  		(STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem))
   501  (Move [48] dst src mem) =>
   502  	(STP [32] dst (Select0 <typ.UInt64> (LDP [32] src mem)) (Select1 <typ.UInt64> (LDP [32] src mem))
   503  		(STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) (Select1 <typ.UInt64> (LDP [16] src mem))
   504  			(STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem)))
   505  (Move [64] dst src mem) =>
   506  	(STP [48] dst (Select0 <typ.UInt64> (LDP [48] src mem)) (Select1 <typ.UInt64> (LDP [48] src mem))
   507  		(STP [32] dst (Select0 <typ.UInt64> (LDP [32] src mem)) (Select1 <typ.UInt64> (LDP [32] src mem))
   508  			(STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) (Select1 <typ.UInt64> (LDP [16] src mem))
   509  				(STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem))))
   510  
   511  // strip off fractional word move
   512  (Move [s] dst src mem) && s%16 != 0 && s%16 <= 8 && s > 16 =>
   513  	(Move [8]
   514  		(OffPtr <dst.Type> dst [s-8])
   515  		(OffPtr <src.Type> src [s-8])
   516  		(Move [s-s%16] dst src mem))
   517  (Move [s] dst src mem) && s%16 != 0 && s%16 > 8 && s > 16 =>
   518  	(Move [16]
   519  		(OffPtr <dst.Type> dst [s-16])
   520  		(OffPtr <src.Type> src [s-16])
   521  		(Move [s-s%16] dst src mem))
   522  
   523  // medium move uses a duff device
   524  (Move [s] dst src mem)
   525  	&& s > 64 && s <= 16*64 && s%16 == 0
   526  	&& !config.noDuffDevice && logLargeCopy(v, s) =>
   527  	(DUFFCOPY [8 * (64 - s/16)] dst src mem)
   528  // 8 is the number of bytes to encode:
   529  //
   530  // LDP.P   16(R16), (R26, R27)
   531  // STP.P   (R26, R27), 16(R17)
   532  //
   533  // 64 is the number of these blocks. See runtime/duff_arm64.s:duffcopy
   534  
   535  // large move uses a loop
   536  (Move [s] dst src mem)
   537  	&& s%16 == 0 && (s > 16*64 || config.noDuffDevice)
   538  	&& logLargeCopy(v, s) =>
   539  	(LoweredMove
   540  		dst
   541  		src
   542  		(ADDconst <src.Type> src [s-16])
   543  		mem)
   544  
// calls
(StaticCall ...) => (CALLstatic ...)
(ClosureCall ...) => (CALLclosure ...)
(InterCall ...) => (CALLinter ...)
(TailCall ...) => (CALLtail ...)

// checks
(NilCheck ...) => (LoweredNilCheck ...)
(IsNonNil ptr) => (NotEqual (CMPconst [0] ptr))
// Unsigned compares give a combined "0 <= idx && idx < len" check in one
// comparison, since a negative idx reinterpreted as unsigned is huge.
(IsInBounds idx len) => (LessThanU (CMP idx len))
(IsSliceInBounds idx len) => (LessEqualU (CMP idx len))

// pseudo-ops
(GetClosurePtr ...) => (LoweredGetClosurePtr ...)
(GetCallerSP ...) => (LoweredGetCallerSP ...)
(GetCallerPC ...) => (LoweredGetCallerPC ...)
   561  
// Absorb pseudo-ops into blocks.
// When the If condition is itself a flags-consuming boolean, branch directly
// on the flags value instead of materializing the boolean.
(If (Equal cc) yes no) => (EQ cc yes no)
(If (NotEqual cc) yes no) => (NE cc yes no)
(If (LessThan cc) yes no) => (LT cc yes no)
(If (LessThanU cc) yes no) => (ULT cc yes no)
(If (LessEqual cc) yes no) => (LE cc yes no)
(If (LessEqualU cc) yes no) => (ULE cc yes no)
(If (GreaterThan cc) yes no) => (GT cc yes no)
(If (GreaterThanU cc) yes no) => (UGT cc yes no)
(If (GreaterEqual cc) yes no) => (GE cc yes no)
(If (GreaterEqualU cc) yes no) => (UGE cc yes no)
(If (LessThanF cc) yes no) => (FLT cc yes no)
(If (LessEqualF cc) yes no) => (FLE cc yes no)
(If (GreaterThanF cc) yes no) => (FGT cc yes no)
(If (GreaterEqualF cc) yes no) => (FGE cc yes no)

// Generic boolean: branch on bit 0 of the condition value.
(If cond yes no) => (TBNZ [0] cond yes no)

(JumpTable idx) => (JUMPTABLE {makeJumpTableSym(b)} idx (MOVDaddr <typ.Uintptr> {makeJumpTableSym(b)} (SB)))
   581  
// atomic intrinsics
// Note: these ops do not accept offset.
// LDAR*/STLR* are the ARMv8 load-acquire / store-release instructions.
(AtomicLoad8   ...) => (LDARB ...)
(AtomicLoad32  ...) => (LDARW ...)
(AtomicLoad64  ...) => (LDAR  ...)
(AtomicLoadPtr ...) => (LDAR  ...)

(AtomicStore8       ...) => (STLRB ...)
(AtomicStore32      ...) => (STLRW ...)
(AtomicStore64      ...) => (STLR  ...)
(AtomicStorePtrNoWB ...) => (STLR  ...)

(AtomicExchange(32|64)       ...) => (LoweredAtomicExchange(32|64) ...)
(AtomicAdd(32|64)            ...) => (LoweredAtomicAdd(32|64) ...)
(AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...)

// NOTE(review): the *Variant ops appear to be the alternate (presumably
// ARMv8.1 LSE) encodings — confirm against ARM64Ops.go.
(AtomicAdd(32|64)Variant            ...) => (LoweredAtomicAdd(32|64)Variant      ...)
(AtomicExchange(32|64)Variant       ...) => (LoweredAtomicExchange(32|64)Variant ...)
(AtomicCompareAndSwap(32|64)Variant ...) => (LoweredAtomicCas(32|64)Variant      ...)

// Currently the updated value is not used, but we need a register to temporarily hold it.
(AtomicAnd8  ptr val mem) => (Select1 (LoweredAtomicAnd8  ptr val mem))
(AtomicAnd32 ptr val mem) => (Select1 (LoweredAtomicAnd32 ptr val mem))
(AtomicOr8   ptr val mem) => (Select1 (LoweredAtomicOr8   ptr val mem))
(AtomicOr32  ptr val mem) => (Select1 (LoweredAtomicOr32  ptr val mem))

(AtomicAnd8Variant  ptr val mem) => (Select1 (LoweredAtomicAnd8Variant  ptr val mem))
(AtomicAnd32Variant ptr val mem) => (Select1 (LoweredAtomicAnd32Variant ptr val mem))
(AtomicOr8Variant   ptr val mem) => (Select1 (LoweredAtomicOr8Variant   ptr val mem))
(AtomicOr32Variant  ptr val mem) => (Select1 (LoweredAtomicOr32Variant  ptr val mem))

// Write barrier.
(WB ...) => (LoweredWB ...)

// Publication barrier (0xe is ST option)
(PubBarrier mem) => (DMB [0xe] mem)

// Select the panic helper by bounds-check ABI class.
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
   622  
// Optimizations

// Absorb boolean tests into block
// NZ branches when its argument is nonzero; if the argument is a
// flags-consuming boolean, branch directly on the flags instead.
(NZ (Equal cc) yes no) => (EQ cc yes no)
(NZ (NotEqual cc) yes no) => (NE cc yes no)
(NZ (LessThan cc) yes no) => (LT cc yes no)
(NZ (LessThanU cc) yes no) => (ULT cc yes no)
(NZ (LessEqual cc) yes no) => (LE cc yes no)
(NZ (LessEqualU cc) yes no) => (ULE cc yes no)
(NZ (GreaterThan cc) yes no) => (GT cc yes no)
(NZ (GreaterThanU cc) yes no) => (UGT cc yes no)
(NZ (GreaterEqual cc) yes no) => (GE cc yes no)
(NZ (GreaterEqualU cc) yes no) => (UGE cc yes no)
(NZ (LessThanF cc) yes no) => (FLT cc yes no)
(NZ (LessEqualF cc) yes no) => (FLE cc yes no)
(NZ (GreaterThanF cc) yes no) => (FGT cc yes no)
(NZ (GreaterEqualF cc) yes no) => (FGE cc yes no)

// Same absorption for "test bit 0" of a boolean (produced by lowering If).
(TBNZ [0] (Equal cc) yes no) => (EQ cc yes no)
(TBNZ [0] (NotEqual cc) yes no) => (NE cc yes no)
(TBNZ [0] (LessThan cc) yes no) => (LT cc yes no)
(TBNZ [0] (LessThanU cc) yes no) => (ULT cc yes no)
(TBNZ [0] (LessEqual cc) yes no) => (LE cc yes no)
(TBNZ [0] (LessEqualU cc) yes no) => (ULE cc yes no)
(TBNZ [0] (GreaterThan cc) yes no) => (GT cc yes no)
(TBNZ [0] (GreaterThanU cc) yes no) => (UGT cc yes no)
(TBNZ [0] (GreaterEqual cc) yes no) => (GE cc yes no)
(TBNZ [0] (GreaterEqualU cc) yes no) => (UGE cc yes no)
(TBNZ [0] (LessThanF cc) yes no) => (FLT cc yes no)
(TBNZ [0] (LessEqualF cc) yes no) => (FLE cc yes no)
(TBNZ [0] (GreaterThanF cc) yes no) => (FGT cc yes no)
(TBNZ [0] (GreaterEqualF cc) yes no) => (FGE cc yes no)
   655  
// Replace compare-with-zero of an AND result by a flags-setting TST when the
// AND has no other uses. The compare width must match the test width:
// 64-bit CMPconst pairs with TST/TSTconst, 32-bit CMPWconst with TSTW/TSTWconst.
(EQ (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (EQ (TST x y) yes no)
(NE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (NE (TST x y) yes no)
(LT (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LT (TST x y) yes no)
(LE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LE (TST x y) yes no)
(GT (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GT (TST x y) yes no)
(GE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GE (TST x y) yes no)

(EQ (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (EQ (TSTconst [c] y) yes no)
(NE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (NE (TSTconst [c] y) yes no)
(LT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LT (TSTconst [c] y) yes no)
(LE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LE (TSTconst [c] y) yes no)
(GT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GT (TSTconst [c] y) yes no)
(GE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GE (TSTconst [c] y) yes no)

(EQ (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (EQ (TSTW x y) yes no)
(NE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (NE (TSTW x y) yes no)
(LT (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LT (TSTW x y) yes no)
(LE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LE (TSTW x y) yes no)
(GT (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GT (TSTW x y) yes no)
(GE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GE (TSTW x y) yes no)

(EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (EQ (TSTWconst [int32(c)] y) yes no)
(NE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (NE (TSTWconst [int32(c)] y) yes no)
(LT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LT (TSTWconst [int32(c)] y) yes no)
(LE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LE (TSTWconst [int32(c)] y) yes no)
(GT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GT (TSTWconst [int32(c)] y) yes no)
(GE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GE (TSTWconst [int32(c)] y) yes no)

// For conditional instructions such as CSET, CSEL.
(Equal (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (Equal (TST x y))
(NotEqual (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (NotEqual (TST x y))
(LessThan (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (LessThan (TST x y))
(LessEqual (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (LessEqual (TST x y))
(GreaterThan (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterThan (TST x y))
(GreaterEqual (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterEqual (TST x y))

(Equal (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (Equal (TSTWconst [int32(c)] y))
(NotEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (NotEqual (TSTWconst [int32(c)] y))
(LessThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessThan (TSTWconst [int32(c)] y))
(LessEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessEqual (TSTWconst [int32(c)] y))
(GreaterThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterThan (TSTWconst [int32(c)] y))
(GreaterEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterEqual (TSTWconst [int32(c)] y))

(Equal (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (Equal (TSTW x y))
(NotEqual (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (NotEqual (TSTW x y))
(LessThan (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (LessThan (TSTW x y))
(LessEqual (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (LessEqual (TSTW x y))
(GreaterThan (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterThan (TSTW x y))
(GreaterEqual (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterEqual (TSTW x y))
   705  
   706  (Equal (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (Equal (TSTconst [c] y))
   707  (NotEqual (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (NotEqual (TSTconst [c] y))
   708  (LessThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessThan (TSTconst [c] y))
   709  (LessEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessEqual (TSTconst [c] y))
   710  (GreaterThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterThan (TSTconst [c] y))
   711  (GreaterEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterEqual (TSTconst [c] y))
   712  
// Replace compare-with-zero of an ADD result by a flags-setting CMN when the
// ADD has no other uses. Signed LT/LE/GT/GE become their *noov forms because
// folding the ADD into CMN changes how the overflow (V) flag is produced.
(EQ (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (EQ (CMNconst [c] y) yes no)
(NE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (NE (CMNconst [c] y) yes no)
(LT (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LTnoov (CMNconst [c] y) yes no)
(LE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LEnoov (CMNconst [c] y) yes no)
(GT (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GTnoov (CMNconst [c] y) yes no)
(GE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GEnoov (CMNconst [c] y) yes no)

(EQ (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (EQ (CMNWconst [int32(c)] y) yes no)
(NE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (NE (CMNWconst [int32(c)] y) yes no)
(LT (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LTnoov (CMNWconst [int32(c)] y) yes no)
(LE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LEnoov (CMNWconst [int32(c)] y) yes no)
(GT (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GTnoov (CMNWconst [int32(c)] y) yes no)
(GE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GEnoov (CMNWconst [int32(c)] y) yes no)

(EQ (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (EQ (CMN x y) yes no)
(NE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (NE (CMN x y) yes no)
(LT (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LTnoov (CMN x y) yes no)
(LE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LEnoov (CMN x y) yes no)
(GT (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GTnoov (CMN x y) yes no)
(GE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GEnoov (CMN x y) yes no)

(EQ (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (EQ (CMNW x y) yes no)
(NE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (NE (CMNW x y) yes no)
(LT (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LTnoov (CMNW x y) yes no)
(LE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LEnoov (CMNW x y) yes no)
(GT (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GTnoov (CMNW x y) yes no)
(GE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GEnoov (CMNW x y) yes no)

// CMP(x,-y) -> CMN(x,y) is only valid for unordered comparison, if y can be -1<<63
(EQ (CMP x z:(NEG y)) yes no) && z.Uses == 1 => (EQ (CMN x y) yes no)
(NE (CMP x z:(NEG y)) yes no) && z.Uses == 1 => (NE (CMN x y) yes no)

(Equal (CMP x z:(NEG y))) && z.Uses == 1 => (Equal (CMN x y))
(NotEqual (CMP x z:(NEG y))) && z.Uses == 1 => (NotEqual (CMN x y))

// CMPW(x,-y) -> CMNW(x,y) is only valid for unordered comparison, if y can be -1<<31
(EQ (CMPW x z:(NEG y)) yes no) && z.Uses == 1 => (EQ (CMNW x y) yes no)
(NE (CMPW x z:(NEG y)) yes no) && z.Uses == 1 => (NE (CMNW x y) yes no)

(Equal (CMPW x z:(NEG y))) && z.Uses == 1 => (Equal (CMNW x y))
(NotEqual (CMPW x z:(NEG y))) && z.Uses == 1 => (NotEqual (CMNW x y))

// For conditional instructions such as CSET, CSEL.
// TODO: add support for LT, LE, GT, GE, overflow needs to be considered.
(Equal (CMPconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => (Equal (CMNconst [c] y))
(NotEqual (CMPconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => (NotEqual (CMNconst [c] y))

(Equal (CMPWconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => (Equal (CMNWconst [int32(c)] y))
(NotEqual (CMPWconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => (NotEqual (CMNWconst [int32(c)] y))

(Equal (CMPconst [0] z:(ADD x y))) && z.Uses == 1 => (Equal (CMN x y))
(NotEqual (CMPconst [0] z:(ADD x y))) && z.Uses == 1 => (NotEqual (CMN x y))

(Equal (CMPWconst [0] z:(ADD x y))) && z.Uses == 1 => (Equal (CMNW x y))
(NotEqual (CMPWconst [0] z:(ADD x y))) && z.Uses == 1 => (NotEqual (CMNW x y))

// Fold multiply-add / multiply-subtract into the compare; EQ/NE do not
// depend on the sign or overflow flags, so this is safe for them.
(Equal (CMPconst [0] z:(MADD a x y))) && z.Uses==1 => (Equal (CMN a (MUL <x.Type> x y)))
(NotEqual (CMPconst [0] z:(MADD a x y))) && z.Uses==1 => (NotEqual (CMN a (MUL <x.Type> x y)))

(Equal (CMPconst [0] z:(MSUB a x y))) && z.Uses==1 => (Equal (CMP a (MUL <x.Type> x y)))
(NotEqual (CMPconst [0] z:(MSUB a x y))) && z.Uses==1 => (NotEqual (CMP a (MUL <x.Type> x y)))

(Equal (CMPWconst [0] z:(MADDW a x y))) && z.Uses==1 => (Equal (CMNW a (MULW <x.Type> x y)))
(NotEqual (CMPWconst [0] z:(MADDW a x y))) && z.Uses==1 => (NotEqual (CMNW a (MULW <x.Type> x y)))

(Equal (CMPWconst [0] z:(MSUBW a x y))) && z.Uses==1 => (Equal (CMPW a (MULW <x.Type> x y)))
(NotEqual (CMPWconst [0] z:(MSUBW a x y))) && z.Uses==1 => (NotEqual (CMPW a (MULW <x.Type> x y)))

// Canonicalize negative compare constants by flipping CMP <-> CMN.
// Excludes the most negative value, whose negation overflows.
(CMPconst [c] y) && c < 0 && c != -1<<63 => (CMNconst [-c] y)
(CMPWconst [c] y) && c < 0 && c != -1<<31 => (CMNWconst [-c] y)
(CMNconst [c] y) && c < 0 && c != -1<<63 => (CMPconst [-c] y)
(CMNWconst [c] y) && c < 0 && c != -1<<31 => (CMPWconst [-c] y)
   785  
// Compare-with-zero branches become direct zero/nonzero branches (CBZ/CBNZ).
(EQ (CMPconst [0] x) yes no) => (Z x yes no)
(NE (CMPconst [0] x) yes no) => (NZ x yes no)
(EQ (CMPWconst [0] x) yes no) => (ZW x yes no)
(NE (CMPWconst [0] x) yes no) => (NZW x yes no)

// Fold multiply-add / multiply-subtract into the compare for branches; the
// signed conditions use the *noov block forms (see CMN rules above).
(EQ (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (EQ (CMN a (MUL <x.Type> x y)) yes no)
(NE (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (NE (CMN a (MUL <x.Type> x y)) yes no)
(LT (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (LTnoov (CMN a (MUL <x.Type> x y)) yes no)
(LE (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (LEnoov (CMN a (MUL <x.Type> x y)) yes no)
(GT (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (GTnoov (CMN a (MUL <x.Type> x y)) yes no)
(GE (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 => (GEnoov (CMN a (MUL <x.Type> x y)) yes no)

(EQ (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (EQ (CMP a (MUL <x.Type> x y)) yes no)
(NE (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (NE (CMP a (MUL <x.Type> x y)) yes no)
(LE (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (LEnoov (CMP a (MUL <x.Type> x y)) yes no)
(LT (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (LTnoov (CMP a (MUL <x.Type> x y)) yes no)
(GE (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (GEnoov (CMP a (MUL <x.Type> x y)) yes no)
(GT (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 => (GTnoov (CMP a (MUL <x.Type> x y)) yes no)

(EQ (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (EQ (CMNW a (MULW <x.Type> x y)) yes no)
(NE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (NE (CMNW a (MULW <x.Type> x y)) yes no)
(LE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (LEnoov (CMNW a (MULW <x.Type> x y)) yes no)
(LT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (LTnoov (CMNW a (MULW <x.Type> x y)) yes no)
(GE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (GEnoov (CMNW a (MULW <x.Type> x y)) yes no)
(GT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (GTnoov (CMNW a (MULW <x.Type> x y)) yes no)

(EQ (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (EQ (CMPW a (MULW <x.Type> x y)) yes no)
(NE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (NE (CMPW a (MULW <x.Type> x y)) yes no)
(LE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (LEnoov (CMPW a (MULW <x.Type> x y)) yes no)
(LT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (LTnoov (CMPW a (MULW <x.Type> x y)) yes no)
(GE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (GEnoov (CMPW a (MULW <x.Type> x y)) yes no)
(GT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (GTnoov (CMPW a (MULW <x.Type> x y)) yes no)

// Absorb bit-tests into block
// A zero/nonzero test of a single-bit AND becomes TBZ/TBNZ on that bit
// (ntz = number of trailing zeros = the bit's index).
(Z  (ANDconst [c] x) yes no) && oneBit(c) => (TBZ  [int64(ntz64(c))] x yes no)
(NZ (ANDconst [c] x) yes no) && oneBit(c) => (TBNZ [int64(ntz64(c))] x yes no)
(ZW  (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBZ  [int64(ntz64(int64(uint32(c))))] x yes no)
(NZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no)
(EQ (TSTconst [c] x) yes no) && oneBit(c) => (TBZ  [int64(ntz64(c))] x yes no)
(NE (TSTconst [c] x) yes no) && oneBit(c) => (TBNZ [int64(ntz64(c))] x yes no)
(EQ (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBZ  [int64(ntz64(int64(uint32(c))))] x yes no)
(NE (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no)

// Test sign-bit for signed comparisons against zero
(GE (CMPWconst [0] x) yes no) => (TBZ  [31] x yes no)
(GE (CMPconst [0] x) yes no) => (TBZ  [63] x yes no)
(LT (CMPWconst [0] x) yes no) => (TBNZ  [31] x yes no)
(LT (CMPconst [0] x) yes no) => (TBNZ  [63] x yes no)
   834  
// fold offset into address
(ADDconst [off1] (MOVDaddr [off2] {sym} ptr)) && is32Bit(off1+int64(off2)) =>
	 (MOVDaddr [int32(off1)+off2] {sym} ptr)

// fold address into load/store
// Folding is disabled for SB-relative addresses in shared (PIC) mode, where
// the final address is not a simple base+offset.
(MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(MOVBload [off1+int32(off2)] {sym} ptr mem)
(MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(MOVBUload [off1+int32(off2)] {sym} ptr mem)
(MOVHload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(MOVHload [off1+int32(off2)] {sym} ptr mem)
(MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(MOVHUload [off1+int32(off2)] {sym} ptr mem)
(MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(MOVWload [off1+int32(off2)] {sym} ptr mem)
(MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(MOVWUload [off1+int32(off2)] {sym} ptr mem)
(MOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(MOVDload [off1+int32(off2)] {sym} ptr mem)
(LDP [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(LDP [off1+int32(off2)] {sym} ptr mem)
(FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(FMOVSload [off1+int32(off2)] {sym} ptr mem)
(FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(FMOVDload [off1+int32(off2)] {sym} ptr mem)
   870  
// register indexed load
// Only applicable when there is no constant offset or symbol to carry.
(MOVDload  [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVDloadidx ptr idx mem)
(MOVWUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVWUloadidx ptr idx mem)
(MOVWload  [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVWloadidx ptr idx mem)
(MOVHUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVHUloadidx ptr idx mem)
(MOVHload  [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVHloadidx ptr idx mem)
(MOVBUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVBUloadidx ptr idx mem)
(MOVBload  [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVBloadidx ptr idx mem)
(FMOVSload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (FMOVSloadidx ptr idx mem)
(FMOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (FMOVDloadidx ptr idx mem)
// A constant index (in either operand position, since ADD commutes) turns the
// indexed load back into an offset load.
(MOVDloadidx  ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVDload  [int32(c)] ptr mem)
(MOVDloadidx  (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVDload  [int32(c)] ptr mem)
(MOVWUloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVWUload [int32(c)] ptr mem)
(MOVWUloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVWUload [int32(c)] ptr mem)
(MOVWloadidx  ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVWload  [int32(c)] ptr mem)
(MOVWloadidx  (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVWload  [int32(c)] ptr mem)
(MOVHUloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVHUload [int32(c)] ptr mem)
(MOVHUloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVHUload [int32(c)] ptr mem)
(MOVHloadidx  ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVHload  [int32(c)] ptr mem)
(MOVHloadidx  (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVHload  [int32(c)] ptr mem)
(MOVBUloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVBUload [int32(c)] ptr mem)
(MOVBUloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVBUload [int32(c)] ptr mem)
(MOVBloadidx  ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVBload  [int32(c)] ptr mem)
(MOVBloadidx  (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVBload  [int32(c)] ptr mem)
(FMOVSloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (FMOVSload [int32(c)] ptr mem)
(FMOVSloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (FMOVSload [int32(c)] ptr mem)
(FMOVDloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (FMOVDload [int32(c)] ptr mem)
(FMOVDloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (FMOVDload [int32(c)] ptr mem)

// shifted register indexed load
// The shift amount must equal the element size; (ADD idx idx) is idx*2.
(MOVDload  [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil => (MOVDloadidx8 ptr idx mem)
(MOVWUload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (MOVWUloadidx4 ptr idx mem)
(MOVWload  [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (MOVWloadidx4 ptr idx mem)
(MOVHUload [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil => (MOVHUloadidx2 ptr idx mem)
(MOVHload  [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil => (MOVHloadidx2 ptr idx mem)
(MOVDloadidx  ptr (SLLconst [3] idx) mem) => (MOVDloadidx8 ptr idx mem)
(MOVWloadidx  ptr (SLLconst [2] idx) mem) => (MOVWloadidx4 ptr idx mem)
(MOVWUloadidx ptr (SLLconst [2] idx) mem) => (MOVWUloadidx4 ptr idx mem)
(MOVHloadidx  ptr (SLLconst [1] idx) mem) => (MOVHloadidx2 ptr idx mem)
(MOVHUloadidx ptr (SLLconst [1] idx) mem) => (MOVHUloadidx2 ptr idx mem)
(MOVHloadidx  ptr (ADD idx idx) mem) => (MOVHloadidx2 ptr idx mem)
(MOVHUloadidx ptr (ADD idx idx) mem) => (MOVHUloadidx2 ptr idx mem)
(MOVDloadidx  (SLLconst [3] idx) ptr mem) => (MOVDloadidx8 ptr idx mem)
(MOVWloadidx  (SLLconst [2] idx) ptr mem) => (MOVWloadidx4 ptr idx mem)
(MOVWUloadidx (SLLconst [2] idx) ptr mem) => (MOVWUloadidx4 ptr idx mem)
(MOVHloadidx  (ADD idx idx) ptr mem) => (MOVHloadidx2 ptr idx mem)
(MOVHUloadidx (ADD idx idx) ptr mem) => (MOVHUloadidx2 ptr idx mem)
(MOVDloadidx8  ptr (MOVDconst [c]) mem) && is32Bit(c<<3) => (MOVDload  [int32(c)<<3] ptr mem)
(MOVWUloadidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (MOVWUload [int32(c)<<2] ptr mem)
(MOVWloadidx4  ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (MOVWload  [int32(c)<<2] ptr mem)
(MOVHUloadidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHUload [int32(c)<<1] ptr mem)
(MOVHloadidx2  ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHload  [int32(c)<<1] ptr mem)

(FMOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil => (FMOVDloadidx8 ptr idx mem)
(FMOVSload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (FMOVSloadidx4 ptr idx mem)
(FMOVDloadidx ptr (SLLconst [3] idx) mem) => (FMOVDloadidx8 ptr idx mem)
(FMOVSloadidx ptr (SLLconst [2] idx) mem) => (FMOVSloadidx4 ptr idx mem)
(FMOVDloadidx (SLLconst [3] idx) ptr mem) => (FMOVDloadidx8 ptr idx mem)
(FMOVSloadidx (SLLconst [2] idx) ptr mem) => (FMOVSloadidx4 ptr idx mem)
(FMOVDloadidx8 ptr (MOVDconst [c]) mem) && is32Bit(c<<3) => (FMOVDload ptr [int32(c)<<3] mem)
(FMOVSloadidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (FMOVSload ptr [int32(c)<<2] mem)
   932  
// fold address into store (mirrors the load rules above; SB-relative
// addresses are excluded in shared/PIC mode).
(MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(MOVBstore [off1+int32(off2)] {sym} ptr val mem)
(MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(MOVHstore [off1+int32(off2)] {sym} ptr val mem)
(MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(MOVWstore [off1+int32(off2)] {sym} ptr val mem)
(MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(MOVDstore [off1+int32(off2)] {sym} ptr val mem)
(STP [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(STP [off1+int32(off2)] {sym} ptr val1 val2 mem)
(FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(FMOVSstore [off1+int32(off2)] {sym} ptr val mem)
(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(FMOVDstore [off1+int32(off2)] {sym} ptr val mem)
(MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(MOVBstorezero [off1+int32(off2)] {sym} ptr mem)
(MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(MOVHstorezero [off1+int32(off2)] {sym} ptr mem)
(MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(MOVWstorezero [off1+int32(off2)] {sym} ptr mem)
(MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(MOVDstorezero [off1+int32(off2)] {sym} ptr mem)
(MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
	(MOVQstorezero [off1+int32(off2)] {sym} ptr mem)

// register indexed store
(MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx ptr idx val mem)
(MOVWstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVWstoreidx ptr idx val mem)
(MOVHstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVHstoreidx ptr idx val mem)
(MOVBstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVBstoreidx ptr idx val mem)
(FMOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (FMOVDstoreidx ptr idx val mem)
(FMOVSstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (FMOVSstoreidx ptr idx val mem)
// A constant index (in either operand position) turns the indexed store back
// into an offset store.
(MOVDstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVDstore [int32(c)] ptr val mem)
(MOVDstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVDstore [int32(c)] idx val mem)
(MOVWstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVWstore [int32(c)] ptr val mem)
(MOVWstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVWstore [int32(c)] idx val mem)
(MOVHstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVHstore [int32(c)] ptr val mem)
(MOVHstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVHstore [int32(c)] idx val mem)
(MOVBstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVBstore [int32(c)] ptr val mem)
(MOVBstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVBstore [int32(c)] idx val mem)
(FMOVDstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (FMOVDstore [int32(c)] ptr val mem)
(FMOVDstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (FMOVDstore [int32(c)] idx val mem)
(FMOVSstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (FMOVSstore [int32(c)] ptr val mem)
(FMOVSstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (FMOVSstore [int32(c)] idx val mem)

// shifted register indexed store
// The shift amount must equal the element size; (ADD idx idx) is idx*2.
(MOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx8 ptr idx val mem)
(MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil => (MOVWstoreidx4 ptr idx val mem)
(MOVHstore [off] {sym} (ADDshiftLL [1] ptr idx) val mem) && off == 0 && sym == nil => (MOVHstoreidx2 ptr idx val mem)
(MOVDstoreidx ptr (SLLconst [3] idx) val mem) => (MOVDstoreidx8 ptr idx val mem)
(MOVWstoreidx ptr (SLLconst [2] idx) val mem) => (MOVWstoreidx4 ptr idx val mem)
(MOVHstoreidx ptr (SLLconst [1] idx) val mem) => (MOVHstoreidx2 ptr idx val mem)
(MOVHstoreidx ptr (ADD idx idx) val mem) => (MOVHstoreidx2 ptr idx val mem)
(MOVDstoreidx (SLLconst [3] idx) ptr val mem) => (MOVDstoreidx8 ptr idx val mem)
(MOVWstoreidx (SLLconst [2] idx) ptr val mem) => (MOVWstoreidx4 ptr idx val mem)
(MOVHstoreidx (SLLconst [1] idx) ptr val mem) => (MOVHstoreidx2 ptr idx val mem)
(MOVHstoreidx (ADD idx idx) ptr val mem) => (MOVHstoreidx2 ptr idx val mem)
(MOVDstoreidx8 ptr (MOVDconst [c]) val mem) && is32Bit(c<<3) => (MOVDstore [int32(c)<<3] ptr val mem)
(MOVWstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (MOVWstore [int32(c)<<2] ptr val mem)
(MOVHstoreidx2 ptr (MOVDconst [c]) val mem) && is32Bit(c<<1) => (MOVHstore [int32(c)<<1] ptr val mem)

(FMOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil => (FMOVDstoreidx8 ptr idx val mem)
(FMOVSstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil => (FMOVSstoreidx4 ptr idx val mem)
(FMOVDstoreidx ptr (SLLconst [3] idx) val mem) => (FMOVDstoreidx8 ptr idx val mem)
(FMOVSstoreidx ptr (SLLconst [2] idx) val mem) => (FMOVSstoreidx4 ptr idx val mem)
(FMOVDstoreidx (SLLconst [3] idx) ptr val mem) => (FMOVDstoreidx8 ptr idx val mem)
(FMOVSstoreidx (SLLconst [2] idx) ptr val mem) => (FMOVSstoreidx4 ptr idx val mem)
(FMOVDstoreidx8 ptr (MOVDconst [c]) val mem) && is32Bit(c<<3) => (FMOVDstore [int32(c)<<3] ptr val mem)
(FMOVSstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (FMOVSstore [int32(c)<<2] ptr val mem)
  1014  
        // Fold a MOVDaddr (symbol+offset address computation) into the
        // load/store itself: merge the two symbols and add the two offsets,
        // provided the sum still fits 32 bits. The SB/Flag_shared guard
        // avoids folding global addresses when building shared/PIC code.
  1015  (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
  1016  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1017  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1018  	(MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
  1019  (MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
  1020  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1021  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1022  	(MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
  1023  (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
  1024  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1025  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1026  	(MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
  1027  (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
  1028  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1029  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1030  	(MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
  1031  (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
  1032  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1033  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1034  	(MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
  1035  (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
  1036  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1037  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1038  	(MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
  1039  (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
  1040  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1041  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1042  	(MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
  1043  (LDP [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
  1044  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1045  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1046  	(LDP [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
  1047  (FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
  1048  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1049  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1050  	(FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
  1051  (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
  1052  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1053  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1054  	(FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
  1055  
        // Same symbol/offset folding for the store side.
  1056  (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
  1057  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1058  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1059  	(MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
  1060  (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
  1061  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1062  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1063  	(MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
  1064  (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
  1065  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1066  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1067  	(MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
  1068  (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
  1069  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1070  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1071  	(MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
  1072  (STP [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem)
  1073  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1074  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1075  	(STP [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem)
  1076  (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
  1077  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1078  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1079  	(FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
  1080  (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
  1081  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1082  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1083  	(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
  1084  (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
  1085  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1086  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1087  	(MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
  1088  (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
  1089  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1090  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1091  	(MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
  1092  (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
  1093  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1094  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1095  	(MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
  1096  (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
  1097  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1098  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1099  	(MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
  1100  (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
  1101  	&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
  1102  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
  1103  	(MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
  1104  
  1105  // store zero
        // Storing a constant 0 becomes a dedicated storezero op (which can use
        // the zero register instead of materializing the constant). An STP of
        // two zeros becomes a 16-byte MOVQstorezero.
  1106  (MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVBstorezero [off] {sym} ptr mem)
  1107  (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVHstorezero [off] {sym} ptr mem)
  1108  (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVWstorezero [off] {sym} ptr mem)
  1109  (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVDstorezero [off] {sym} ptr mem)
  1110  (STP [off] {sym} ptr (MOVDconst [0]) (MOVDconst [0]) mem) => (MOVQstorezero [off] {sym} ptr mem)
  1111  
  1112  // register indexed store zero
        // Mirrors the register-indexed store rules above, for the zero-store ops.
  1113  (MOVDstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVDstorezeroidx ptr idx mem)
  1114  (MOVWstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVWstorezeroidx ptr idx mem)
  1115  (MOVHstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVHstorezeroidx ptr idx mem)
  1116  (MOVBstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVBstorezeroidx ptr idx mem)
  1117  (MOVDstoreidx ptr idx (MOVDconst [0]) mem) => (MOVDstorezeroidx ptr idx mem)
  1118  (MOVWstoreidx ptr idx (MOVDconst [0]) mem) => (MOVWstorezeroidx ptr idx mem)
  1119  (MOVHstoreidx ptr idx (MOVDconst [0]) mem) => (MOVHstorezeroidx ptr idx mem)
  1120  (MOVBstoreidx ptr idx (MOVDconst [0]) mem) => (MOVBstorezeroidx ptr idx mem)
  1121  (MOVDstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVDstorezero [int32(c)] ptr mem)
  1122  (MOVDstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) => (MOVDstorezero [int32(c)] idx mem)
  1123  (MOVWstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVWstorezero [int32(c)] ptr mem)
  1124  (MOVWstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) => (MOVWstorezero [int32(c)] idx mem)
  1125  (MOVHstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVHstorezero [int32(c)] ptr mem)
  1126  (MOVHstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) => (MOVHstorezero [int32(c)] idx mem)
  1127  (MOVBstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVBstorezero [int32(c)] ptr mem)
  1128  (MOVBstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) => (MOVBstorezero [int32(c)] idx mem)
  1129  
  1130  // shifted register indexed store zero
        // Mirrors the scaled-index store rules above, for the zero-store ops.
  1131  (MOVDstorezero [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil => (MOVDstorezeroidx8 ptr idx mem)
  1132  (MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (MOVWstorezeroidx4 ptr idx mem)
  1133  (MOVHstorezero [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil => (MOVHstorezeroidx2 ptr idx mem)
  1134  (MOVDstorezeroidx ptr (SLLconst [3] idx) mem) => (MOVDstorezeroidx8 ptr idx mem)
  1135  (MOVWstorezeroidx ptr (SLLconst [2] idx) mem) => (MOVWstorezeroidx4 ptr idx mem)
  1136  (MOVHstorezeroidx ptr (SLLconst [1] idx) mem) => (MOVHstorezeroidx2 ptr idx mem)
  1137  (MOVHstorezeroidx ptr (ADD idx idx) mem) => (MOVHstorezeroidx2 ptr idx mem)
  1138  (MOVDstorezeroidx (SLLconst [3] idx) ptr mem) => (MOVDstorezeroidx8 ptr idx mem)
  1139  (MOVWstorezeroidx (SLLconst [2] idx) ptr mem) => (MOVWstorezeroidx4 ptr idx mem)
  1140  (MOVHstorezeroidx (SLLconst [1] idx) ptr mem) => (MOVHstorezeroidx2 ptr idx mem)
  1141  (MOVHstorezeroidx (ADD idx idx) ptr mem) => (MOVHstorezeroidx2 ptr idx mem)
  1142  (MOVDstoreidx8 ptr idx (MOVDconst [0]) mem) => (MOVDstorezeroidx8 ptr idx mem)
  1143  (MOVWstoreidx4 ptr idx (MOVDconst [0]) mem) => (MOVWstorezeroidx4 ptr idx mem)
  1144  (MOVHstoreidx2 ptr idx (MOVDconst [0]) mem) => (MOVHstorezeroidx2 ptr idx mem)
  1145  (MOVDstorezeroidx8 ptr (MOVDconst [c]) mem) && is32Bit(c<<3) => (MOVDstorezero [int32(c<<3)] ptr mem)
  1146  (MOVWstorezeroidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (MOVWstorezero [int32(c<<2)] ptr mem)
  1147  (MOVHstorezeroidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHstorezero [int32(c<<1)] ptr mem)
  1148  
  1149  // replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
  1150  // these seem to have bad interaction with other rules, resulting in slower code
        // (Store-to-load forwarding rules kept below, commented out, for reference.)
  1151  //(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBreg x)
  1152  //(MOVBUload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBUreg x)
  1153  //(MOVHload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHreg x)
  1154  //(MOVHUload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHUreg x)
  1155  //(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWreg x)
  1156  //(MOVWUload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWUreg x)
  1157  //(MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
  1158  //(LDP [off] {sym} ptr (STP [off2] {sym2} ptr2 x y _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x y
  1159  //(FMOVSload [off] {sym} ptr (FMOVSstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
  1160  //(FMOVDload [off] {sym} ptr (FMOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
  1161  
        // A load whose memory argument is a storezero of the same width to the
        // same address must read zero, regardless of sign/zero extension.
  1162  (MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
  1163  (MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
  1164  (MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
  1165  (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
  1166  (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
  1167  (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
  1168  (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
  1169  
        // Indexed variants: ptr+idx is commutative, so accept the operands of
        // the preceding storezero in either order.
  1170  (MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
  1171  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
  1172  (MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
  1173  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
  1174  (MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
  1175  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
  1176  (MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
  1177  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
  1178  (MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
  1179  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
  1180  (MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
  1181  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
  1182  (MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
  1183  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
  1184  
        // Scaled-index variants: ptr and idx are not interchangeable here
        // (idx is scaled), so only the exact operand order matches.
  1185  (MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
  1186  (MOVHUloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
  1187  (MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
  1188  (MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
  1189  (MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
  1190  
  1191  // don't extend after proper load
        // A narrow load already produces a properly extended value, so a
        // following extension of equal or wider width is redundant; replace it
        // with MOVDreg (a type-only copy, see below).
  1192  (MOVBreg x:(MOVBload _ _)) => (MOVDreg x)
  1193  (MOVBUreg x:(MOVBUload _ _)) => (MOVDreg x)
  1194  (MOVHreg x:(MOVBload _ _)) => (MOVDreg x)
  1195  (MOVHreg x:(MOVBUload _ _)) => (MOVDreg x)
  1196  (MOVHreg x:(MOVHload _ _)) => (MOVDreg x)
  1197  (MOVHUreg x:(MOVBUload _ _)) => (MOVDreg x)
  1198  (MOVHUreg x:(MOVHUload _ _)) => (MOVDreg x)
  1199  (MOVWreg x:(MOVBload _ _)) => (MOVDreg x)
  1200  (MOVWreg x:(MOVBUload _ _)) => (MOVDreg x)
  1201  (MOVWreg x:(MOVHload _ _)) => (MOVDreg x)
  1202  (MOVWreg x:(MOVHUload _ _)) => (MOVDreg x)
  1203  (MOVWreg x:(MOVWload _ _)) => (MOVDreg x)
  1204  (MOVWUreg x:(MOVBUload _ _)) => (MOVDreg x)
  1205  (MOVWUreg x:(MOVHUload _ _)) => (MOVDreg x)
  1206  (MOVWUreg x:(MOVWUload _ _)) => (MOVDreg x)
        // Same for the register-indexed load forms.
  1207  (MOVBreg x:(MOVBloadidx _  _ _)) => (MOVDreg x)
  1208  (MOVBUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
  1209  (MOVHreg x:(MOVBloadidx _ _ _)) => (MOVDreg x)
  1210  (MOVHreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
  1211  (MOVHreg x:(MOVHloadidx _ _ _)) => (MOVDreg x)
  1212  (MOVHUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
  1213  (MOVHUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
  1214  (MOVWreg x:(MOVBloadidx _ _ _)) => (MOVDreg x)
  1215  (MOVWreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
  1216  (MOVWreg x:(MOVHloadidx _ _ _)) => (MOVDreg x)
  1217  (MOVWreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
  1218  (MOVWreg x:(MOVWloadidx _ _ _)) => (MOVDreg x)
  1219  (MOVWUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
  1220  (MOVWUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
  1221  (MOVWUreg x:(MOVWUloadidx _ _ _)) => (MOVDreg x)
        // Same for the scaled-index load forms.
  1222  (MOVHreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x)
  1223  (MOVHUreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
  1224  (MOVWreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x)
  1225  (MOVWreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
  1226  (MOVWreg x:(MOVWloadidx4 _ _ _)) => (MOVDreg x)
  1227  (MOVWUreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
  1228  (MOVWUreg x:(MOVWUloadidx4 _ _ _)) => (MOVDreg x)
  1229  
  1230  // fold double extensions
        // An extension of an already-extended value of equal or narrower width
        // (with matching signedness) is a no-op; keep only the inner one.
  1231  (MOVBreg x:(MOVBreg _)) => (MOVDreg x)
  1232  (MOVBUreg x:(MOVBUreg _)) => (MOVDreg x)
  1233  (MOVHreg x:(MOVBreg _)) => (MOVDreg x)
  1234  (MOVHreg x:(MOVBUreg _)) => (MOVDreg x)
  1235  (MOVHreg x:(MOVHreg _)) => (MOVDreg x)
  1236  (MOVHUreg x:(MOVBUreg _)) => (MOVDreg x)
  1237  (MOVHUreg x:(MOVHUreg _)) => (MOVDreg x)
  1238  (MOVWreg x:(MOVBreg _)) => (MOVDreg x)
  1239  (MOVWreg x:(MOVBUreg _)) => (MOVDreg x)
  1240  (MOVWreg x:(MOVHreg _)) => (MOVDreg x)
  1241  (MOVWreg x:(MOVWreg _)) => (MOVDreg x)
  1242  (MOVWUreg x:(MOVBUreg _)) => (MOVDreg x)
  1243  (MOVWUreg x:(MOVHUreg _)) => (MOVDreg x)
  1244  (MOVWUreg x:(MOVWUreg _)) => (MOVDreg x)
  1245  
  1246  // don't extend before store
        // A narrow store only writes the low bits, so any extension of equal or
        // wider width on the stored value is dead; store the unextended value.
  1247  (MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
  1248  (MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
  1249  (MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
  1250  (MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
  1251  (MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
  1252  (MOVBstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
  1253  (MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
  1254  (MOVHstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
  1255  (MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
  1256  (MOVHstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
  1257  (MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
  1258  (MOVWstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
  1259  (MOVBstoreidx ptr idx (MOVBreg x) mem) => (MOVBstoreidx ptr idx x mem)
  1260  (MOVBstoreidx ptr idx (MOVBUreg x) mem) => (MOVBstoreidx ptr idx x mem)
  1261  (MOVBstoreidx ptr idx (MOVHreg x) mem) => (MOVBstoreidx ptr idx x mem)
  1262  (MOVBstoreidx ptr idx (MOVHUreg x) mem) => (MOVBstoreidx ptr idx x mem)
  1263  (MOVBstoreidx ptr idx (MOVWreg x) mem) => (MOVBstoreidx ptr idx x mem)
  1264  (MOVBstoreidx ptr idx (MOVWUreg x) mem) => (MOVBstoreidx ptr idx x mem)
  1265  (MOVHstoreidx ptr idx (MOVHreg x) mem) => (MOVHstoreidx ptr idx x mem)
  1266  (MOVHstoreidx ptr idx (MOVHUreg x) mem) => (MOVHstoreidx ptr idx x mem)
  1267  (MOVHstoreidx ptr idx (MOVWreg x) mem) => (MOVHstoreidx ptr idx x mem)
  1268  (MOVHstoreidx ptr idx (MOVWUreg x) mem) => (MOVHstoreidx ptr idx x mem)
  1269  (MOVWstoreidx ptr idx (MOVWreg x) mem) => (MOVWstoreidx ptr idx x mem)
  1270  (MOVWstoreidx ptr idx (MOVWUreg x) mem) => (MOVWstoreidx ptr idx x mem)
  1271  (MOVHstoreidx2 ptr idx (MOVHreg x) mem) => (MOVHstoreidx2 ptr idx x mem)
  1272  (MOVHstoreidx2 ptr idx (MOVHUreg x) mem) => (MOVHstoreidx2 ptr idx x mem)
  1273  (MOVHstoreidx2 ptr idx (MOVWreg x) mem) => (MOVHstoreidx2 ptr idx x mem)
  1274  (MOVHstoreidx2 ptr idx (MOVWUreg x) mem) => (MOVHstoreidx2 ptr idx x mem)
  1275  (MOVWstoreidx4 ptr idx (MOVWreg x) mem) => (MOVWstoreidx4 ptr idx x mem)
  1276  (MOVWstoreidx4 ptr idx (MOVWUreg x) mem) => (MOVWstoreidx4 ptr idx x mem)
  1277  
  1278  // if a register move has only 1 use, just use the same register without emitting instruction
  1279  // MOVDnop doesn't emit instruction, only for ensuring the type.
  1280  (MOVDreg x) && x.Uses == 1 => (MOVDnop x)
  1281  
  1282  // TODO: we should be able to get rid of MOVDnop all together.
  1283  // But for now, this is enough to get rid of lots of them.
  1284  (MOVDnop (MOVDconst [c])) => (MOVDconst [c])
  1285  
  1286  // fold constant into arithmetic ops
        // A MOVDconst operand becomes the immediate form of the op.
        // 32-bit flag-setting ops (TSTW/CMNW) truncate the constant to int32.
  1287  (ADD x (MOVDconst [c])) => (ADDconst [c] x)
  1288  (SUB x (MOVDconst [c])) => (SUBconst [c] x)
  1289  (AND x (MOVDconst [c])) => (ANDconst [c] x)
  1290  (OR  x (MOVDconst [c])) => (ORconst  [c] x)
  1291  (XOR x (MOVDconst [c])) => (XORconst [c] x)
  1292  (TST x (MOVDconst [c])) => (TSTconst [c] x)
  1293  (TSTW x (MOVDconst [c])) => (TSTWconst [int32(c)] x)
  1294  (CMN x (MOVDconst [c])) => (CMNconst [c] x)
  1295  (CMNW x (MOVDconst [c])) => (CMNWconst [int32(c)] x)
        // BIC/EON/ORN are AND/XOR/OR with an inverted second operand; with a
        // constant operand, invert the constant and use the plain immediate op.
  1296  (BIC x (MOVDconst [c])) => (ANDconst [^c] x)
  1297  (EON x (MOVDconst [c])) => (XORconst [^c] x)
  1298  (ORN x (MOVDconst [c])) => (ORconst  [^c] x)
  1299  
        // Constant shift counts; 64-bit shifts use only the low 6 bits of the
        // count, so a (y & 63) mask on a variable count is also redundant.
  1300  (SLL x (MOVDconst [c])) => (SLLconst x [c&63])
  1301  (SRL x (MOVDconst [c])) => (SRLconst x [c&63])
  1302  (SRA x (MOVDconst [c])) => (SRAconst x [c&63])
  1303  (SLL x (ANDconst [63] y)) => (SLL x y)
  1304  (SRL x (ANDconst [63] y)) => (SRL x y)
  1305  (SRA x (ANDconst [63] y)) => (SRA x y)
  1306  
        // Comparisons with a constant: the immediate form only takes the
        // constant on the right, so a constant left operand needs InvertFlags.
  1307  (CMP x (MOVDconst [c])) => (CMPconst [c] x)
  1308  (CMP (MOVDconst [c]) x) => (InvertFlags (CMPconst [c] x))
  1309  (CMPW x (MOVDconst [c])) => (CMPWconst [int32(c)] x)
  1310  (CMPW (MOVDconst [c]) x) => (InvertFlags (CMPWconst [int32(c)] x))
  1311  
  1312  (ROR x (MOVDconst [c])) => (RORconst x [c&63])
  1313  (RORW x (MOVDconst [c])) => (RORWconst x [c&31])
  1314  
  1315  (ADDSflags x (MOVDconst [c]))  => (ADDSconstflags [c] x)
  1316  
  1317  (ADDconst [c] y) && c < 0 => (SUBconst [-c] y)
  1318  
  1319  // Canonicalize the order of arguments to comparisons - helps with CSE.
  1320  ((CMP|CMPW) x y) && canonLessThan(x,y) => (InvertFlags ((CMP|CMPW) y x))
  1321  
  1322  // mul-neg => mneg
  1323  (NEG (MUL x y)) => (MNEG x y)
  1324  (NEG (MULW x y)) => (MNEGW x y)
  1325  (MUL (NEG x) y) => (MNEG x y)
  1326  (MULW (NEG x) y) => (MNEGW x y)
  1327  
  1328  // madd/msub
        // Fuse a single-use multiply (or negated multiply) feeding an add/sub
        // into one multiply-add/multiply-subtract op.
  1329  (ADD a l:(MUL  x y)) && l.Uses==1 && clobber(l) => (MADD a x y)
  1330  (SUB a l:(MUL  x y)) && l.Uses==1 && clobber(l) => (MSUB a x y)
  1331  (ADD a l:(MNEG x y)) && l.Uses==1 && clobber(l) => (MSUB a x y)
  1332  (SUB a l:(MNEG x y)) && l.Uses==1 && clobber(l) => (MADD a x y)
  1333  
        // 32-bit variants: only when the accumulator isn't a 64-bit value,
        // since MADDW/MSUBW produce a 32-bit result.
  1334  (ADD a l:(MULW  x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) => (MADDW a x y)
  1335  (SUB a l:(MULW  x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) => (MSUBW a x y)
  1336  (ADD a l:(MNEGW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) => (MSUBW a x y)
  1337  (SUB a l:(MNEGW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) => (MADDW a x y)
  1338  
  1339  // optimize ADCSflags, SBCSflags and friends
        // Recognize the canonical carry-in/borrow-in expansion and collapse it
        // back to a direct carry-chain op; a known-zero carry drops the chain.
  1340  (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] (ADCzerocarry <typ.UInt64> c)))) => (ADCSflags x y c)
  1341  (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] (MOVDconst [0])))) => (ADDSflags x y)
  1342  (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags (NEG <typ.UInt64> (NGCzerocarry <typ.UInt64> bo))))) => (SBCSflags x y bo)
  1343  (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags (MOVDconst [0])))) => (SUBSflags x y)
  1344  
  1345  // mul by constant
  1346  (MUL x (MOVDconst [-1])) => (NEG x)
  1347  (MUL _ (MOVDconst [0])) => (MOVDconst [0])
  1348  (MUL x (MOVDconst [1])) => x
  1349  (MUL x (MOVDconst [c])) && isPowerOfTwo64(c) => (SLLconst [log64(c)] x)
  1350  (MUL x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c >= 3 => (ADDshiftLL x x [log64(c-1)])
  1351  (MUL x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c >= 7 => (ADDshiftLL (NEG <x.Type> x) x [log64(c+1)])
  1352  (MUL x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SLLconst [log64(c/3)] (ADDshiftLL <x.Type> x x [1]))
  1353  (MUL x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (SLLconst [log64(c/5)] (ADDshiftLL <x.Type> x x [2]))
  1354  (MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SLLconst [log64(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
  1355  (MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (SLLconst [log64(c/9)] (ADDshiftLL <x.Type> x x [3]))
  1356  
  1357  (MULW x (MOVDconst [c])) && int32(c)==-1 => (NEG x)
  1358  (MULW _ (MOVDconst [c])) && int32(c)==0 => (MOVDconst [0])
  1359  (MULW x (MOVDconst [c])) && int32(c)==1 => x
  1360  (MULW x (MOVDconst [c])) && isPowerOfTwo64(c) => (SLLconst [log64(c)] x)
  1361  (MULW x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c) >= 3 => (ADDshiftLL x x [log64(c-1)])
  1362  (MULW x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c) >= 7 => (ADDshiftLL (NEG <x.Type> x) x [log64(c+1)])
  1363  (MULW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SLLconst [log64(c/3)] (ADDshiftLL <x.Type> x x [1]))
  1364  (MULW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (SLLconst [log64(c/5)] (ADDshiftLL <x.Type> x x [2]))
  1365  (MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SLLconst [log64(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
  1366  (MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SLLconst [log64(c/9)] (ADDshiftLL <x.Type> x x [3]))
  1367  
  1368  // mneg by constant
  1369  (MNEG x (MOVDconst [-1])) => x
  1370  (MNEG _ (MOVDconst [0])) => (MOVDconst [0])
  1371  (MNEG x (MOVDconst [1])) => (NEG x)
  1372  (MNEG x (MOVDconst [c])) && isPowerOfTwo64(c) => (NEG (SLLconst <x.Type> [log64(c)] x))
  1373  (MNEG x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c >= 3 => (NEG (ADDshiftLL <x.Type> x x [log64(c-1)]))
  1374  (MNEG x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c >= 7 => (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log64(c+1)]))
  1375  (MNEG x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SLLconst <x.Type> [log64(c/3)] (SUBshiftLL <x.Type> x x [2]))
  1376  (MNEG x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (NEG (SLLconst <x.Type> [log64(c/5)] (ADDshiftLL <x.Type> x x [2])))
  1377  (MNEG x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SLLconst <x.Type> [log64(c/7)] (SUBshiftLL <x.Type> x x [3]))
  1378  (MNEG x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (NEG (SLLconst <x.Type> [log64(c/9)] (ADDshiftLL <x.Type> x x [3])))
  1379  
  1380  
  1381  (MNEGW x (MOVDconst [c])) && int32(c)==-1 => x
  1382  (MNEGW _ (MOVDconst [c])) && int32(c)==0 => (MOVDconst [0])
  1383  (MNEGW x (MOVDconst [c])) && int32(c)==1 => (NEG x)
  1384  (MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c) => (NEG (SLLconst <x.Type> [log64(c)] x))
  1385  (MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c) >= 3 => (NEG (ADDshiftLL <x.Type> x x [log64(c-1)]))
  1386  (MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c) >= 7 => (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log64(c+1)]))
  1387  (MNEGW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SLLconst <x.Type> [log64(c/3)] (SUBshiftLL <x.Type> x x [2]))
  1388  (MNEGW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (NEG (SLLconst <x.Type> [log64(c/5)] (ADDshiftLL <x.Type> x x [2])))
  1389  (MNEGW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SLLconst <x.Type> [log64(c/7)] (SUBshiftLL <x.Type> x x [3]))
  1390  (MNEGW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (NEG (SLLconst <x.Type> [log64(c/9)] (ADDshiftLL <x.Type> x x [3])))
  1391  
  1392  
  1393  (MADD a x (MOVDconst [-1])) => (SUB a x)
  1394  (MADD a _ (MOVDconst [0])) => a
  1395  (MADD a x (MOVDconst [1])) => (ADD a x)
  1396  (MADD a x (MOVDconst [c])) && isPowerOfTwo64(c) => (ADDshiftLL a x [log64(c)])
  1397  (MADD a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c>=3 => (ADD a (ADDshiftLL <x.Type> x x [log64(c-1)]))
  1398  (MADD a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c>=7 => (SUB a (SUBshiftLL <x.Type> x x [log64(c+1)]))
  1399  (MADD a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
  1400  (MADD a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
  1401  (MADD a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
  1402  (MADD a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
  1403  
  // Same strength reduction with the constant in the other multiplicand
  // position, so a match is found regardless of operand order.
  1404  (MADD a (MOVDconst [-1]) x) => (SUB a x)
  1405  (MADD a (MOVDconst [0]) _) => a
  1406  (MADD a (MOVDconst [1]) x) => (ADD a x)
  1407  (MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (ADDshiftLL a x [log64(c)])
  1408  (MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && c>=3 => (ADD a (ADDshiftLL <x.Type> x x [log64(c-1)]))
  1409  (MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && c>=7 => (SUB a (SUBshiftLL <x.Type> x x [log64(c+1)]))
  1410  (MADD a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
  1411  (MADD a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
  1412  (MADD a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
  1413  (MADD a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
  1414  
  // 32-bit variant: the constant multiplicand is compared as int32, and the
  // division-based expansions additionally require is32Bit(c) before rewriting.
  1415  (MADDW a x (MOVDconst [c])) && int32(c)==-1 => (SUB a x)
  1416  (MADDW a _ (MOVDconst [c])) && int32(c)==0 => a
  1417  (MADDW a x (MOVDconst [c])) && int32(c)==1 => (ADD a x)
  1418  (MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c) => (ADDshiftLL a x [log64(c)])
  1419  (MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c)>=3 => (ADD a (ADDshiftLL <x.Type> x x [log64(c-1)]))
  1420  (MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c)>=7 => (SUB a (SUBshiftLL <x.Type> x x [log64(c+1)]))
  1421  (MADDW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
  1422  (MADDW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
  1423  (MADDW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
  1424  (MADDW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
  1425  
  // MADDW, constant in the other multiplicand position.
  1426  (MADDW a (MOVDconst [c]) x) && int32(c)==-1 => (SUB a x)
  1427  (MADDW a (MOVDconst [c]) _) && int32(c)==0 => a
  1428  (MADDW a (MOVDconst [c]) x) && int32(c)==1 => (ADD a x)
  1429  (MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (ADDshiftLL a x [log64(c)])
  1430  (MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && int32(c)>=3 => (ADD a (ADDshiftLL <x.Type> x x [log64(c-1)]))
  1431  (MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && int32(c)>=7 => (SUB a (SUBshiftLL <x.Type> x x [log64(c+1)]))
  1432  (MADDW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
  1433  (MADDW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
  1434  (MADDW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
  1435  (MADDW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
  1436  
  // Multiply-subtract with a constant multiplicand: same expansions as the
  // MADD rules above, but with the ADD/SUB roles mirrored since the product
  // is subtracted from a rather than added.
  1437  (MSUB a x (MOVDconst [-1])) => (ADD a x)
  1438  (MSUB a _ (MOVDconst [0])) => a
  1439  (MSUB a x (MOVDconst [1])) => (SUB a x)
  1440  (MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c) => (SUBshiftLL a x [log64(c)])
  1441  (MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c>=3 => (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
  1442  (MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c>=7 => (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
  1443  (MSUB a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
  1444  (MSUB a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
  1445  (MSUB a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
  1446  (MSUB a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
  1447  
  // MSUB, constant in the other multiplicand position.
  1448  (MSUB a (MOVDconst [-1]) x) => (ADD a x)
  1449  (MSUB a (MOVDconst [0]) _) => a
  1450  (MSUB a (MOVDconst [1]) x) => (SUB a x)
  1451  (MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (SUBshiftLL a x [log64(c)])
  1452  (MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && c>=3 => (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
  1453  (MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && c>=7 => (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
  1454  (MSUB a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
  1455  (MSUB a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
  1456  (MSUB a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
  1457  (MSUB a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
  1458  
  // 32-bit variant: the constant is compared as int32, with is32Bit(c)
  // guards on the division-based expansions (cf. the MADDW rules above).
  1459  (MSUBW a x (MOVDconst [c])) && int32(c)==-1 => (ADD a x)
  1460  (MSUBW a _ (MOVDconst [c])) && int32(c)==0 => a
  1461  (MSUBW a x (MOVDconst [c])) && int32(c)==1 => (SUB a x)
  1462  (MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c) => (SUBshiftLL a x [log64(c)])
  1463  (MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c)>=3 => (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
  1464  (MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c)>=7 => (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
  1465  (MSUBW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
  1466  (MSUBW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
  1467  (MSUBW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
  1468  (MSUBW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
  1469  
  // MSUBW, constant in the other multiplicand position.
  1470  (MSUBW a (MOVDconst [c]) x) && int32(c)==-1 => (ADD a x)
  1471  (MSUBW a (MOVDconst [c]) _) && int32(c)==0 => a
  1472  (MSUBW a (MOVDconst [c]) x) && int32(c)==1 => (SUB a x)
  1473  (MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (SUBshiftLL a x [log64(c)])
  1474  (MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && int32(c)>=3 => (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
  1475  (MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && int32(c)>=7 => (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
  1476  (MSUBW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
  1477  (MSUBW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
  1478  (MSUBW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
  1479  (MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
  1480  
  1481  // div by constant
  // Unsigned division/modulo by a power-of-two constant becomes a logical
  // right shift / bit mask. The W (32-bit) forms compare the constant as
  // uint32 and require it to fit in 32 bits.
  1482  (UDIV x (MOVDconst [1])) => x
  1483  (UDIV x (MOVDconst [c])) && isPowerOfTwo64(c) => (SRLconst [log64(c)] x)
  1484  (UDIVW x (MOVDconst [c])) && uint32(c)==1 => x
  1485  (UDIVW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (SRLconst [log64(c)] x)
  1486  (UMOD _ (MOVDconst [1])) => (MOVDconst [0])
  1487  (UMOD x (MOVDconst [c])) && isPowerOfTwo64(c) => (ANDconst [c-1] x)
  1488  (UMODW _ (MOVDconst [c])) && uint32(c)==1 => (MOVDconst [0])
  1489  (UMODW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (ANDconst [c-1] x)
  1490  
  1491  // generic simplifications
  // Algebraic identities (x op x, double negation) and fusing a negated or
  // complemented operand into the combined ARM64 instruction (SUB/BIC/EON/ORN).
  1492  (ADD x (NEG y)) => (SUB x y)
  1493  (SUB x x) => (MOVDconst [0])
  1494  (AND x x) => x
  1495  (OR  x x) => x
  1496  (XOR x x) => (MOVDconst [0])
  1497  (BIC x x) => (MOVDconst [0])
  1498  (EON x x) => (MOVDconst [-1])
  1499  (ORN x x) => (MOVDconst [-1])
  1500  (AND x (MVN y)) => (BIC x y)
  1501  (XOR x (MVN y)) => (EON x y)
  1502  (OR  x (MVN y)) => (ORN x y)
  1503  (MVN (XOR x y)) => (EON x y)
  1504  (NEG (NEG x)) => x
  1505  
  // Recognize special operand shapes of CSEL and use the dedicated
  // conditional ops: CSETM (-1/0), CSEL0 (one arm is zero), CSINC (arm is
  // a+1), CSINV (arm is ^a), CSNEG (arm is -a). When the special operand is
  // in the first position, the condition is negated to swap the arms.
  1506  (CSEL [cc] (MOVDconst [-1]) (MOVDconst [0]) flag) => (CSETM [cc] flag)
  1507  (CSEL [cc] (MOVDconst [0]) (MOVDconst [-1]) flag) => (CSETM [arm64Negate(cc)] flag)
  1508  (CSEL [cc] x (MOVDconst [0]) flag) => (CSEL0 [cc] x flag)
  1509  (CSEL [cc] (MOVDconst [0]) y flag) => (CSEL0 [arm64Negate(cc)] y flag)
  1510  (CSEL [cc] x (ADDconst [1] a) flag) => (CSINC [cc] x a flag)
  1511  (CSEL [cc] (ADDconst [1] a) x flag) => (CSINC [arm64Negate(cc)] x a flag)
  1512  (CSEL [cc] x (MVN a) flag) => (CSINV [cc] x a flag)
  1513  (CSEL [cc] (MVN a) x flag) => (CSINV [arm64Negate(cc)] x a flag)
  1514  (CSEL [cc] x (NEG a) flag) => (CSNEG [cc] x a flag)
  1515  (CSEL [cc] (NEG a) x flag) => (CSNEG [arm64Negate(cc)] x a flag)
  1516  
  // Reassociate nested subtraction: x - (y - z) = (x + z) - y and
  // (x - y) - z = x - (y + z).
  1517  (SUB x (SUB y z)) => (SUB (ADD <v.Type> x z) y)
  1518  (SUB (SUB x y) z) => (SUB x (ADD <y.Type> y z))
  1519  
  1520  // remove redundant *const ops
  // Identity and absorbing constants for add/sub/and/or/xor; XOR with all
  // ones becomes a bitwise complement (MVN).
  1521  (ADDconst [0]  x) => x
  1522  (SUBconst [0]  x) => x
  1523  (ANDconst [0]  _) => (MOVDconst [0])
  1524  (ANDconst [-1] x) => x
  1525  (ORconst  [0]  x) => x
  1526  (ORconst  [-1] _) => (MOVDconst [-1])
  1527  (XORconst [0]  x) => x
  1528  (XORconst [-1] x) => (MVN x)
  1529  
  1530  // generic constant folding
  // Evaluate ops whose inputs are constants at compile time, and collapse
  // chains of *const ops. The W (32-bit) forms truncate via int32/uint32
  // before folding; div/mod folds are guarded by d != 0.
  1531  (ADDconst [c] (MOVDconst [d]))  => (MOVDconst [c+d])
  1532  (ADDconst [c] (ADDconst [d] x)) => (ADDconst [c+d] x)
  1533  (ADDconst [c] (SUBconst [d] x)) => (ADDconst [c-d] x)
  1534  (SUBconst [c] (MOVDconst [d]))  => (MOVDconst [d-c])
  1535  (SUBconst [c] (SUBconst [d] x)) => (ADDconst [-c-d] x)
  1536  (SUBconst [c] (ADDconst [d] x)) => (ADDconst [-c+d] x)
  1537  (SLLconst [c] (MOVDconst [d]))  => (MOVDconst [d<<uint64(c)])
  1538  (SRLconst [c] (MOVDconst [d]))  => (MOVDconst [int64(uint64(d)>>uint64(c))])
  1539  (SRAconst [c] (MOVDconst [d]))  => (MOVDconst [d>>uint64(c)])
  1540  (MUL   (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c*d])
  1541  (MULW  (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [int64(int32(c)*int32(d))])
  1542  (MNEG  (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [-c*d])
  1543  (MNEGW (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [-int64(int32(c)*int32(d))])
  // Multiply-add/subtract with a constant addend: fold the addend into an
  // ADDconst around the plain multiply (or its negation).
  1544  (MADD  (MOVDconst [c]) x y) => (ADDconst [c] (MUL   <x.Type> x y))
  1545  (MADDW (MOVDconst [c]) x y) => (ADDconst [c] (MULW  <x.Type> x y))
  1546  (MSUB  (MOVDconst [c]) x y) => (ADDconst [c] (MNEG  <x.Type> x y))
  1547  (MSUBW (MOVDconst [c]) x y) => (ADDconst [c] (MNEGW <x.Type> x y))
  1548  (MADD  a (MOVDconst [c]) (MOVDconst [d])) => (ADDconst [c*d] a)
  1549  (MADDW a (MOVDconst [c]) (MOVDconst [d])) => (ADDconst [int64(int32(c)*int32(d))] a)
  1550  (MSUB  a (MOVDconst [c]) (MOVDconst [d])) => (SUBconst [c*d] a)
  1551  (MSUBW a (MOVDconst [c]) (MOVDconst [d])) => (SUBconst [int64(int32(c)*int32(d))] a)
  1552  (DIV   (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [c/d])
  1553  (UDIV  (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(uint64(c)/uint64(d))])
  1554  (DIVW  (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(int32(c)/int32(d))])
  1555  (UDIVW (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(uint32(c)/uint32(d))])
  1556  (MOD   (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [c%d])
  1557  (UMOD  (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(uint64(c)%uint64(d))])
  1558  (MODW  (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(int32(c)%int32(d))])
  1559  (UMODW (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(uint32(c)%uint32(d))])
  1560  (ANDconst [c] (MOVDconst [d]))  => (MOVDconst [c&d])
  1561  (ANDconst [c] (ANDconst [d] x)) => (ANDconst [c&d] x)
  // A zero-extension composed with ANDconst (in either order) is just an
  // ANDconst whose mask is intersected with the extension's width mask.
  1562  (ANDconst [c] (MOVWUreg x)) => (ANDconst [c&(1<<32-1)] x)
  1563  (ANDconst [c] (MOVHUreg x)) => (ANDconst [c&(1<<16-1)] x)
  1564  (ANDconst [c] (MOVBUreg x)) => (ANDconst [c&(1<<8-1)] x)
  1565  (MOVWUreg (ANDconst [c] x)) => (ANDconst [c&(1<<32-1)] x)
  1566  (MOVHUreg (ANDconst [c] x)) => (ANDconst [c&(1<<16-1)] x)
  1567  (MOVBUreg (ANDconst [c] x)) => (ANDconst [c&(1<<8-1)] x)
  1568  (ORconst  [c] (MOVDconst [d]))  => (MOVDconst [c|d])
  1569  (ORconst  [c] (ORconst [d] x))  => (ORconst [c|d] x)
  1570  (XORconst [c] (MOVDconst [d]))  => (MOVDconst [c^d])
  1571  (XORconst [c] (XORconst [d] x)) => (XORconst [c^d] x)
  1572  (MVN (MOVDconst [c])) => (MOVDconst [^c])
  1573  (NEG (MOVDconst [c])) => (MOVDconst [-c])
  // Sign/zero extension of a constant is folded with the matching Go-level
  // narrowing conversion.
  1574  (MOVBreg  (MOVDconst [c])) => (MOVDconst [int64(int8(c))])
  1575  (MOVBUreg (MOVDconst [c])) => (MOVDconst [int64(uint8(c))])
  1576  (MOVHreg  (MOVDconst [c])) => (MOVDconst [int64(int16(c))])
  1577  (MOVHUreg (MOVDconst [c])) => (MOVDconst [int64(uint16(c))])
  1578  (MOVWreg  (MOVDconst [c])) => (MOVDconst [int64(int32(c))])
  1579  (MOVWUreg (MOVDconst [c])) => (MOVDconst [int64(uint32(c))])
  1580  (MOVDreg  (MOVDconst [c])) => (MOVDconst [c])
  1581  
  1582  // constant comparisons
  // Comparing two known constants produces a FlagConstant computed by the
  // matching flag helper (sub/logic/add flags, 64- or 32-bit).
  1583  (CMPconst  (MOVDconst [x]) [y]) => (FlagConstant [subFlags64(x,y)])
  1584  (CMPWconst (MOVDconst [x]) [y]) => (FlagConstant [subFlags32(int32(x),y)])
  1585  (TSTconst  (MOVDconst [x]) [y]) => (FlagConstant [logicFlags64(x&y)])
  1586  (TSTWconst (MOVDconst [x]) [y]) => (FlagConstant [logicFlags32(int32(x)&y)])
  1587  (CMNconst  (MOVDconst [x]) [y]) => (FlagConstant [addFlags64(x,y)])
  1588  (CMNWconst (MOVDconst [x]) [y]) => (FlagConstant [addFlags32(int32(x),y)])
  1589  
  1590  // other known comparisons
  // The left operand's value range is bounded by a zero-extension, mask, or
  // shift, so a comparison against a larger constant is statically "less
  // than" — encoded as the flags of subFlags64(0,1).
  1591  (CMPconst (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags64(0,1)])
  1592  (CMPconst (MOVHUreg _) [c]) && 0xffff < c => (FlagConstant [subFlags64(0,1)])
  1593  (CMPconst (MOVWUreg _) [c]) && 0xffffffff < c => (FlagConstant [subFlags64(0,1)])
  1594  (CMPconst (ANDconst _ [m]) [n]) && 0 <= m && m < n => (FlagConstant [subFlags64(0,1)])
  1595  (CMPconst (SRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 63 && (1<<uint64(64-c)) <= uint64(n) => (FlagConstant [subFlags64(0,1)])
  1596  (CMPWconst (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags64(0,1)])
  1597  (CMPWconst (MOVHUreg _) [c]) && 0xffff < c => (FlagConstant [subFlags64(0,1)])
  1598  
  1599  // absorb flag constants into branches
  // When the branch condition is a FlagConstant, the outcome is known: the
  // block is rewritten to First with the taken successor first (yes no) or,
  // when the predicate is false, with the successors swapped (no yes).
  1600  (EQ (FlagConstant [fc]) yes no) &&  fc.eq() => (First yes no)
  1601  (EQ (FlagConstant [fc]) yes no) && !fc.eq() => (First no yes)
  1602  
  1603  (NE (FlagConstant [fc]) yes no) &&  fc.ne() => (First yes no)
  1604  (NE (FlagConstant [fc]) yes no) && !fc.ne() => (First no yes)
  1605  
  1606  (LT (FlagConstant [fc]) yes no) &&  fc.lt() => (First yes no)
  1607  (LT (FlagConstant [fc]) yes no) && !fc.lt() => (First no yes)
  1608  
  1609  (LE (FlagConstant [fc]) yes no) &&  fc.le() => (First yes no)
  1610  (LE (FlagConstant [fc]) yes no) && !fc.le() => (First no yes)
  1611  
  1612  (GT (FlagConstant [fc]) yes no) &&  fc.gt() => (First yes no)
  1613  (GT (FlagConstant [fc]) yes no) && !fc.gt() => (First no yes)
  1614  
  1615  (GE (FlagConstant [fc]) yes no) &&  fc.ge() => (First yes no)
  1616  (GE (FlagConstant [fc]) yes no) && !fc.ge() => (First no yes)
  1617  
  1618  (ULT (FlagConstant [fc]) yes no) &&  fc.ult() => (First yes no)
  1619  (ULT (FlagConstant [fc]) yes no) && !fc.ult() => (First no yes)
  1620  
  1621  (ULE (FlagConstant [fc]) yes no) &&  fc.ule() => (First yes no)
  1622  (ULE (FlagConstant [fc]) yes no) && !fc.ule() => (First no yes)
  1623  
  1624  (UGT (FlagConstant [fc]) yes no) &&  fc.ugt() => (First yes no)
  1625  (UGT (FlagConstant [fc]) yes no) && !fc.ugt() => (First no yes)
  1626  
  1627  (UGE (FlagConstant [fc]) yes no) &&  fc.uge() => (First yes no)
  1628  (UGE (FlagConstant [fc]) yes no) && !fc.uge() => (First no yes)
  1629  
  1630  (LTnoov (FlagConstant [fc]) yes no) &&  fc.ltNoov() => (First yes no)
  1631  (LTnoov (FlagConstant [fc]) yes no) && !fc.ltNoov() => (First no yes)
  1632  
  1633  (LEnoov (FlagConstant [fc]) yes no) &&  fc.leNoov() => (First yes no)
  1634  (LEnoov (FlagConstant [fc]) yes no) && !fc.leNoov() => (First no yes)
  1635  
  1636  (GTnoov (FlagConstant [fc]) yes no) &&  fc.gtNoov() => (First yes no)
  1637  (GTnoov (FlagConstant [fc]) yes no) && !fc.gtNoov() => (First no yes)
  1638  
  1639  (GEnoov (FlagConstant [fc]) yes no) &&  fc.geNoov() => (First yes no)
  1640  (GEnoov (FlagConstant [fc]) yes no) && !fc.geNoov() => (First no yes)
  1641  
  // Zero/non-zero branches on a constant value, 64-bit (Z/NZ) and 32-bit
  // (ZW/NZW, which only inspect the low 32 bits via int32(c)).
  1642  (Z (MOVDconst [0]) yes no) => (First yes no)
  1643  (Z (MOVDconst [c]) yes no) && c != 0 => (First no yes)
  1644  (NZ (MOVDconst [0]) yes no) => (First no yes)
  1645  (NZ (MOVDconst [c]) yes no) && c != 0 => (First yes no)
  1646  (ZW (MOVDconst [c]) yes no) && int32(c) == 0 => (First yes no)
  1647  (ZW (MOVDconst [c]) yes no) && int32(c) != 0 => (First no yes)
  1648  (NZW (MOVDconst [c]) yes no) && int32(c) == 0 => (First no yes)
  1649  (NZW (MOVDconst [c]) yes no) && int32(c) != 0 => (First yes no)
  1650  
  1651  // absorb InvertFlags into branches
  // InvertFlags marks flags produced with swapped comparison operands, so
  // each ordered condition becomes its mirrored (not negated) counterpart;
  // EQ and NE are symmetric and pass through unchanged.
  1652  (LT (InvertFlags cmp) yes no) => (GT cmp yes no)
  1653  (GT (InvertFlags cmp) yes no) => (LT cmp yes no)
  1654  (LE (InvertFlags cmp) yes no) => (GE cmp yes no)
  1655  (GE (InvertFlags cmp) yes no) => (LE cmp yes no)
  1656  (ULT (InvertFlags cmp) yes no) => (UGT cmp yes no)
  1657  (UGT (InvertFlags cmp) yes no) => (ULT cmp yes no)
  1658  (ULE (InvertFlags cmp) yes no) => (UGE cmp yes no)
  1659  (UGE (InvertFlags cmp) yes no) => (ULE cmp yes no)
  1660  (EQ (InvertFlags cmp) yes no) => (EQ cmp yes no)
  1661  (NE (InvertFlags cmp) yes no) => (NE cmp yes no)
  1662  (FLT (InvertFlags cmp) yes no) => (FGT cmp yes no)
  1663  (FGT (InvertFlags cmp) yes no) => (FLT cmp yes no)
  1664  (FLE (InvertFlags cmp) yes no) => (FGE cmp yes no)
  1665  (FGE (InvertFlags cmp) yes no) => (FLE cmp yes no)
  1666  (LTnoov (InvertFlags cmp) yes no) => (GTnoov cmp yes no)
  1667  (GEnoov (InvertFlags cmp) yes no) => (LEnoov cmp yes no)
  1668  (LEnoov (InvertFlags cmp) yes no) => (GEnoov cmp yes no)
  1669  (GTnoov (InvertFlags cmp) yes no) => (LTnoov cmp yes no)
  1670  
  1671  // absorb InvertFlags into conditional instructions
  // For conditional selects the condition code auxint is rewritten with
  // arm64Invert instead of changing the op.
  1672  (CSEL [cc] x y (InvertFlags cmp)) => (CSEL [arm64Invert(cc)] x y cmp)
  1673  (CSEL0 [cc] x (InvertFlags cmp)) => (CSEL0 [arm64Invert(cc)] x cmp)
  1674  (CSETM [cc] (InvertFlags cmp)) => (CSETM [arm64Invert(cc)] cmp)
  1675  (CSINC [cc] x y (InvertFlags cmp)) => (CSINC [arm64Invert(cc)] x y cmp)
  1676  (CSINV [cc] x y (InvertFlags cmp)) => (CSINV [arm64Invert(cc)] x y cmp)
  1677  (CSNEG [cc] x y (InvertFlags cmp)) => (CSNEG [arm64Invert(cc)] x y cmp)
  1678  
  1679  // absorb flag constants into boolean values
  // A boolean-producing op applied to known flags folds to the constant 0/1
  // via b2i of the matching flag predicate.
  1680  (Equal (FlagConstant [fc])) => (MOVDconst [b2i(fc.eq())])
  1681  (NotEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.ne())])
  1682  (LessThan (FlagConstant [fc])) => (MOVDconst [b2i(fc.lt())])
  1683  (LessThanU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ult())])
  1684  (LessEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.le())])
  1685  (LessEqualU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ule())])
  1686  (GreaterThan (FlagConstant [fc])) => (MOVDconst [b2i(fc.gt())])
  1687  (GreaterThanU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ugt())])
  1688  (GreaterEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.ge())])
  1689  (GreaterEqualU (FlagConstant [fc])) => (MOVDconst [b2i(fc.uge())])
  1690  
  1691  // absorb InvertFlags into boolean values
  // Same operand-swap mirroring as for branches above: ordered predicates
  // flip direction, Equal/NotEqual are unchanged.
  1692  (Equal (InvertFlags x)) => (Equal x)
  1693  (NotEqual (InvertFlags x)) => (NotEqual x)
  1694  (LessThan (InvertFlags x)) => (GreaterThan x)
  1695  (LessThanU (InvertFlags x)) => (GreaterThanU x)
  1696  (GreaterThan (InvertFlags x)) => (LessThan x)
  1697  (GreaterThanU (InvertFlags x)) => (LessThanU x)
  1698  (LessEqual (InvertFlags x)) => (GreaterEqual x)
  1699  (LessEqualU (InvertFlags x)) => (GreaterEqualU x)
  1700  (GreaterEqual (InvertFlags x)) => (LessEqual x)
  1701  (GreaterEqualU (InvertFlags x)) => (LessEqualU x)
  1702  (LessThanF (InvertFlags x)) => (GreaterThanF x)
  1703  (LessEqualF (InvertFlags x)) => (GreaterEqualF x)
  1704  (GreaterThanF (InvertFlags x)) => (LessThanF x)
  1705  (GreaterEqualF (InvertFlags x)) => (LessEqualF x)
  1706  
  1707  // Boolean-generating instructions (NOTE: NOT all boolean Values) always
  1708  // zero upper bit of the register; no need to zero-extend
  1709  (MOVBUreg x:((Equal|NotEqual|LessThan|LessThanU|LessThanF|LessEqual|LessEqualU|LessEqualF|GreaterThan|GreaterThanU|GreaterThanF|GreaterEqual|GreaterEqualU|GreaterEqualF) _)) => (MOVDreg x)
  1710  
  1711  // absorb flag constants into conditional instructions
  // ccARM64Eval returns >0 when cc is known true for the given flags and <0
  // when known false; the select then collapses to the corresponding arm
  // (with the CSINC/CSINV/CSNEG transform applied to the second arm).
  1712  (CSEL [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
  1713  (CSEL [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => y
  1714  (CSEL0 [cc] x flag) && ccARM64Eval(cc, flag) > 0 => x
  1715  (CSEL0 [cc] _ flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0])
  1716  (CSNEG [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
  1717  (CSNEG [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (NEG y)
  1718  (CSINV [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
  1719  (CSINV [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (Not y)
  1720  (CSINC [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
  1721  (CSINC [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (ADDconst [1] y)
  1722  (CSETM [cc] flag) && ccARM64Eval(cc, flag) > 0 => (MOVDconst [-1])
  1723  (CSETM [cc] flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0])
  1724  
  1725  // absorb flags back into boolean CSEL
  // A CSEL testing "boolval != 0" (or "== 0") where boolval itself came from
  // a flags computation is rewritten to select directly on those flags,
  // using the boolean op as (or negated into) the condition code.
  1726  (CSEL [cc] x y (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil =>
  1727        (CSEL [boolval.Op] x y flagArg(boolval))
  1728  (CSEL [cc] x y (CMPWconst [0] boolval)) && cc == OpARM64Equal && flagArg(boolval) != nil =>
  1729        (CSEL [arm64Negate(boolval.Op)] x y flagArg(boolval))
  1730  (CSEL0 [cc] x (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil =>
  1731        (CSEL0 [boolval.Op] x flagArg(boolval))
  1732  (CSEL0 [cc] x (CMPWconst [0] boolval)) && cc == OpARM64Equal && flagArg(boolval) != nil =>
  1733        (CSEL0 [arm64Negate(boolval.Op)] x flagArg(boolval))
  1734  
  1735  // absorb shifts into ops
  // Fold a constant-shifted operand into the consuming instruction's shifted
  // form (e.g. ADD+SLLconst => ADDshiftLL). clobberIfDead(x1) — presumably
  // this requires the shift value to have no other uses so it can be
  // clobbered; confirm against the rewrite helper's definition. For the
  // non-commutative CMP, a shift in the FIRST operand is handled by swapping
  // operands and wrapping the result in InvertFlags.
  1736  (NEG x:(SLLconst [c] y)) && clobberIfDead(x) => (NEGshiftLL [c] y)
  1737  (NEG x:(SRLconst [c] y)) && clobberIfDead(x) => (NEGshiftRL [c] y)
  1738  (NEG x:(SRAconst [c] y)) && clobberIfDead(x) => (NEGshiftRA [c] y)
  1739  (MVN x:(SLLconst [c] y)) && clobberIfDead(x) => (MVNshiftLL [c] y)
  1740  (MVN x:(SRLconst [c] y)) && clobberIfDead(x) => (MVNshiftRL [c] y)
  1741  (MVN x:(SRAconst [c] y)) && clobberIfDead(x) => (MVNshiftRA [c] y)
  1742  (MVN x:(RORconst [c] y)) && clobberIfDead(x) => (MVNshiftRO [c] y)
  1743  (ADD x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (ADDshiftLL x0 y [c])
  1744  (ADD x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (ADDshiftRL x0 y [c])
  1745  (ADD x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (ADDshiftRA x0 y [c])
  1746  (SUB x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (SUBshiftLL x0 y [c])
  1747  (SUB x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (SUBshiftRL x0 y [c])
  1748  (SUB x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (SUBshiftRA x0 y [c])
  1749  (AND x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (ANDshiftLL x0 y [c])
  1750  (AND x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (ANDshiftRL x0 y [c])
  1751  (AND x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (ANDshiftRA x0 y [c])
  1752  (AND x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (ANDshiftRO x0 y [c])
  1753  (OR  x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (ORshiftLL  x0 y [c]) // useful for combined load
  1754  (OR  x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (ORshiftRL  x0 y [c])
  1755  (OR  x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (ORshiftRA  x0 y [c])
  1756  (OR  x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (ORshiftRO  x0 y [c])
  1757  (XOR x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (XORshiftLL x0 y [c])
  1758  (XOR x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (XORshiftRL x0 y [c])
  1759  (XOR x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (XORshiftRA x0 y [c])
  1760  (XOR x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (XORshiftRO x0 y [c])
  1761  (BIC x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (BICshiftLL x0 y [c])
  1762  (BIC x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (BICshiftRL x0 y [c])
  1763  (BIC x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (BICshiftRA x0 y [c])
  1764  (BIC x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (BICshiftRO x0 y [c])
  1765  (ORN x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (ORNshiftLL x0 y [c])
  1766  (ORN x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (ORNshiftRL x0 y [c])
  1767  (ORN x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (ORNshiftRA x0 y [c])
  1768  (ORN x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (ORNshiftRO x0 y [c])
  1769  (EON x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (EONshiftLL x0 y [c])
  1770  (EON x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (EONshiftRL x0 y [c])
  1771  (EON x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (EONshiftRA x0 y [c])
  1772  (EON x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (EONshiftRO x0 y [c])
  1773  (CMP x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (CMPshiftLL x0 y [c])
  1774  (CMP x0:(SLLconst [c] y) x1) && clobberIfDead(x0) => (InvertFlags (CMPshiftLL x1 y [c]))
  1775  (CMP x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (CMPshiftRL x0 y [c])
  1776  (CMP x0:(SRLconst [c] y) x1) && clobberIfDead(x0) => (InvertFlags (CMPshiftRL x1 y [c]))
  1777  (CMP x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (CMPshiftRA x0 y [c])
  1778  (CMP x0:(SRAconst [c] y) x1) && clobberIfDead(x0) => (InvertFlags (CMPshiftRA x1 y [c]))
  1779  (CMN x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (CMNshiftLL x0 y [c])
  1780  (CMN x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (CMNshiftRL x0 y [c])
  1781  (CMN x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (CMNshiftRA x0 y [c])
  1782  (TST x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (TSTshiftLL x0 y [c])
  1783  (TST x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (TSTshiftRL x0 y [c])
  1784  (TST x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (TSTshiftRA x0 y [c])
  1785  (TST x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (TSTshiftRO x0 y [c])
  1786  
  1787  // prefer *const ops to *shift ops
  // If the NON-shifted operand of a *shift op is a constant, undo the fusion:
  // emit the shift separately and use the immediate form of the main op.
  // For the non-commutative CMPshift* this swaps operand roles, hence the
  // InvertFlags wrapper.
  1788  (ADDshiftLL (MOVDconst [c]) x [d]) => (ADDconst [c] (SLLconst <x.Type> x [d]))
  1789  (ADDshiftRL (MOVDconst [c]) x [d]) => (ADDconst [c] (SRLconst <x.Type> x [d]))
  1790  (ADDshiftRA (MOVDconst [c]) x [d]) => (ADDconst [c] (SRAconst <x.Type> x [d]))
  1791  (ANDshiftLL (MOVDconst [c]) x [d]) => (ANDconst [c] (SLLconst <x.Type> x [d]))
  1792  (ANDshiftRL (MOVDconst [c]) x [d]) => (ANDconst [c] (SRLconst <x.Type> x [d]))
  1793  (ANDshiftRA (MOVDconst [c]) x [d]) => (ANDconst [c] (SRAconst <x.Type> x [d]))
  1794  (ANDshiftRO (MOVDconst [c]) x [d]) => (ANDconst [c] (RORconst <x.Type> x [d]))
  1795  (ORshiftLL  (MOVDconst [c]) x [d]) => (ORconst  [c] (SLLconst <x.Type> x [d]))
  1796  (ORshiftRL  (MOVDconst [c]) x [d]) => (ORconst  [c] (SRLconst <x.Type> x [d]))
  1797  (ORshiftRA  (MOVDconst [c]) x [d]) => (ORconst  [c] (SRAconst <x.Type> x [d]))
  1798  (ORshiftRO  (MOVDconst [c]) x [d]) => (ORconst  [c] (RORconst <x.Type> x [d]))
  1799  (XORshiftLL (MOVDconst [c]) x [d]) => (XORconst [c] (SLLconst <x.Type> x [d]))
  1800  (XORshiftRL (MOVDconst [c]) x [d]) => (XORconst [c] (SRLconst <x.Type> x [d]))
  1801  (XORshiftRA (MOVDconst [c]) x [d]) => (XORconst [c] (SRAconst <x.Type> x [d]))
  1802  (XORshiftRO (MOVDconst [c]) x [d]) => (XORconst [c] (RORconst <x.Type> x [d]))
  1803  (CMPshiftLL (MOVDconst [c]) x [d]) => (InvertFlags (CMPconst [c] (SLLconst <x.Type> x [d])))
  1804  (CMPshiftRL (MOVDconst [c]) x [d]) => (InvertFlags (CMPconst [c] (SRLconst <x.Type> x [d])))
  1805  (CMPshiftRA (MOVDconst [c]) x [d]) => (InvertFlags (CMPconst [c] (SRAconst <x.Type> x [d])))
  1806  (CMNshiftLL (MOVDconst [c]) x [d]) => (CMNconst [c] (SLLconst <x.Type> x [d]))
  1807  (CMNshiftRL (MOVDconst [c]) x [d]) => (CMNconst [c] (SRLconst <x.Type> x [d]))
  1808  (CMNshiftRA (MOVDconst [c]) x [d]) => (CMNconst [c] (SRAconst <x.Type> x [d]))
  1809  (TSTshiftLL (MOVDconst [c]) x [d]) => (TSTconst [c] (SLLconst <x.Type> x [d]))
  1810  (TSTshiftRL (MOVDconst [c]) x [d]) => (TSTconst [c] (SRLconst <x.Type> x [d]))
  1811  (TSTshiftRA (MOVDconst [c]) x [d]) => (TSTconst [c] (SRAconst <x.Type> x [d]))
  1812  (TSTshiftRO (MOVDconst [c]) x [d]) => (TSTconst [c] (RORconst <x.Type> x [d]))
  1813  
  1814  // constant folding in *shift ops
  // When the SHIFTED operand of a *shift op is a constant, evaluate the
  // shift (unsigned for LL/RL, signed for RA, rotateRight64 for RO) and use
  // the immediate form of the main op. BIC/ORN/EON become AND/OR/XOR with
  // the complemented (^) folded constant.
  1815  (MVNshiftLL (MOVDconst [c]) [d]) => (MOVDconst [^int64(uint64(c)<<uint64(d))])
  1816  (MVNshiftRL (MOVDconst [c]) [d]) => (MOVDconst [^int64(uint64(c)>>uint64(d))])
  1817  (MVNshiftRA (MOVDconst [c]) [d]) => (MOVDconst [^(c>>uint64(d))])
  1818  (MVNshiftRO (MOVDconst [c]) [d]) => (MOVDconst [^rotateRight64(c, d)])
  1819  (NEGshiftLL (MOVDconst [c]) [d]) => (MOVDconst [-int64(uint64(c)<<uint64(d))])
  1820  (NEGshiftRL (MOVDconst [c]) [d]) => (MOVDconst [-int64(uint64(c)>>uint64(d))])
  1821  (NEGshiftRA (MOVDconst [c]) [d]) => (MOVDconst [-(c>>uint64(d))])
  1822  (ADDshiftLL x (MOVDconst [c]) [d]) => (ADDconst x [int64(uint64(c)<<uint64(d))])
  1823  (ADDshiftRL x (MOVDconst [c]) [d]) => (ADDconst x [int64(uint64(c)>>uint64(d))])
  1824  (ADDshiftRA x (MOVDconst [c]) [d]) => (ADDconst x [c>>uint64(d)])
  1825  (SUBshiftLL x (MOVDconst [c]) [d]) => (SUBconst x [int64(uint64(c)<<uint64(d))])
  1826  (SUBshiftRL x (MOVDconst [c]) [d]) => (SUBconst x [int64(uint64(c)>>uint64(d))])
  1827  (SUBshiftRA x (MOVDconst [c]) [d]) => (SUBconst x [c>>uint64(d)])
  1828  (ANDshiftLL x (MOVDconst [c]) [d]) => (ANDconst x [int64(uint64(c)<<uint64(d))])
  1829  (ANDshiftRL x (MOVDconst [c]) [d]) => (ANDconst x [int64(uint64(c)>>uint64(d))])
  1830  (ANDshiftRA x (MOVDconst [c]) [d]) => (ANDconst x [c>>uint64(d)])
  1831  (ANDshiftRO x (MOVDconst [c]) [d]) => (ANDconst x [rotateRight64(c, d)])
  1832  (ORshiftLL  x (MOVDconst [c]) [d]) => (ORconst  x [int64(uint64(c)<<uint64(d))])
  1833  (ORshiftRL  x (MOVDconst [c]) [d]) => (ORconst  x [int64(uint64(c)>>uint64(d))])
  1834  (ORshiftRA  x (MOVDconst [c]) [d]) => (ORconst  x [c>>uint64(d)])
  1835  (ORshiftRO  x (MOVDconst [c]) [d]) => (ORconst  x [rotateRight64(c, d)])
  1836  (XORshiftLL x (MOVDconst [c]) [d]) => (XORconst x [int64(uint64(c)<<uint64(d))])
  1837  (XORshiftRL x (MOVDconst [c]) [d]) => (XORconst x [int64(uint64(c)>>uint64(d))])
  1838  (XORshiftRA x (MOVDconst [c]) [d]) => (XORconst x [c>>uint64(d)])
  1839  (XORshiftRO x (MOVDconst [c]) [d]) => (XORconst x [rotateRight64(c, d)])
  1840  (BICshiftLL x (MOVDconst [c]) [d]) => (ANDconst x [^int64(uint64(c)<<uint64(d))])
  1841  (BICshiftRL x (MOVDconst [c]) [d]) => (ANDconst x [^int64(uint64(c)>>uint64(d))])
  1842  (BICshiftRA x (MOVDconst [c]) [d]) => (ANDconst x [^(c>>uint64(d))])
  1843  (BICshiftRO x (MOVDconst [c]) [d]) => (ANDconst x [^rotateRight64(c, d)])
  1844  (ORNshiftLL x (MOVDconst [c]) [d]) => (ORconst  x [^int64(uint64(c)<<uint64(d))])
  1845  (ORNshiftRL x (MOVDconst [c]) [d]) => (ORconst  x [^int64(uint64(c)>>uint64(d))])
  1846  (ORNshiftRA x (MOVDconst [c]) [d]) => (ORconst  x [^(c>>uint64(d))])
  1847  (ORNshiftRO x (MOVDconst [c]) [d]) => (ORconst  x [^rotateRight64(c, d)])
  1848  (EONshiftLL x (MOVDconst [c]) [d]) => (XORconst x [^int64(uint64(c)<<uint64(d))])
  1849  (EONshiftRL x (MOVDconst [c]) [d]) => (XORconst x [^int64(uint64(c)>>uint64(d))])
  1850  (EONshiftRA x (MOVDconst [c]) [d]) => (XORconst x [^(c>>uint64(d))])
  1851  (EONshiftRO x (MOVDconst [c]) [d]) => (XORconst x [^rotateRight64(c, d)])
  1852  (CMPshiftLL x (MOVDconst [c]) [d]) => (CMPconst x [int64(uint64(c)<<uint64(d))])
  1853  (CMPshiftRL x (MOVDconst [c]) [d]) => (CMPconst x [int64(uint64(c)>>uint64(d))])
  1854  (CMPshiftRA x (MOVDconst [c]) [d]) => (CMPconst x [c>>uint64(d)])
  1855  (CMNshiftLL x (MOVDconst [c]) [d]) => (CMNconst x [int64(uint64(c)<<uint64(d))])
  1856  (CMNshiftRL x (MOVDconst [c]) [d]) => (CMNconst x [int64(uint64(c)>>uint64(d))])
  1857  (CMNshiftRA x (MOVDconst [c]) [d]) => (CMNconst x [c>>uint64(d)])
  1858  (TSTshiftLL x (MOVDconst [c]) [d]) => (TSTconst x [int64(uint64(c)<<uint64(d))])
  1859  (TSTshiftRL x (MOVDconst [c]) [d]) => (TSTconst x [int64(uint64(c)>>uint64(d))])
  1860  (TSTshiftRA x (MOVDconst [c]) [d]) => (TSTconst x [c>>uint64(d)])
  1861  (TSTshiftRO x (MOVDconst [c]) [d]) => (TSTconst x [rotateRight64(c, d)])
  1862  
  1863  // simplification with *shift ops
  // Both operands are the same value shifted identically, so the result is a
  // constant (SUB/XOR/BIC => 0, EON/ORN => -1) or the shared operand itself
  // (AND/OR => y).
  1864  (SUBshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [0])
  1865  (SUBshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [0])
  1866  (SUBshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [0])
  1867  (ANDshiftLL y:(SLLconst x [c]) x [c]) => y
  1868  (ANDshiftRL y:(SRLconst x [c]) x [c]) => y
  1869  (ANDshiftRA y:(SRAconst x [c]) x [c]) => y
  1870  (ANDshiftRO y:(RORconst x [c]) x [c]) => y
  1871  (ORshiftLL  y:(SLLconst x [c]) x [c]) => y
  1872  (ORshiftRL  y:(SRLconst x [c]) x [c]) => y
  1873  (ORshiftRA  y:(SRAconst x [c]) x [c]) => y
  1874  (ORshiftRO  y:(RORconst x [c]) x [c]) => y
  1875  (XORshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [0])
  1876  (XORshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [0])
  1877  (XORshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [0])
  1878  (XORshiftRO (RORconst x [c]) x [c]) => (MOVDconst [0])
  1879  (BICshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [0])
  1880  (BICshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [0])
  1881  (BICshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [0])
  1882  (BICshiftRO (RORconst x [c]) x [c]) => (MOVDconst [0])
  1883  (EONshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [-1])
  1884  (EONshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [-1])
  1885  (EONshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [-1])
  1886  (EONshiftRO (RORconst x [c]) x [c]) => (MOVDconst [-1])
  1887  (ORNshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [-1])
  1888  (ORNshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [-1])
  1889  (ORNshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [-1])
  1890  (ORNshiftRO (RORconst x [c]) x [c]) => (MOVDconst [-1])
  1891  
// rev16w | rev16
// ((x>>8) | (x<<8)) => (REV16W x), the type of x is uint16, "|" can also be "^" or "+".
((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (UBFX <typ.UInt16> [armBFAuxInt(8, 8)] x) x) => (REV16W x)

// ((x & 0xff00ff00)>>8) | ((x & 0x00ff00ff)<<8), "|" can also be "^" or "+".
// 32-bit per-halfword byte swap recognized via the complementary byte masks.
((ADDshiftLL|ORshiftLL|XORshiftLL) [8] (UBFX [armBFAuxInt(8, 24)] (ANDconst [c1] x)) (ANDconst [c2] x))
	&& uint32(c1) == 0xff00ff00 && uint32(c2) == 0x00ff00ff
	=> (REV16W x)

// ((x & 0xff00ff00ff00ff00)>>8) | ((x & 0x00ff00ff00ff00ff)<<8), "|" can also be "^" or "+".
((ADDshiftLL|ORshiftLL|XORshiftLL) [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
	&& (uint64(c1) == 0xff00ff00ff00ff00 && uint64(c2) == 0x00ff00ff00ff00ff)
	=> (REV16 x)

// ((x & 0xff00ff00)>>8) | ((x & 0x00ff00ff)<<8), "|" can also be "^" or "+".
// 32-bit masks on a 64-bit op: clear the high word explicitly before REV16.
((ADDshiftLL|ORshiftLL|XORshiftLL) [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
	&& (uint64(c1) == 0xff00ff00 && uint64(c2) == 0x00ff00ff)
	=> (REV16 (ANDconst <x.Type> [0xffffffff] x))
  1910  
// Extract from reg pair
// (x >> (64-c)) combined with (x2 << c) is a double-register extract.
(ADDshiftLL [c] (SRLconst x [64-c]) x2) => (EXTRconst [64-c] x2 x)
( ORshiftLL [c] (SRLconst x [64-c]) x2) => (EXTRconst [64-c] x2 x)
(XORshiftLL [c] (SRLconst x [64-c]) x2) => (EXTRconst [64-c] x2 x)

// 32-bit variant: the UBFX supplies the top 32-c bits of x.
(ADDshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
	=> (EXTRWconst [32-c] x2 x)
( ORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
	=> (EXTRWconst [32-c] x2 x)
(XORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
	=> (EXTRWconst [32-c] x2 x)

// Rewrite special pairs of shifts to AND.
// On ARM64 the bitmask can fit into an instruction.
(SRLconst [c] (SLLconst [c] x)) && 0 < c && c < 64 => (ANDconst [1<<uint(64-c)-1] x) // mask out high bits
(SLLconst [c] (SRLconst [c] x)) && 0 < c && c < 64 => (ANDconst [^(1<<uint(c)-1)] x) // mask out low bits

// Special case setting bit as 1. An example is math.Copysign(c,-1)
// If the OR sets every bit the AND could clear, the AND is redundant.
(ORconst [c1] (ANDconst [c2] x)) && c2|c1 == ^0  => (ORconst [c1] x)
  1930  
// If the shift amount is at least the datasize (32, 16, 8), we can optimize to constant 0.
(MOVWUreg (SLLconst [lc] x)) && lc >= 32 => (MOVDconst [0])
(MOVHUreg (SLLconst [lc] x)) && lc >= 16 => (MOVDconst [0])
(MOVBUreg (SLLconst [lc] x)) && lc >= 8 => (MOVDconst [0])

// After zero extension, the upper (64-datasize(32|16|8)) bits are zero, we can optimize to constant 0.
(SRLconst [rc] (MOVWUreg x)) && rc >= 32 => (MOVDconst [0])
(SRLconst [rc] (MOVHUreg x)) && rc >= 16 => (MOVDconst [0])
(SRLconst [rc] (MOVBUreg x)) && rc >= 8 => (MOVDconst [0])
  1940  
// bitfield ops
// armBFAuxInt(lsb, width) packs a bitfield's low-bit position and width
// (read back via getARM64BFlsb / getARM64BFwidth).

// sbfiz
// (x << lc) >> rc
(SRAconst [rc] (SLLconst [lc] x)) && lc > rc => (SBFIZ [armBFAuxInt(lc-rc, 64-lc)] x)
// int64(x << lc)
(MOVWreg (SLLconst [lc] x)) && lc < 32 => (SBFIZ [armBFAuxInt(lc, 32-lc)] x)
(MOVHreg (SLLconst [lc] x)) && lc < 16 => (SBFIZ [armBFAuxInt(lc, 16-lc)] x)
(MOVBreg (SLLconst [lc] x)) && lc < 8 => (SBFIZ [armBFAuxInt(lc, 8-lc)] x)
// int64(x) << lc
// min() caps the width so lsb+width never exceeds 64.
(SLLconst [lc] (MOVWreg x))  => (SBFIZ [armBFAuxInt(lc, min(32, 64-lc))] x)
(SLLconst [lc] (MOVHreg x))  => (SBFIZ [armBFAuxInt(lc, min(16, 64-lc))] x)
(SLLconst [lc] (MOVBreg x))  => (SBFIZ [armBFAuxInt(lc, min(8, 64-lc))] x)

// sbfx
// (x << lc) >> rc
(SRAconst [rc] (SLLconst [lc] x)) && lc <= rc => (SBFX [armBFAuxInt(rc-lc, 64-rc)] x)
// int64(x) >> rc
(SRAconst [rc] (MOVWreg x)) && rc < 32 => (SBFX [armBFAuxInt(rc, 32-rc)] x)
(SRAconst [rc] (MOVHreg x)) && rc < 16 => (SBFX [armBFAuxInt(rc, 16-rc)] x)
(SRAconst [rc] (MOVBreg x)) && rc < 8 => (SBFX [armBFAuxInt(rc, 8-rc)] x)
// merge sbfx and sign-extension into sbfx
// The extension is redundant when the extracted field is no wider than the target.
(MOVWreg (SBFX [bfc] x)) && bfc.getARM64BFwidth() <= 32 => (SBFX [bfc] x)
(MOVHreg (SBFX [bfc] x)) && bfc.getARM64BFwidth() <= 16 => (SBFX [bfc] x)
(MOVBreg (SBFX [bfc] x)) && bfc.getARM64BFwidth() <=  8 => (SBFX [bfc] x)

// sbfiz/sbfx combinations: merge shifts into bitfield ops
(SRAconst [sc] (SBFIZ [bfc] x)) && sc < bfc.getARM64BFlsb()
	=> (SBFIZ [armBFAuxInt(bfc.getARM64BFlsb()-sc, bfc.getARM64BFwidth())] x)
(SRAconst [sc] (SBFIZ [bfc] x)) && sc >= bfc.getARM64BFlsb()
	&& sc < bfc.getARM64BFlsb()+bfc.getARM64BFwidth()
	=> (SBFX [armBFAuxInt(sc-bfc.getARM64BFlsb(), bfc.getARM64BFlsb()+bfc.getARM64BFwidth()-sc)] x)

// ubfiz
// (x << lc) >> rc
(SRLconst [rc] (SLLconst [lc] x)) && lc > rc => (UBFIZ [armBFAuxInt(lc-rc, 64-lc)] x)
// uint64(x) << lc
(SLLconst [lc] (MOVWUreg x))  => (UBFIZ [armBFAuxInt(lc, min(32, 64-lc))] x)
(SLLconst [lc] (MOVHUreg x))  => (UBFIZ [armBFAuxInt(lc, min(16, 64-lc))] x)
(SLLconst [lc] (MOVBUreg x))  => (UBFIZ [armBFAuxInt(lc, min(8, 64-lc))] x)
// uint64(x << lc)
(MOVWUreg (SLLconst [lc] x)) && lc < 32 => (UBFIZ [armBFAuxInt(lc, 32-lc)] x)
(MOVHUreg (SLLconst [lc] x)) && lc < 16 => (UBFIZ [armBFAuxInt(lc, 16-lc)] x)
(MOVBUreg (SLLconst [lc] x)) && lc < 8 => (UBFIZ [armBFAuxInt(lc, 8-lc)] x)

// merge ANDconst into ubfiz
// (x & ac) << sc
(SLLconst [sc] (ANDconst [ac] x)) && isARM64BFMask(sc, ac, 0)
	=> (UBFIZ [armBFAuxInt(sc, arm64BFWidth(ac, 0))] x)
// (x << sc) & ac
(ANDconst [ac] (SLLconst [sc] x)) && isARM64BFMask(sc, ac, sc)
	=> (UBFIZ [armBFAuxInt(sc, arm64BFWidth(ac, sc))] x)

// ubfx
// (x << lc) >> rc
(SRLconst [rc] (SLLconst [lc] x)) && lc < rc => (UBFX [armBFAuxInt(rc-lc, 64-rc)] x)
// uint64(x) >> rc
(SRLconst [rc] (MOVWUreg x)) && rc < 32 => (UBFX [armBFAuxInt(rc, 32-rc)] x)
(SRLconst [rc] (MOVHUreg x)) && rc < 16 => (UBFX [armBFAuxInt(rc, 16-rc)] x)
(SRLconst [rc] (MOVBUreg x)) && rc < 8 => (UBFX [armBFAuxInt(rc, 8-rc)] x)
// uint64(x >> rc)
(MOVWUreg (SRLconst [rc] x)) && rc < 32 => (UBFX [armBFAuxInt(rc, 32)] x)
(MOVHUreg (SRLconst [rc] x)) && rc < 16 => (UBFX [armBFAuxInt(rc, 16)] x)
(MOVBUreg (SRLconst [rc] x)) && rc < 8 => (UBFX [armBFAuxInt(rc, 8)] x)
// merge ANDconst into ubfx
// (x >> sc) & ac
(ANDconst [ac] (SRLconst [sc] x)) && isARM64BFMask(sc, ac, 0)
	=> (UBFX [armBFAuxInt(sc, arm64BFWidth(ac, 0))] x)
// (x & ac) >> sc
(SRLconst [sc] (ANDconst [ac] x)) && isARM64BFMask(sc, ac, sc)
	=> (UBFX [armBFAuxInt(sc, arm64BFWidth(ac, sc))] x)
// merge ANDconst and ubfx into ubfx
(ANDconst [c] (UBFX [bfc] x)) && isARM64BFMask(0, c, 0) =>
	(UBFX [armBFAuxInt(bfc.getARM64BFlsb(), min(bfc.getARM64BFwidth(), arm64BFWidth(c, 0)))] x)
(UBFX [bfc] (ANDconst [c] x)) && isARM64BFMask(0, c, 0) && bfc.getARM64BFlsb() + bfc.getARM64BFwidth() <= arm64BFWidth(c, 0) =>
	(UBFX [bfc] x)
// merge ubfx and zero-extension into ubfx
(MOVWUreg (UBFX [bfc] x)) && bfc.getARM64BFwidth() <= 32 => (UBFX [bfc] x)
(MOVHUreg (UBFX [bfc] x)) && bfc.getARM64BFwidth() <= 16 => (UBFX [bfc] x)
(MOVBUreg (UBFX [bfc] x)) && bfc.getARM64BFwidth() <=  8 => (UBFX [bfc] x)

// ubfiz/ubfx combinations: merge shifts into bitfield ops
(SRLconst [sc] (UBFX [bfc] x)) && sc < bfc.getARM64BFwidth()
	=> (UBFX [armBFAuxInt(bfc.getARM64BFlsb()+sc, bfc.getARM64BFwidth()-sc)] x)
(UBFX [bfc] (SRLconst [sc] x)) && sc+bfc.getARM64BFwidth()+bfc.getARM64BFlsb() < 64
	=> (UBFX [armBFAuxInt(bfc.getARM64BFlsb()+sc, bfc.getARM64BFwidth())] x)
(SLLconst [sc] (UBFIZ [bfc] x)) && sc+bfc.getARM64BFwidth()+bfc.getARM64BFlsb() < 64
	=> (UBFIZ [armBFAuxInt(bfc.getARM64BFlsb()+sc, bfc.getARM64BFwidth())] x)
(UBFIZ [bfc] (SLLconst [sc] x)) && sc < bfc.getARM64BFwidth()
	=> (UBFIZ [armBFAuxInt(bfc.getARM64BFlsb()+sc, bfc.getARM64BFwidth()-sc)] x)
// ((x << c1) >> c2) >> c3
// Three cases: shift exactly to lsb (plain mask), short of lsb, or into the field.
(SRLconst [sc] (UBFIZ [bfc] x)) && sc == bfc.getARM64BFlsb()
	=> (ANDconst [1<<uint(bfc.getARM64BFwidth())-1] x)
(SRLconst [sc] (UBFIZ [bfc] x)) && sc < bfc.getARM64BFlsb()
	=> (UBFIZ [armBFAuxInt(bfc.getARM64BFlsb()-sc, bfc.getARM64BFwidth())] x)
(SRLconst [sc] (UBFIZ [bfc] x)) && sc > bfc.getARM64BFlsb()
	&& sc < bfc.getARM64BFlsb()+bfc.getARM64BFwidth()
	=> (UBFX [armBFAuxInt(sc-bfc.getARM64BFlsb(), bfc.getARM64BFlsb()+bfc.getARM64BFwidth()-sc)] x)
// ((x << c1) << c2) >> c3
(UBFX [bfc] (SLLconst [sc] x)) && sc == bfc.getARM64BFlsb()
	=> (ANDconst [1<<uint(bfc.getARM64BFwidth())-1] x)
(UBFX [bfc] (SLLconst [sc] x)) && sc < bfc.getARM64BFlsb()
	=> (UBFX [armBFAuxInt(bfc.getARM64BFlsb()-sc, bfc.getARM64BFwidth())] x)
(UBFX [bfc] (SLLconst [sc] x)) && sc > bfc.getARM64BFlsb()
	&& sc < bfc.getARM64BFlsb()+bfc.getARM64BFwidth()
	=> (UBFIZ [armBFAuxInt(sc-bfc.getARM64BFlsb(), bfc.getARM64BFlsb()+bfc.getARM64BFwidth()-sc)] x)
  2047  
// bfi
// Bitfield insert: y's field at [lsb, lsb+width) is replaced by x; the AND mask
// must clear exactly that field (ac is the complement of the field mask).
(OR (UBFIZ [bfc] x) (ANDconst [ac] y))
	&& ac == ^((1<<uint(bfc.getARM64BFwidth())-1) << uint(bfc.getARM64BFlsb()))
	=> (BFI [bfc] y x)
(ORshiftRL [rc] (ANDconst [ac] x) (SLLconst [lc] y))
	&& lc > rc && ac == ^((1<<uint(64-lc)-1) << uint64(lc-rc))
	=> (BFI [armBFAuxInt(lc-rc, 64-lc)] x y)
// bfxil
// Bitfield extract-and-insert-low: x's field replaces the low bits of y.
(OR (UBFX [bfc] x) (ANDconst [ac] y)) && ac == ^(1<<uint(bfc.getARM64BFwidth())-1)
	=> (BFXIL [bfc] y x)
(ORshiftLL [sc] (UBFX [bfc] x) (SRLconst [sc] y)) && sc == bfc.getARM64BFwidth()
	=> (BFXIL [bfc] y x)
(ORshiftRL [rc] (ANDconst [ac] y) (SLLconst [lc] x)) && lc < rc && ac == ^((1<<uint(64-rc)-1))
	=> (BFXIL [armBFAuxInt(rc-lc, 64-rc)] y x)
  2062  
// do combined loads
// little endian loads
// The Uses == 1 conditions ensure the narrow loads and their wrappers have no
// other consumers; mergePoint(b, ...) != nil requires a block where the merged
// wide load can be placed (the @mergePoint result is emitted there).
// NOTE(review): clobber presumably marks the replaced values dead — see the
// rewrite helper definitions to confirm.
// b[0] | b[1]<<8 => load 16-bit
(ORshiftLL <t> [8]
	y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))
	y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
	&& i1 == i0+1
	&& x0.Uses == 1 && x1.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1
	&& mergePoint(b,x0,x1) != nil
	&& clobber(x0, x1, y0, y1)
	=> @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
(ORshiftLL <t> [8]
	y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))
	y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1
	&& mergePoint(b,x0,x1) != nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& clobber(x0, x1, y0, y1)
	=> @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
(ORshiftLL <t> [8]
	y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))
	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
	&& x0.Uses == 1 && x1.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1
	&& mergePoint(b,x0,x1) != nil
	&& clobber(x0, x1, y0, y1)
	=> @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr idx mem)

// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 => load 32-bit
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
	            x0:(MOVHUload [i0] {s} p mem)
	y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
	&& i2 == i0+2
	&& i3 == i0+3
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
	&& y1.Uses == 1 && y2.Uses == 1
	&& o0.Uses == 1
	&& mergePoint(b,x0,x1,x2) != nil
	&& clobber(x0, x1, x2, y1, y2, o0)
	=> @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
	            x0:(MOVHUloadidx ptr0 idx0 mem)
	y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADD ptr1 idx1) mem)))
	y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
	&& y1.Uses == 1 && y2.Uses == 1
	&& o0.Uses == 1
	&& mergePoint(b,x0,x1,x2) != nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& isSamePtr(p1, p)
	&& clobber(x0, x1, x2, y1, y2, o0)
	=> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 idx0 mem)
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
	            x0:(MOVHUloadidx ptr idx mem)
	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
	&& y1.Uses == 1 && y2.Uses == 1
	&& o0.Uses == 1
	&& mergePoint(b,x0,x1,x2) != nil
	&& clobber(x0, x1, x2, y1, y2, o0)
	=> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr idx mem)
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
	            x0:(MOVHUloadidx2 ptr0 idx0 mem)
	y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADDshiftLL [1] ptr1 idx1) mem)))
	y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
	&& y1.Uses == 1 && y2.Uses == 1
	&& o0.Uses == 1
	&& mergePoint(b,x0,x1,x2) != nil
	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
	&& isSamePtr(p1, p)
	&& clobber(x0, x1, x2, y1, y2, o0)
	=> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 (SLLconst <idx0.Type> [1] idx0) mem)

// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 => load 64-bit
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
	            x0:(MOVWUload [i0] {s} p mem)
	y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem)))
	y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
	&& i4 == i0+4
	&& i5 == i0+5
	&& i6 == i0+6
	&& i7 == i0+7
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
	&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
	&& clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
	=> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
	            x0:(MOVWUloadidx ptr0 idx0 mem)
	y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADD ptr1 idx1) mem)))
	y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [6] {s} p mem)))
	y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
	&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& isSamePtr(p1, p)
	&& clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
	=> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 idx0 mem)
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
	            x0:(MOVWUloadidx4 ptr0 idx0 mem)
	y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADDshiftLL [2] ptr1 idx1) mem)))
	y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [6] {s} p mem)))
	y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
	&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
	&& isSamePtr(p1, p)
	&& clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
	=> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 (SLLconst <idx0.Type> [2] idx0) mem)
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
	            x0:(MOVWUloadidx ptr idx mem)
	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
	y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
	&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
	&& clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
	=> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr idx mem)

// b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] => load 32-bit
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
	y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem)))
	y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)))
	&& i1 == i0+1
	&& i2 == i0+2
	&& i3 == i0+3
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3) != nil
	&& clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
	=> @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
	y0:(MOVDnop x0:(MOVBUload [3] {s} p mem)))
	y1:(MOVDnop x1:(MOVBUload [2] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
	y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3) != nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& isSamePtr(p1, p)
	&& clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
	=> @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
	y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
	y3:(MOVDnop x3:(MOVBUloadidx ptr idx mem)))
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3) != nil
	&& clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
	=> @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr idx mem)

// b[7]<<56 | b[6]<<48 | b[5]<<40 | b[4]<<32 | b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] => load 64-bit
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
	y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem)))
	y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem)))
	y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem)))
	y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem)))
	y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem)))
	y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)))
	&& i1 == i0+1
	&& i2 == i0+2
	&& i3 == i0+3
	&& i4 == i0+4
	&& i5 == i0+5
	&& i6 == i0+6
	&& i7 == i0+7
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
	&& clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
	=> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
	y0:(MOVDnop x0:(MOVBUload [7] {s} p mem)))
	y1:(MOVDnop x1:(MOVBUload [6] {s} p mem)))
	y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
	y3:(MOVDnop x3:(MOVBUload [4] {s} p mem)))
	y4:(MOVDnop x4:(MOVBUload [3] {s} p mem)))
	y5:(MOVDnop x5:(MOVBUload [2] {s} p mem)))
	y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
	y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)))
	&& s == nil
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
	&& isSamePtr(p1, p)
	&& clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
	=> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
	y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
	y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
	y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
	y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
	y7:(MOVDnop x7:(MOVBUloadidx ptr idx mem)))
	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
	&& clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
	=> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr idx mem)
  2310  
  2311  // big endian loads
  2312  // b[1] | b[0]<<8 => load 16-bit, reverse
  2313  (ORshiftLL <t> [8]
  2314  	y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem))
  2315  	y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem)))
  2316  	&& i1 == i0+1
  2317  	&& x0.Uses == 1 && x1.Uses == 1
  2318  	&& y0.Uses == 1 && y1.Uses == 1
  2319  	&& mergePoint(b,x0,x1) != nil
  2320  	&& clobber(x0, x1, y0, y1)
  2321  	=> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
  2322  (ORshiftLL <t> [8]
  2323  	y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))
  2324  	y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem)))
  2325  	&& s == nil
  2326  	&& x0.Uses == 1 && x1.Uses == 1
  2327  	&& y0.Uses == 1 && y1.Uses == 1
  2328  	&& mergePoint(b,x0,x1) != nil
  2329  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2330  	&& clobber(x0, x1, y0, y1)
  2331  	=> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
  2332  (ORshiftLL <t> [8]
  2333  	y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [1] idx) mem))
  2334  	y1:(MOVDnop x1:(MOVBUloadidx ptr idx mem)))
  2335  	&& x0.Uses == 1 && x1.Uses == 1
  2336  	&& y0.Uses == 1 && y1.Uses == 1
  2337  	&& mergePoint(b,x0,x1) != nil
  2338  	&& clobber(x0, x1, y0, y1)
  2339  	=> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr idx mem))
  2340  
  2341  // b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 => load 32-bit, reverse
  2342  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  2343  	y0:(REV16W  x0:(MOVHUload [i2] {s} p mem))
  2344  	y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
  2345  	y2:(MOVDnop x2:(MOVBUload [i0] {s} p mem)))
  2346  	&& i1 == i0+1
  2347  	&& i2 == i0+2
  2348  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  2349  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
  2350  	&& o0.Uses == 1
  2351  	&& mergePoint(b,x0,x1,x2) != nil
  2352  	&& clobber(x0, x1, x2, y0, y1, y2, o0)
  2353  	=> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
  2354  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  2355  	y0:(REV16W  x0:(MOVHUload [2] {s} p mem))
  2356  	y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  2357  	y2:(MOVDnop x2:(MOVBUloadidx ptr0 idx0 mem)))
  2358  	&& s == nil
  2359  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  2360  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
  2361  	&& o0.Uses == 1
  2362  	&& mergePoint(b,x0,x1,x2) != nil
  2363  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2364  	&& isSamePtr(p1, p)
  2365  	&& clobber(x0, x1, x2, y0, y1, y2, o0)
  2366  	=> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
  2367  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  2368  	y0:(REV16W  x0:(MOVHUloadidx ptr (ADDconst [2] idx) mem))
  2369  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2370  	y2:(MOVDnop x2:(MOVBUloadidx ptr idx mem)))
  2371  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  2372  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
  2373  	&& o0.Uses == 1
  2374  	&& mergePoint(b,x0,x1,x2) != nil
  2375  	&& clobber(x0, x1, x2, y0, y1, y2, o0)
  2376  	=> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
  2377  
  2378  // b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 => load 64-bit, reverse
  2379  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2380  	y0:(REVW    x0:(MOVWUload [i4] {s} p mem))
  2381  	y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem)))
  2382  	y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
  2383  	y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem)))
  2384  	y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem)))
  2385  	&& i1 == i0+1
  2386  	&& i2 == i0+2
  2387  	&& i3 == i0+3
  2388  	&& i4 == i0+4
  2389  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2390  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2391  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2392  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2393  	&& clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)
  2394  	=> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
  2395  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2396  	y0:(REVW    x0:(MOVWUload [4] {s} p mem))
  2397  	y1:(MOVDnop x1:(MOVBUload [3] {s} p mem)))
  2398  	y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
  2399  	y3:(MOVDnop x3:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  2400  	y4:(MOVDnop x4:(MOVBUloadidx ptr0 idx0 mem)))
  2401  	&& s == nil
  2402  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2403  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2404  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2405  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2406  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2407  	&& isSamePtr(p1, p)
  2408  	&& clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)
  2409  	=> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
  2410  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2411  	y0:(REVW    x0:(MOVWUloadidx ptr (ADDconst [4] idx) mem))
  2412  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  2413  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
  2414  	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2415  	y4:(MOVDnop x4:(MOVBUloadidx ptr idx mem)))
  2416  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2417  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2418  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2419  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2420  	&& clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)
  2421  	=> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr idx mem))
  2422  
  2423  // b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] => load 32-bit, reverse
        // Four consecutive byte loads combined in big-endian order become one
        // 32-bit load plus REVW (ARM64 is little-endian, so a reverse is needed).
        // All intermediate values must be single-use so they can be clobbered.
  2424  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
  2425  	y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)))
  2426  	y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
  2427  	y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
  2428  	y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
  2429  	&& i1 == i0+1
  2430  	&& i2 == i0+2
  2431  	&& i3 == i0+3
  2432  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2433  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2434  	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  2435  	&& mergePoint(b,x0,x1,x2,x3) != nil
  2436  	&& clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
  2437  	=> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
        // Variant where byte 0 is a register-indexed load and bytes 1..3 use
        // small constant offsets off the same ptr+idx base.
  2438  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
  2439  	y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)))
  2440  	y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  2441  	y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
  2442  	y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
  2443  	&& s == nil
  2444  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2445  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2446  	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  2447  	&& mergePoint(b,x0,x1,x2,x3) != nil
  2448  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2449  	&& isSamePtr(p1, p)
  2450  	&& clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
  2451  	=> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
        // Fully indexed variant: bytes at idx, idx+1, idx+2, idx+3.
  2452  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
  2453  	y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)))
  2454  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2455  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
  2456  	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  2457  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2458  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2459  	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  2460  	&& mergePoint(b,x0,x1,x2,x3) != nil
  2461  	&& clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
  2462  	=> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
  2463  
  2464  // b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] => load 64-bit, reverse
        // Eight consecutive byte loads combined in big-endian order become one
        // 64-bit load plus REV. Same single-use/mergePoint discipline as the
        // 32-bit rules above, extended to eight loads and six OR links.
  2465  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
  2466  	y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)))
  2467  	y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
  2468  	y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
  2469  	y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
  2470  	y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem)))
  2471  	y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem)))
  2472  	y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem)))
  2473  	y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
  2474  	&& i1 == i0+1
  2475  	&& i2 == i0+2
  2476  	&& i3 == i0+3
  2477  	&& i4 == i0+4
  2478  	&& i5 == i0+5
  2479  	&& i6 == i0+6
  2480  	&& i7 == i0+7
  2481  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2482  	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  2483  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2484  	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  2485  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  2486  	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  2487  	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  2488  	&& clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
  2489  	=> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
        // Variant where byte 0 is a register-indexed load and bytes 1..7 use
        // small constant offsets off the same ptr+idx base.
  2490  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
  2491  	y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)))
  2492  	y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  2493  	y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
  2494  	y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
  2495  	y4:(MOVDnop x4:(MOVBUload [4] {s} p mem)))
  2496  	y5:(MOVDnop x5:(MOVBUload [5] {s} p mem)))
  2497  	y6:(MOVDnop x6:(MOVBUload [6] {s} p mem)))
  2498  	y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)))
  2499  	&& s == nil
  2500  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2501  	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  2502  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2503  	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  2504  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  2505  	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  2506  	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  2507  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2508  	&& isSamePtr(p1, p)
  2509  	&& clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
  2510  	=> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
        // Fully indexed variant: bytes at idx, idx+1, ..., idx+7.
  2511  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
  2512  	y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)))
  2513  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2514  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
  2515  	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  2516  	y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
  2517  	y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
  2518  	y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
  2519  	y7:(MOVDnop x7:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
  2520  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2521  	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  2522  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2523  	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  2524  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  2525  	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  2526  	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  2527  	&& clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
  2528  	=> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr idx mem))
  2529  
  2530  // Combine zero stores into larger (unaligned) stores.
        // Each pair of adjacent zero stores of width w is widened to a single
        // zero store of width 2w; the elided store must be single-use and is
        // clobbered. The ladder runs byte -> half -> word -> double -> quad.
  2531  (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
  2532  	&& x.Uses == 1
  2533  	&& areAdjacentOffsets(int64(i),int64(j),1)
  2534  	&& isSamePtr(ptr0, ptr1)
  2535  	&& clobber(x)
  2536  	=> (MOVHstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
  2537  (MOVBstorezero [1] {s} (ADD ptr0 idx0) x:(MOVBstorezeroidx ptr1 idx1 mem))
  2538  	&& x.Uses == 1
  2539  	&& s == nil
  2540  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2541  	&& clobber(x)
  2542  	=> (MOVHstorezeroidx ptr1 idx1 mem)
  2543  (MOVBstorezeroidx ptr (ADDconst [1] idx) x:(MOVBstorezeroidx ptr idx mem))
  2544  	&& x.Uses == 1
  2545  	&& clobber(x)
  2546  	=> (MOVHstorezeroidx ptr idx mem)
        // half -> word
  2547  (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
  2548  	&& x.Uses == 1
  2549  	&& areAdjacentOffsets(int64(i),int64(j),2)
  2550  	&& isSamePtr(ptr0, ptr1)
  2551  	&& clobber(x)
  2552  	=> (MOVWstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
  2553  (MOVHstorezero [2] {s} (ADD ptr0 idx0) x:(MOVHstorezeroidx ptr1 idx1 mem))
  2554  	&& x.Uses == 1
  2555  	&& s == nil
  2556  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2557  	&& clobber(x)
  2558  	=> (MOVWstorezeroidx ptr1 idx1 mem)
  2559  (MOVHstorezeroidx ptr (ADDconst [2] idx) x:(MOVHstorezeroidx ptr idx mem))
  2560  	&& x.Uses == 1
  2561  	&& clobber(x)
  2562  	=> (MOVWstorezeroidx ptr idx mem)
        // Scaled-index variant: the "idx2" form addresses ptr+idx*2, so the
        // result re-expresses the index as an explicit shift.
  2563  (MOVHstorezero [2] {s} (ADDshiftLL [1] ptr0 idx0) x:(MOVHstorezeroidx2 ptr1 idx1 mem))
  2564  	&& x.Uses == 1
  2565  	&& s == nil
  2566  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2567  	&& clobber(x)
  2568  	=> (MOVWstorezeroidx ptr1 (SLLconst <idx1.Type> [1] idx1) mem)
        // word -> double
  2569  (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
  2570  	&& x.Uses == 1
  2571  	&& areAdjacentOffsets(int64(i),int64(j),4)
  2572  	&& isSamePtr(ptr0, ptr1)
  2573  	&& clobber(x)
  2574  	=> (MOVDstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
  2575  (MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem))
  2576  	&& x.Uses == 1
  2577  	&& s == nil
  2578  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2579  	&& clobber(x)
  2580  	=> (MOVDstorezeroidx ptr1 idx1 mem)
  2581  (MOVWstorezeroidx ptr (ADDconst [4] idx) x:(MOVWstorezeroidx ptr idx mem))
  2582  	&& x.Uses == 1
  2583  	&& clobber(x)
  2584  	=> (MOVDstorezeroidx ptr idx mem)
  2585  (MOVWstorezero [4] {s} (ADDshiftLL [2] ptr0 idx0) x:(MOVWstorezeroidx4 ptr1 idx1 mem))
  2586  	&& x.Uses == 1
  2587  	&& s == nil
  2588  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2589  	&& clobber(x)
  2590  	=> (MOVDstorezeroidx ptr1 (SLLconst <idx1.Type> [2] idx1) mem)
        // double -> quad (STP of two zero registers)
  2591  (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
  2592  	&& x.Uses == 1
  2593  	&& areAdjacentOffsets(int64(i),int64(j),8)
  2594  	&& isSamePtr(ptr0, ptr1)
  2595  	&& clobber(x)
  2596  	=> (MOVQstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
  2597  (MOVDstorezero [8] {s} p0:(ADD ptr0 idx0) x:(MOVDstorezeroidx ptr1 idx1 mem))
  2598  	&& x.Uses == 1
  2599  	&& s == nil
  2600  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2601  	&& clobber(x)
  2602  	=> (MOVQstorezero [0] {s} p0 mem)
  2603  (MOVDstorezero [8] {s} p0:(ADDshiftLL [3] ptr0 idx0) x:(MOVDstorezeroidx8 ptr1 idx1 mem))
  2604  	&& x.Uses == 1
  2605  	&& s == nil
  2606  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2607  	&& clobber(x)
  2608  	=> (MOVQstorezero [0] {s} p0 mem)
  2609  
  2610  // Combine stores into larger (unaligned) stores.
        // Byte stores of w and w>>8 (or the equivalent UBFX extract, or the
        // shift applied through a MOVDreg widening) at adjacent addresses
        // combine into one halfword store. The higher byte sits at the higher
        // address, which matches little-endian layout, so no byte swap is needed.
  2611  (MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
  2612  	&& x.Uses == 1
  2613  	&& isSamePtr(ptr0, ptr1)
  2614  	&& clobber(x)
  2615  	=> (MOVHstore [i-1] {s} ptr0 w mem)
  2616  (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] w) x:(MOVBstoreidx ptr1 idx1 w mem))
  2617  	&& x.Uses == 1
  2618  	&& s == nil
  2619  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2620  	&& clobber(x)
  2621  	=> (MOVHstoreidx ptr1 idx1 w mem)
  2622  (MOVBstoreidx ptr (ADDconst [1] idx) (SRLconst [8] w) x:(MOVBstoreidx ptr idx w mem))
  2623  	&& x.Uses == 1
  2624  	&& clobber(x)
  2625  	=> (MOVHstoreidx ptr idx w mem)
        // UBFX(8, 8) and UBFX(8, 24) are alternate spellings of w>>8 that the
        // compiler may have produced for 16- and 32-bit values respectively.
  2626  (MOVBstore [i] {s} ptr0 (UBFX [armBFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
  2627  	&& x.Uses == 1
  2628  	&& isSamePtr(ptr0, ptr1)
  2629  	&& clobber(x)
  2630  	=> (MOVHstore [i-1] {s} ptr0 w mem)
  2631  (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(8, 8)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
  2632  	&& x.Uses == 1
  2633  	&& s == nil
  2634  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2635  	&& clobber(x)
  2636  	=> (MOVHstoreidx ptr1 idx1 w mem)
  2637  (MOVBstore [i] {s} ptr0 (UBFX [armBFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
  2638  	&& x.Uses == 1
  2639  	&& isSamePtr(ptr0, ptr1)
  2640  	&& clobber(x)
  2641  	=> (MOVHstore [i-1] {s} ptr0 w mem)
  2642  (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(8, 24)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
  2643  	&& x.Uses == 1
  2644  	&& s == nil
  2645  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2646  	&& clobber(x)
  2647  	=> (MOVHstoreidx ptr1 idx1 w mem)
  2648  (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
  2649  	&& x.Uses == 1
  2650  	&& isSamePtr(ptr0, ptr1)
  2651  	&& clobber(x)
  2652  	=> (MOVHstore [i-1] {s} ptr0 w mem)
  2653  (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w mem))
  2654  	&& x.Uses == 1
  2655  	&& s == nil
  2656  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2657  	&& clobber(x)
  2658  	=> (MOVHstoreidx ptr1 idx1 w mem)
        // General shifted form: stores of w>>j and w>>(j-8) combine into a
        // halfword store of w>>(j-8).
  2659  (MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem))
  2660  	&& x.Uses == 1
  2661  	&& isSamePtr(ptr0, ptr1)
  2662  	&& clobber(x)
  2663  	=> (MOVHstore [i-1] {s} ptr0 w0 mem)
  2664  (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] w) mem))
  2665  	&& x.Uses == 1
  2666  	&& s == nil
  2667  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2668  	&& clobber(x)
  2669  	=> (MOVHstoreidx ptr1 idx1 w0 mem)
        // Same pattern with UBFX extracts: both must extract through bit 31
        // (width == 32-lsb) and differ by exactly 8 in their start bit.
  2670  (MOVBstore [i] {s} ptr0 (UBFX [bfc] w) x:(MOVBstore [i-1] {s} ptr1 w0:(UBFX [bfc2] w) mem))
  2671  	&& x.Uses == 1
  2672  	&& isSamePtr(ptr0, ptr1)
  2673  	&& bfc.getARM64BFwidth() == 32 - bfc.getARM64BFlsb()
  2674  	&& bfc2.getARM64BFwidth() == 32 - bfc2.getARM64BFlsb()
  2675  	&& bfc2.getARM64BFlsb() == bfc.getARM64BFlsb() - 8
  2676  	&& clobber(x)
  2677  	=> (MOVHstore [i-1] {s} ptr0 w0 mem)
  2678  (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [bfc] w) x:(MOVBstoreidx ptr1 idx1 w0:(UBFX [bfc2] w) mem))
  2679  	&& x.Uses == 1
  2680  	&& s == nil
  2681  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2682  	&& bfc.getARM64BFwidth() == 32 - bfc.getARM64BFlsb()
  2683  	&& bfc2.getARM64BFwidth() == 32 - bfc2.getARM64BFlsb()
  2684  	&& bfc2.getARM64BFlsb() == bfc.getARM64BFlsb() - 8
  2685  	&& clobber(x)
  2686  	=> (MOVHstoreidx ptr1 idx1 w0 mem)
  2687  (MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
  2688  	&& x.Uses == 1
  2689  	&& isSamePtr(ptr0, ptr1)
  2690  	&& clobber(x)
  2691  	=> (MOVHstore [i-1] {s} ptr0 w0 mem)
  2692  (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
  2693  	&& x.Uses == 1
  2694  	&& s == nil
  2695  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2696  	&& clobber(x)
  2697  	=> (MOVHstoreidx ptr1 idx1 w0 mem)
        // Halfword stores of w and w>>16 at adjacent addresses combine into
        // one word store; same variants as the byte->half ladder above
        // (offset, unscaled-index, scaled-index, UBFX/MOVDreg spellings).
  2698  (MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
  2699  	&& x.Uses == 1
  2700  	&& isSamePtr(ptr0, ptr1)
  2701  	&& clobber(x)
  2702  	=> (MOVWstore [i-2] {s} ptr0 w mem)
  2703  (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx ptr1 idx1 w mem))
  2704  	&& x.Uses == 1
  2705  	&& s == nil
  2706  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2707  	&& clobber(x)
  2708  	=> (MOVWstoreidx ptr1 idx1 w mem)
  2709  (MOVHstoreidx ptr (ADDconst [2] idx) (SRLconst [16] w) x:(MOVHstoreidx ptr idx w mem))
  2710  	&& x.Uses == 1
  2711  	&& clobber(x)
  2712  	=> (MOVWstoreidx ptr idx w mem)
  2713  (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
  2714  	&& x.Uses == 1
  2715  	&& s == nil
  2716  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2717  	&& clobber(x)
  2718  	=> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
  2719  (MOVHstore [i] {s} ptr0 (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
  2720  	&& x.Uses == 1
  2721  	&& isSamePtr(ptr0, ptr1)
  2722  	&& clobber(x)
  2723  	=> (MOVWstore [i-2] {s} ptr0 w mem)
  2724  (MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem))
  2725  	&& x.Uses == 1
  2726  	&& s == nil
  2727  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2728  	&& clobber(x)
  2729  	=> (MOVWstoreidx ptr1 idx1 w mem)
  2730  (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
  2731  	&& x.Uses == 1
  2732  	&& s == nil
  2733  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2734  	&& clobber(x)
  2735  	=> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
  2736  (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
  2737  	&& x.Uses == 1
  2738  	&& isSamePtr(ptr0, ptr1)
  2739  	&& clobber(x)
  2740  	=> (MOVWstore [i-2] {s} ptr0 w mem)
  2741  (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx ptr1 idx1 w mem))
  2742  	&& x.Uses == 1
  2743  	&& s == nil
  2744  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2745  	&& clobber(x)
  2746  	=> (MOVWstoreidx ptr1 idx1 w mem)
  2747  (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx2 ptr1 idx1 w mem))
  2748  	&& x.Uses == 1
  2749  	&& s == nil
  2750  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2751  	&& clobber(x)
  2752  	=> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
        // General shifted form: w>>j above w>>(j-16) combines into a word
        // store of w>>(j-16).
  2753  (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
  2754  	&& x.Uses == 1
  2755  	&& isSamePtr(ptr0, ptr1)
  2756  	&& clobber(x)
  2757  	=> (MOVWstore [i-2] {s} ptr0 w0 mem)
  2758  (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx ptr1 idx1 w0:(SRLconst [j-16] w) mem))
  2759  	&& x.Uses == 1
  2760  	&& s == nil
  2761  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2762  	&& clobber(x)
  2763  	=> (MOVWstoreidx ptr1 idx1 w0 mem)
  2764  (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx2 ptr1 idx1 w0:(SRLconst [j-16] w) mem))
  2765  	&& x.Uses == 1
  2766  	&& s == nil
  2767  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2768  	&& clobber(x)
  2769  	=> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w0 mem)
        // Word stores of w and w>>32 at adjacent addresses combine into one
        // doubleword store; offset, unscaled-index, and scaled-index variants.
  2770  (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
  2771  	&& x.Uses == 1
  2772  	&& isSamePtr(ptr0, ptr1)
  2773  	&& clobber(x)
  2774  	=> (MOVDstore [i-4] {s} ptr0 w mem)
  2775  (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem))
  2776  	&& x.Uses == 1
  2777  	&& s == nil
  2778  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2779  	&& clobber(x)
  2780  	=> (MOVDstoreidx ptr1 idx1 w mem)
  2781  (MOVWstoreidx ptr (ADDconst [4] idx) (SRLconst [32] w) x:(MOVWstoreidx ptr idx w mem))
  2782  	&& x.Uses == 1
  2783  	&& clobber(x)
  2784  	=> (MOVDstoreidx ptr idx w mem)
  2785  (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx4 ptr1 idx1 w mem))
  2786  	&& x.Uses == 1
  2787  	&& s == nil
  2788  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2789  	&& clobber(x)
  2790  	=> (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w mem)
        // General shifted form: w>>j above w>>(j-32).
  2791  (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
  2792  	&& x.Uses == 1
  2793  	&& isSamePtr(ptr0, ptr1)
  2794  	&& clobber(x)
  2795  	=> (MOVDstore [i-4] {s} ptr0 w0 mem)
  2796  (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem))
  2797  	&& x.Uses == 1
  2798  	&& s == nil
  2799  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2800  	&& clobber(x)
  2801  	=> (MOVDstoreidx ptr1 idx1 w0 mem)
  2802  (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx4 ptr1 idx1 w0:(SRLconst [j-32] w) mem))
  2803  	&& x.Uses == 1
  2804  	&& s == nil
  2805  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2806  	&& clobber(x)
  2807  	=> (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w0 mem)
        // Eight byte stores writing w's bytes in big-endian order (low byte at
        // the highest address) collapse to one 8-byte store of REV(w).
  2808  (MOVBstore [i] {s} ptr w
  2809  	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
  2810  	x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
  2811  	x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w)
  2812  	x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w)
  2813  	x4:(MOVBstore [i-5] {s} ptr (SRLconst [40] w)
  2814  	x5:(MOVBstore [i-6] {s} ptr (SRLconst [48] w)
  2815  	x6:(MOVBstore [i-7] {s} ptr (SRLconst [56] w) mem))))))))
  2816  	&& x0.Uses == 1
  2817  	&& x1.Uses == 1
  2818  	&& x2.Uses == 1
  2819  	&& x3.Uses == 1
  2820  	&& x4.Uses == 1
  2821  	&& x5.Uses == 1
  2822  	&& x6.Uses == 1
  2823  	&& clobber(x0, x1, x2, x3, x4, x5, x6)
  2824  	=> (MOVDstore [i-7] {s} ptr (REV <typ.UInt64> w) mem)
        // Indexed variant: the lowest-address store is register-indexed.
  2825  (MOVBstore [7] {s} p w
  2826  	x0:(MOVBstore [6] {s} p (SRLconst [8] w)
  2827  	x1:(MOVBstore [5] {s} p (SRLconst [16] w)
  2828  	x2:(MOVBstore [4] {s} p (SRLconst [24] w)
  2829  	x3:(MOVBstore [3] {s} p (SRLconst [32] w)
  2830  	x4:(MOVBstore [2] {s} p (SRLconst [40] w)
  2831  	x5:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [48] w)
  2832  	x6:(MOVBstoreidx ptr0 idx0 (SRLconst [56] w) mem))))))))
  2833  	&& x0.Uses == 1
  2834  	&& x1.Uses == 1
  2835  	&& x2.Uses == 1
  2836  	&& x3.Uses == 1
  2837  	&& x4.Uses == 1
  2838  	&& x5.Uses == 1
  2839  	&& x6.Uses == 1
  2840  	&& s == nil
  2841  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2842  	&& isSamePtr(p1, p)
  2843  	&& clobber(x0, x1, x2, x3, x4, x5, x6)
  2844  	=> (MOVDstoreidx ptr0 idx0 (REV <typ.UInt64> w) mem)
        // Four byte stores of w's low word in big-endian order collapse to one
        // 4-byte store of REVW(w); the byte extracts are spelled with UBFX here.
  2845  (MOVBstore [i] {s} ptr w
  2846  	x0:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w)
  2847  	x1:(MOVBstore [i-2] {s} ptr (UBFX [armBFAuxInt(16, 16)] w)
  2848  	x2:(MOVBstore [i-3] {s} ptr (UBFX [armBFAuxInt(24, 8)] w) mem))))
  2849  	&& x0.Uses == 1
  2850  	&& x1.Uses == 1
  2851  	&& x2.Uses == 1
  2852  	&& clobber(x0, x1, x2)
  2853  	=> (MOVWstore [i-3] {s} ptr (REVW <typ.UInt32> w) mem)
        // Indexed variant with the lowest-address store register-indexed.
  2854  (MOVBstore [3] {s} p w
  2855  	x0:(MOVBstore [2] {s} p (UBFX [armBFAuxInt(8, 24)] w)
  2856  	x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (UBFX [armBFAuxInt(16, 16)] w)
  2857  	x2:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(24, 8)] w) mem))))
  2858  	&& x0.Uses == 1
  2859  	&& x1.Uses == 1
  2860  	&& x2.Uses == 1
  2861  	&& s == nil
  2862  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2863  	&& isSamePtr(p1, p)
  2864  	&& clobber(x0, x1, x2)
  2865  	=> (MOVWstoreidx ptr0 idx0 (REVW <typ.UInt32> w) mem)
        // Fully indexed, big-endian byte order (low byte at idx+3): needs REVW.
  2866  (MOVBstoreidx ptr (ADDconst [3] idx) w
  2867  	x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(8, 24)] w)
  2868  	x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(16, 16)] w)
  2869  	x2:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(24, 8)] w) mem))))
  2870  	&& x0.Uses == 1
  2871  	&& x1.Uses == 1
  2872  	&& x2.Uses == 1
  2873  	&& clobber(x0, x1, x2)
  2874  	=> (MOVWstoreidx ptr idx (REVW <typ.UInt32> w) mem)
        // Fully indexed, little-endian byte order (low byte at idx): the bytes
        // already match memory order, so no REVW is needed.
  2875  (MOVBstoreidx ptr idx w
  2876  	x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 24)] w)
  2877  	x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(16, 16)] w)
  2878  	x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [armBFAuxInt(24, 8)] w) mem))))
  2879  	&& x0.Uses == 1
  2880  	&& x1.Uses == 1
  2881  	&& x2.Uses == 1
  2882  	&& clobber(x0, x1, x2)
  2883  	=> (MOVWstoreidx ptr idx w mem)
        // Same 4-byte big-endian store collapse as above, with the byte
        // extracts spelled as SRLconst (optionally through a MOVDreg widening).
  2884  (MOVBstore [i] {s} ptr w
  2885  	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w))
  2886  	x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w))
  2887  	x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem))))
  2888  	&& x0.Uses == 1
  2889  	&& x1.Uses == 1
  2890  	&& x2.Uses == 1
  2891  	&& clobber(x0, x1, x2)
  2892  	=> (MOVWstore [i-3] {s} ptr (REVW <typ.UInt32> w) mem)
  2893  (MOVBstore [3] {s} p w
  2894  	x0:(MOVBstore [2] {s} p (SRLconst [8] (MOVDreg w))
  2895  	x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] (MOVDreg w))
  2896  	x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] (MOVDreg w)) mem))))
  2897  	&& x0.Uses == 1
  2898  	&& x1.Uses == 1
  2899  	&& x2.Uses == 1
  2900  	&& s == nil
  2901  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2902  	&& isSamePtr(p1, p)
  2903  	&& clobber(x0, x1, x2)
  2904  	=> (MOVWstoreidx ptr0 idx0 (REVW <typ.UInt32> w) mem)
  2905  (MOVBstore [i] {s} ptr w
  2906  	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
  2907  	x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
  2908  	x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
  2909  	&& x0.Uses == 1
  2910  	&& x1.Uses == 1
  2911  	&& x2.Uses == 1
  2912  	&& clobber(x0, x1, x2)
  2913  	=> (MOVWstore [i-3] {s} ptr (REVW <typ.UInt32> w) mem)
  2914  (MOVBstore [3] {s} p w
  2915  	x0:(MOVBstore [2] {s} p (SRLconst [8] w)
  2916  	x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] w)
  2917  	x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] w) mem))))
  2918  	&& x0.Uses == 1
  2919  	&& x1.Uses == 1
  2920  	&& x2.Uses == 1
  2921  	&& s == nil
  2922  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2923  	&& isSamePtr(p1, p)
  2924  	&& clobber(x0, x1, x2)
  2925  	=> (MOVWstoreidx ptr0 idx0 (REVW <typ.UInt32> w) mem)
        // Two byte stores of w and w>>8 in big-endian order (low byte at the
        // higher address) collapse to one halfword store of REV16W(w).
  2926  (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
  2927  	&& x.Uses == 1
  2928  	&& clobber(x)
  2929  	=> (MOVHstore [i-1] {s} ptr (REV16W <typ.UInt16> w) mem)
  2930  (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] w) mem))
  2931  	&& x.Uses == 1
  2932  	&& s == nil
  2933  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2934  	&& clobber(x)
  2935  	=> (MOVHstoreidx ptr0 idx0 (REV16W <typ.UInt16> w) mem)
        // Same with the high byte spelled as UBFX(8, 8).
  2936  (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 8)] w) mem))
  2937  	&& x.Uses == 1
  2938  	&& clobber(x)
  2939  	=> (MOVHstore [i-1] {s} ptr (REV16W <typ.UInt16> w) mem)
  2940  (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(8, 8)] w) mem))
  2941  	&& x.Uses == 1
  2942  	&& s == nil
  2943  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2944  	&& clobber(x)
  2945  	=> (MOVHstoreidx ptr0 idx0 (REV16W <typ.UInt16> w) mem)
  2946  (MOVBstoreidx ptr (ADDconst [1] idx) w x:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(8, 8)] w) mem))
  2947  	&& x.Uses == 1
  2948  	&& clobber(x)
  2949  	=> (MOVHstoreidx ptr idx (REV16W <typ.UInt16> w) mem)
        // Little-endian order (low byte at idx, high byte at idx+1): the bytes
        // already match memory order, so no REV16W is needed.
  2950  (MOVBstoreidx ptr idx w x:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 8)] w) mem))
  2951  	&& x.Uses == 1
  2952  	&& clobber(x)
  2953  	=> (MOVHstoreidx ptr idx w mem)
  2954  (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
  2955  	&& x.Uses == 1
  2956  	&& clobber(x)
  2957  	=> (MOVHstore [i-1] {s} ptr (REV16W <typ.UInt16> w) mem)
  2958  (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
  2959  	&& x.Uses == 1
  2960  	&& s == nil
  2961  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2962  	&& clobber(x)
  2963  	=> (MOVHstoreidx ptr0 idx0 (REV16W <typ.UInt16> w) mem)
  2964  (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w) mem))
  2965  	&& x.Uses == 1
  2966  	&& clobber(x)
  2967  	=> (MOVHstore [i-1] {s} ptr (REV16W <typ.UInt16> w) mem)
  2968  (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(8, 24)] w) mem))
  2969  	&& x.Uses == 1
  2970  	&& s == nil
  2971  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2972  	&& clobber(x)
  2973  	=> (MOVHstoreidx ptr0 idx0 (REV16W <typ.UInt16> w) mem)
  2974  
  2975  // FP simplification
        // Fold a floating-point negate into an adjacent multiply: FNMUL computes
        // -(x*y), so NEG(MUL) and MUL(NEG) become FNMUL, and negating an FNMUL
        // cancels back to a plain multiply.
  2976  (FNEGS (FMULS x y)) => (FNMULS x y)
  2977  (FNEGD (FMULD x y)) => (FNMULD x y)
  2978  (FMULS (FNEGS x) y) => (FNMULS x y)
  2979  (FMULD (FNEGD x) y) => (FNMULD x y)
  2980  (FNEGS (FNMULS x y)) => (FMULS x y)
  2981  (FNEGD (FNMULD x y)) => (FMULD x y)
  2982  (FNMULS (FNEGS x) y) => (FMULS x y)
  2983  (FNMULD (FNEGD x) y) => (FMULD x y)
  2984  
        // Fuse multiply with an adjacent add/subtract into a single FMA-family
        // instruction, gated on useFMA (fusing changes rounding, so it must be
        // permitted for the function). FMADD = a+x*y, FMSUB = a-x*y,
        // FNMSUB = x*y-a, FNMADD = -a-x*y; FNMUL operands map accordingly.
  2985  (FADDS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y)
  2986  (FADDD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y)
  2987  (FSUBS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y)
  2988  (FSUBD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y)
  2989  (FSUBS (FMULS x y) a) && a.Block.Func.useFMA(v) => (FNMSUBS a x y)
  2990  (FSUBD (FMULD x y) a) && a.Block.Func.useFMA(v) => (FNMSUBD a x y)
  2991  (FADDS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y)
  2992  (FADDD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y)
  2993  (FSUBS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y)
  2994  (FSUBD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y)
  2995  (FSUBS (FNMULS x y) a) && a.Block.Func.useFMA(v) => (FNMADDS a x y)
  2996  (FSUBD (FNMULD x y) a) && a.Block.Func.useFMA(v) => (FNMADDD a x y)
  2997  
        // Loads from read-only symbols fold to constants at compile time,
        // reading the symbol's data in the target's byte order.
  2998  (MOVBUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read8(sym, int64(off)))])
  2999  (MOVHUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))])
  3000  (MOVWUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))])
  3001  (MOVDload  [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder))])
  3002  
  3003  // Prefetch instructions (aux is option: 0 - PLDL1KEEP; 1 - PLDL1STRM)
        // Lower the generic prefetch ops to ARM64 PRFM with the matching hint.
  3004  (PrefetchCache addr mem)         => (PRFM [0] addr mem)
  3005  (PrefetchCacheStreamed addr mem) => (PRFM [1] addr mem)
  3006  
  3007  // Arch-specific inlining for small or disjoint runtime.memmove
        // Stack-args version: s1/s2/s3 are the stores of size, src, and dst
        // into the call's argument slots; the whole call becomes a Move when
        // isInlinableMemmove approves the (dst, src, size) combination.
  3008  (SelectN [0] call:(CALLstatic {sym} s1:(MOVDstore _ (MOVDconst [sz]) s2:(MOVDstore  _ src s3:(MOVDstore {t} _ dst mem)))))
  3009  	&& sz >= 0
  3010  	&& isSameCall(sym, "runtime.memmove")
  3011  	&& s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1
  3012  	&& isInlinableMemmove(dst, src, sz, config)
  3013  	&& clobber(s1, s2, s3, call)
  3014  	=> (Move [sz] dst src mem)
  3015  
  3016  // Match post-lowering calls, register version.
  3017  (SelectN [0] call:(CALLstatic {sym} dst src (MOVDconst [sz]) mem))
  3018  	&& sz >= 0
  3019  	&& isSameCall(sym, "runtime.memmove")
  3020  	&& call.Uses == 1
  3021  	&& isInlinableMemmove(dst, src, sz, config)
  3022  	&& clobber(call)
  3023  	=> (Move [sz] dst src mem)
  3024  
        // Byte reversal is an involution: REV(REV x) == x, REVW(REVW x) == x.
  3025  ((REV|REVW) ((REV|REVW) p)) => p
  3026  
  3027  // runtime/internal/math.MulUintptr intrinsics
        // Select0 is the low 64 bits of the product; Select1 is the overflow
        // flag, true iff the high 64 bits (UMULH) are nonzero.
  3029  (Select0 (Mul64uover x y)) => (MUL x y)
  3030  (Select1 (Mul64uover x y)) => (NotEqual (CMPconst (UMULH <typ.UInt64> x y) [0]))