github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/compile/internal/ssa/gen/ARM64.rules (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  (AddPtr x y) -> (ADD x y)
     6  (Add64 x y) -> (ADD x y)
     7  (Add32 x y) -> (ADD x y)
     8  (Add16 x y) -> (ADD x y)
     9  (Add8 x y) -> (ADD x y)
    10  (Add32F x y) -> (FADDS x y)
    11  (Add64F x y) -> (FADDD x y)
    12  
    13  (SubPtr x y) -> (SUB x y)
    14  (Sub64 x y) -> (SUB x y)
    15  (Sub32 x y) -> (SUB x y)
    16  (Sub16 x y) -> (SUB x y)
    17  (Sub8 x y) -> (SUB x y)
    18  (Sub32F x y) -> (FSUBS x y)
    19  (Sub64F x y) -> (FSUBD x y)
    20  
    21  (Mul64 x y) -> (MUL x y)
    22  (Mul32 x y) -> (MULW x y)
    23  (Mul16 x y) -> (MULW x y)
    24  (Mul8 x y) -> (MULW x y)
    25  (Mul32F x y) -> (FMULS x y)
    26  (Mul64F x y) -> (FMULD x y)
    27  
    28  (Hmul64 x y) -> (MULH x y)
    29  (Hmul64u x y) -> (UMULH x y)
    30  (Hmul32 x y) -> (SRAconst (MULL <typ.Int64> x y) [32])
    31  (Hmul32u x y) -> (SRAconst (UMULL <typ.UInt64> x y) [32])
    32  (Mul64uhilo x y) -> (LoweredMuluhilo x y)
    33  
    34  (Div64 x y) -> (DIV x y)
    35  (Div64u x y) -> (UDIV x y)
    36  (Div32 x y) -> (DIVW x y)
    37  (Div32u x y) -> (UDIVW x y)
    38  (Div16 x y) -> (DIVW (SignExt16to32 x) (SignExt16to32 y))
    39  (Div16u x y) -> (UDIVW (ZeroExt16to32 x) (ZeroExt16to32 y))
    40  (Div8 x y) -> (DIVW (SignExt8to32 x) (SignExt8to32 y))
    41  (Div8u x y) -> (UDIVW (ZeroExt8to32 x) (ZeroExt8to32 y))
    42  (Div32F x y) -> (FDIVS x y)
    43  (Div64F x y) -> (FDIVD x y)
    44  
    45  (Mod64 x y) -> (MOD x y)
    46  (Mod64u x y) -> (UMOD x y)
    47  (Mod32 x y) -> (MODW x y)
    48  (Mod32u x y) -> (UMODW x y)
    49  (Mod16 x y) -> (MODW (SignExt16to32 x) (SignExt16to32 y))
    50  (Mod16u x y) -> (UMODW (ZeroExt16to32 x) (ZeroExt16to32 y))
    51  (Mod8 x y) -> (MODW (SignExt8to32 x) (SignExt8to32 y))
    52  (Mod8u x y) -> (UMODW (ZeroExt8to32 x) (ZeroExt8to32 y))
    53  
    54  // Unsigned average: (x + y) / 2 with x >= y is computed as (x - y) / 2 + y, so the sum x + y (which could overflow 64 bits) is never formed.
    55  (Avg64u <t> x y) -> (ADD (SRLconst <t> (SUB <t> x y) [1]) y)
    56  
    57  (And64 x y) -> (AND x y)
    58  (And32 x y) -> (AND x y)
    59  (And16 x y) -> (AND x y)
    60  (And8 x y) -> (AND x y)
    61  
    62  (Or64 x y) -> (OR x y)
    63  (Or32 x y) -> (OR x y)
    64  (Or16 x y) -> (OR x y)
    65  (Or8 x y) -> (OR x y)
    66  
    67  (Xor64 x y) -> (XOR x y)
    68  (Xor32 x y) -> (XOR x y)
    69  (Xor16 x y) -> (XOR x y)
    70  (Xor8 x y) -> (XOR x y)
    71  
    72  // unary ops
    73  (Neg64 x) -> (NEG x)
    74  (Neg32 x) -> (NEG x)
    75  (Neg16 x) -> (NEG x)
    76  (Neg8 x) -> (NEG x)
    77  (Neg32F x) -> (FNEGS x)
    78  (Neg64F x) -> (FNEGD x)
    79  
    80  (Com64 x) -> (MVN x)
    81  (Com32 x) -> (MVN x)
    82  (Com16 x) -> (MVN x)
    83  (Com8 x) -> (MVN x)
    84  
    85  // math package intrinsics. Each one lowers to a single op; note that FMA's third operand z is passed as the first argument of FMADDD.
    86  (Abs x) -> (FABSD x)
    87  (Sqrt x) -> (FSQRTD x)
    88  (Ceil  x) -> (FRINTPD x)
    89  (Floor x) -> (FRINTMD x)
    90  (Round x) -> (FRINTAD x)
    91  (RoundToEven x) -> (FRINTND x)
    92  (Trunc x) -> (FRINTZD x)
    93  (FMA x y z) -> (FMADDD z x y)
    94  
    95  // lowering rotates: 8- and 16-bit rotates by a constant c become a left shift by c and a right shift by -c (both masked to the width) OR-ed together; 32- and 64-bit rotate-left becomes RORW/ROR by the negated amount.
    96  (RotateLeft8 <t> x (MOVDconst [c])) -> (Or8 (Lsh8x64 <t> x (MOVDconst [c&7])) (Rsh8Ux64 <t> x (MOVDconst [-c&7])))
    97  (RotateLeft16 <t> x (MOVDconst [c])) -> (Or16 (Lsh16x64 <t> x (MOVDconst [c&15])) (Rsh16Ux64 <t> x (MOVDconst [-c&15])))
    98  (RotateLeft32 x y) -> (RORW x (NEG <y.Type> y))
    99  (RotateLeft64 x y) -> (ROR x (NEG <y.Type> y))
   100  
   101  (Ctz64NonZero x) -> (Ctz64 x)
   102  (Ctz32NonZero x) -> (Ctz32 x)
   103  (Ctz16NonZero x) -> (Ctz32 x)
   104  (Ctz8NonZero x) -> (Ctz32 x)
   105  
   106  (Ctz64 <t> x) -> (CLZ (RBIT <t> x))
   107  (Ctz32 <t> x) -> (CLZW (RBITW <t> x))
   108  (Ctz16 <t> x) -> (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x)))
   109  (Ctz8 <t> x) -> (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x100] x)))
   110  
   111  (PopCount64 <t> x) -> (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> x))))
   112  (PopCount32 <t> x) -> (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> (ZeroExt32to64 x)))))
   113  (PopCount16 <t> x) -> (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> (ZeroExt16to64 x)))))
   114  
   115  // Load args directly into the register class where it will be used.
   116  (FMOVDgpfp <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
   117  (FMOVDfpgp <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
   118  
   119  // Similarly for stores, if we see a store after FPR <-> GPR move, then redirect store to use the other register set.
   120  (MOVDstore [off] {sym} ptr (FMOVDfpgp val) mem) -> (FMOVDstore [off] {sym} ptr val mem)
   121  (FMOVDstore [off] {sym} ptr (FMOVDgpfp val) mem) -> (MOVDstore [off] {sym} ptr val mem)
   122  (MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem) -> (FMOVSstore [off] {sym} ptr val mem)
   123  (FMOVSstore [off] {sym} ptr (FMOVSgpfp val) mem) -> (MOVWstore [off] {sym} ptr val mem)
   124  
   125  // float <-> int register moves, with no conversion.
   126  // These come up when compiling math.{Float64bits, Float64frombits, Float32bits, Float32frombits}.
   127  (MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr val _)) -> (FMOVDfpgp val)
   128  (FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr val _)) -> (FMOVDgpfp val)
   129  (MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _)) -> (FMOVSfpgp val)
   130  (FMOVSload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) -> (FMOVSgpfp val)
   131  
   132  (BitLen64 x) -> (SUB (MOVDconst [64]) (CLZ <typ.Int> x))
   133  (BitLen32 x) -> (SUB (MOVDconst [32]) (CLZW <typ.Int> x))
   134  
   135  (Bswap64 x) -> (REV x)
   136  (Bswap32 x) -> (REVW x)
   137  
   138  (BitRev64 x) -> (RBIT x)
   139  (BitRev32 x) -> (RBITW x)
   140  (BitRev16 x) -> (SRLconst [48] (RBIT <typ.UInt64> x))
   141  (BitRev8 x) -> (SRLconst [56] (RBIT <typ.UInt64> x))
   142  
   143  // UMOD would otherwise be translated into a UREM instruction, which is itself expanded into
   144  // UDIV and MSUB instructions. If an identical UDIV already appears just before or after the
   145  // UREM (e.g. quo, rem := z/y, z%y), the second UDIV is redundant. Expanding UMOD here exposes
   146  // that extra UDIV so that the CSE pass can remove it.
   147  (UMOD <typ.UInt64> x y) -> (MSUB <typ.UInt64> x y (UDIV <typ.UInt64> x y))
   148  (UMODW <typ.UInt32> x y) -> (MSUBW <typ.UInt32> x y (UDIVW <typ.UInt32> x y))
   149  
   150  // 64-bit addition with carry. The carry-in value c is converted to flags by (ADDSconstflags [-1] c) and those flags feed ADCSflags; Select0 takes the UInt64 result of ADCSflags, and Select1 turns its flags result into a UInt64 with ADCzerocarry.
   151  (Select0 (Add64carry x y c)) -> (Select0 <typ.UInt64> (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] c))))
   152  (Select1 (Add64carry x y c)) -> (ADCzerocarry <typ.UInt64> (Select1 <types.TypeFlags> (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] c)))))
   153  
   154  // 64-bit subtraction with borrowing. The borrow-in value bo is converted to flags by (NEGSflags bo) and those flags feed SBCSflags; Select0 takes the UInt64 result of SBCSflags, and Select1 turns its flags result into a UInt64 with NGCzerocarry followed by NEG.
   155  (Select0 (Sub64borrow x y bo)) -> (Select0 <typ.UInt64> (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags bo))))
   156  (Select1 (Sub64borrow x y bo)) -> (NEG <typ.UInt64> (NGCzerocarry <typ.UInt64> (Select1 <types.TypeFlags> (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags bo))))))
   157  
   158  // boolean ops -- booleans are represented with 0=false, 1=true, so NeqB is a plain XOR, and Not and EqB invert a value by XOR-ing it with the constant 1
   159  (AndB x y) -> (AND x y)
   160  (OrB x y) -> (OR x y)
   161  (EqB x y) -> (XOR (MOVDconst [1]) (XOR <typ.Bool> x y))
   162  (NeqB x y) -> (XOR x y)
   163  (Not x) -> (XOR (MOVDconst [1]) x)
   164  
   165  // shifts
   166  // The hardware shift instructions use only the low 6 bits of the shift amount,
   167  // so we compare the amount to 64 to ensure Go semantics for large shifts.
   168  // Rules for rotates with a non-constant shift are derived from the rules below;
   169  // if the rules below change, please update those rotate rules as well.
   170  (Lsh64x64 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
   171  (Lsh64x32 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   172  (Lsh64x16 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   173  (Lsh64x8  <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   174  
   175  (Lsh32x64 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
   176  (Lsh32x32 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   177  (Lsh32x16 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   178  (Lsh32x8  <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   179  
   180  (Lsh16x64 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
   181  (Lsh16x32 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   182  (Lsh16x16 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   183  (Lsh16x8  <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   184  
   185  (Lsh8x64 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
   186  (Lsh8x32 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   187  (Lsh8x16 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   188  (Lsh8x8  <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   189  
   190  (Rsh64Ux64 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
   191  (Rsh64Ux32 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   192  (Rsh64Ux16 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   193  (Rsh64Ux8  <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   194  
   195  (Rsh32Ux64 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
   196  (Rsh32Ux32 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt32to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   197  (Rsh32Ux16 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt32to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   198  (Rsh32Ux8  <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt32to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   199  
   200  (Rsh16Ux64 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
   201  (Rsh16Ux32 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt16to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   202  (Rsh16Ux16 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt16to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   203  (Rsh16Ux8  <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt16to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   204  
   205  (Rsh8Ux64 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
   206  (Rsh8Ux32 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt8to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   207  (Rsh8Ux16 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt8to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   208  (Rsh8Ux8  <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt8to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   209  
   210  (Rsh64x64 x y) -> (SRA x (CSEL {OpARM64LessThanU} <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
   211  (Rsh64x32 x y) -> (SRA x (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
   212  (Rsh64x16 x y) -> (SRA x (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
   213  (Rsh64x8  x y) -> (SRA x (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
   214  
   215  (Rsh32x64 x y) -> (SRA (SignExt32to64 x) (CSEL {OpARM64LessThanU} <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
   216  (Rsh32x32 x y) -> (SRA (SignExt32to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
   217  (Rsh32x16 x y) -> (SRA (SignExt32to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
   218  (Rsh32x8  x y) -> (SRA (SignExt32to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
   219  
   220  (Rsh16x64 x y) -> (SRA (SignExt16to64 x) (CSEL {OpARM64LessThanU} <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
   221  (Rsh16x32 x y) -> (SRA (SignExt16to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
   222  (Rsh16x16 x y) -> (SRA (SignExt16to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
   223  (Rsh16x8  x y) -> (SRA (SignExt16to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
   224  
   225  (Rsh8x64 x y) -> (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
   226  (Rsh8x32 x y) -> (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
   227  (Rsh8x16 x y) -> (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
   228  (Rsh8x8  x y) -> (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
   229  
   230  // constants
   231  (Const64 [val]) -> (MOVDconst [val])
   232  (Const32 [val]) -> (MOVDconst [val])
   233  (Const16 [val]) -> (MOVDconst [val])
   234  (Const8 [val]) -> (MOVDconst [val])
   235  (Const32F [val]) -> (FMOVSconst [val])
   236  (Const64F [val]) -> (FMOVDconst [val])
   237  (ConstNil) -> (MOVDconst [0])
   238  (ConstBool [b]) -> (MOVDconst [b])
   239  
   240  (Slicemask <t> x) -> (SRAconst (NEG <t> x) [63])
   241  
   242  // truncations
   243  // Because we ignore high parts of registers, truncates are just copies.
   244  (Trunc16to8 x) -> x
   245  (Trunc32to8 x) -> x
   246  (Trunc32to16 x) -> x
   247  (Trunc64to8 x) -> x
   248  (Trunc64to16 x) -> x
   249  (Trunc64to32 x) -> x
   250  
   251  // Zero-/Sign-extensions
   252  (ZeroExt8to16 x) -> (MOVBUreg x)
   253  (ZeroExt8to32 x) -> (MOVBUreg x)
   254  (ZeroExt16to32 x) -> (MOVHUreg x)
   255  (ZeroExt8to64 x) -> (MOVBUreg x)
   256  (ZeroExt16to64 x) -> (MOVHUreg x)
   257  (ZeroExt32to64 x) -> (MOVWUreg x)
   258  
   259  (SignExt8to16 x) -> (MOVBreg x)
   260  (SignExt8to32 x) -> (MOVBreg x)
   261  (SignExt16to32 x) -> (MOVHreg x)
   262  (SignExt8to64 x) -> (MOVBreg x)
   263  (SignExt16to64 x) -> (MOVHreg x)
   264  (SignExt32to64 x) -> (MOVWreg x)
   265  
   266  // float <-> int conversion
   267  (Cvt32to32F x) -> (SCVTFWS x)
   268  (Cvt32to64F x) -> (SCVTFWD x)
   269  (Cvt64to32F x) -> (SCVTFS x)
   270  (Cvt64to64F x) -> (SCVTFD x)
   271  (Cvt32Uto32F x) -> (UCVTFWS x)
   272  (Cvt32Uto64F x) -> (UCVTFWD x)
   273  (Cvt64Uto32F x) -> (UCVTFS x)
   274  (Cvt64Uto64F x) -> (UCVTFD x)
   275  (Cvt32Fto32 x) -> (FCVTZSSW x)
   276  (Cvt64Fto32 x) -> (FCVTZSDW x)
   277  (Cvt32Fto64 x) -> (FCVTZSS x)
   278  (Cvt64Fto64 x) -> (FCVTZSD x)
   279  (Cvt32Fto32U x) -> (FCVTZUSW x)
   280  (Cvt64Fto32U x) -> (FCVTZUDW x)
   281  (Cvt32Fto64U x) -> (FCVTZUS x)
   282  (Cvt64Fto64U x) -> (FCVTZUD x)
   283  (Cvt32Fto64F x) -> (FCVTSD x)
   284  (Cvt64Fto32F x) -> (FCVTDS x)
   285  
   286  (Round32F x) -> (LoweredRound32F x)
   287  (Round64F x) -> (LoweredRound64F x)
   288  
   289  // comparisons
   290  (Eq8 x y)  -> (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
   291  (Eq16 x y) -> (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
   292  (Eq32 x y) -> (Equal (CMPW x y))
   293  (Eq64 x y) -> (Equal (CMP x y))
   294  (EqPtr x y) -> (Equal (CMP x y))
   295  (Eq32F x y) -> (Equal (FCMPS x y))
   296  (Eq64F x y) -> (Equal (FCMPD x y))
   297  
   298  (Neq8 x y)  -> (NotEqual (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
   299  (Neq16 x y) -> (NotEqual (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
   300  (Neq32 x y) -> (NotEqual (CMPW x y))
   301  (Neq64 x y) -> (NotEqual (CMP x y))
   302  (NeqPtr x y) -> (NotEqual (CMP x y))
   303  (Neq32F x y) -> (NotEqual (FCMPS x y))
   304  (Neq64F x y) -> (NotEqual (FCMPD x y))
   305  
   306  (Less8 x y)  -> (LessThan (CMPW (SignExt8to32 x) (SignExt8to32 y)))
   307  (Less16 x y) -> (LessThan (CMPW (SignExt16to32 x) (SignExt16to32 y)))
   308  (Less32 x y) -> (LessThan (CMPW x y))
   309  (Less64 x y) -> (LessThan (CMP x y))
   310  
   311  // Floating-point comparisons "x < y" and "x <= y" use their own conditions
   312  // (LessThanF, LessEqualF) on the flags set by FCMP. If either or both of the
   313  // operands are NaNs, all three of (x < y), (x == y) and (x > y) are false,
   314  // and the ARM manual says the FCMP instruction sets PSTATE.<N,Z,C,V> to
   315  // (0, 0, 1, 1) in that case.
   316  (Less32F x y) -> (LessThanF (FCMPS x y))
   317  (Less64F x y) -> (LessThanF (FCMPD x y))
   318  
   319  (Less8U x y)  -> (LessThanU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
   320  (Less16U x y) -> (LessThanU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
   321  (Less32U x y) -> (LessThanU (CMPW x y))
   322  (Less64U x y) -> (LessThanU (CMP x y))
   323  
   324  (Leq8 x y)  -> (LessEqual (CMPW (SignExt8to32 x) (SignExt8to32 y)))
   325  (Leq16 x y) -> (LessEqual (CMPW (SignExt16to32 x) (SignExt16to32 y)))
   326  (Leq32 x y) -> (LessEqual (CMPW x y))
   327  (Leq64 x y) -> (LessEqual (CMP x y))
   328  
   329  // Refer to the comments for op Less64F above.
   330  (Leq32F x y) -> (LessEqualF (FCMPS x y))
   331  (Leq64F x y) -> (LessEqualF (FCMPD x y))
   332  
   333  (Leq8U x y)  -> (LessEqualU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
   334  (Leq16U x y) -> (LessEqualU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
   335  (Leq32U x y) -> (LessEqualU (CMPW x y))
   336  (Leq64U x y) -> (LessEqualU (CMP x y))
   337  
   338  (Greater8 x y)  -> (GreaterThan (CMPW (SignExt8to32 x) (SignExt8to32 y)))
   339  (Greater16 x y) -> (GreaterThan (CMPW (SignExt16to32 x) (SignExt16to32 y)))
   340  (Greater32 x y) -> (GreaterThan (CMPW x y))
   341  (Greater64 x y) -> (GreaterThan (CMP x y))
   342  (Greater32F x y) -> (GreaterThanF (FCMPS x y))
   343  (Greater64F x y) -> (GreaterThanF (FCMPD x y))
   344  
   345  (Greater8U x y)  -> (GreaterThanU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
   346  (Greater16U x y) -> (GreaterThanU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
   347  (Greater32U x y) -> (GreaterThanU (CMPW x y))
   348  (Greater64U x y) -> (GreaterThanU (CMP x y))
   349  
   350  (Geq8 x y)  -> (GreaterEqual (CMPW (SignExt8to32 x) (SignExt8to32 y)))
   351  (Geq16 x y) -> (GreaterEqual (CMPW (SignExt16to32 x) (SignExt16to32 y)))
   352  (Geq32 x y) -> (GreaterEqual (CMPW x y))
   353  (Geq64 x y) -> (GreaterEqual (CMP x y))
   354  (Geq32F x y) -> (GreaterEqualF (FCMPS x y))
   355  (Geq64F x y) -> (GreaterEqualF (FCMPD x y))
   356  
   357  (Geq8U x y)  -> (GreaterEqualU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
   358  (Geq16U x y) -> (GreaterEqualU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
   359  (Geq32U x y) -> (GreaterEqualU (CMPW x y))
   360  (Geq64U x y) -> (GreaterEqualU (CMP x y))
   361  
   362  // Optimize comparison between a floating-point value and 0.0 with "FCMP $(0.0), Fn"
   363  (FCMPS x (FMOVSconst [0])) -> (FCMPS0 x)
   364  (FCMPS (FMOVSconst [0]) x) -> (InvertFlags (FCMPS0 x))
   365  (FCMPD x (FMOVDconst [0])) -> (FCMPD0 x)
   366  (FCMPD (FMOVDconst [0]) x) -> (InvertFlags (FCMPD0 x))
   367  
   368  // CSEL needs a flag-generating argument. When flagArg(boolval) is non-nil, reuse it with boolval.Op as the condition; otherwise synthesize a CMPWconst [0] of boolval and select on NotEqual.
   369  (CondSelect x y boolval) && flagArg(boolval) != nil -> (CSEL {boolval.Op} x y flagArg(boolval))
   370  (CondSelect x y boolval) && flagArg(boolval) == nil -> (CSEL {OpARM64NotEqual} x y (CMPWconst [0] boolval))
   371  
   372  (OffPtr [off] ptr:(SP)) -> (MOVDaddr [off] ptr)
   373  (OffPtr [off] ptr) -> (ADDconst [off] ptr)
   374  
   375  (Addr {sym} base) -> (MOVDaddr {sym} base)
   376  (LocalAddr {sym} base _) -> (MOVDaddr {sym} base)
   377  
   378  // loads
   379  (Load <t> ptr mem) && t.IsBoolean() -> (MOVBUload ptr mem)
   380  (Load <t> ptr mem) && (is8BitInt(t) && isSigned(t)) -> (MOVBload ptr mem)
   381  (Load <t> ptr mem) && (is8BitInt(t) && !isSigned(t)) -> (MOVBUload ptr mem)
   382  (Load <t> ptr mem) && (is16BitInt(t) && isSigned(t)) -> (MOVHload ptr mem)
   383  (Load <t> ptr mem) && (is16BitInt(t) && !isSigned(t)) -> (MOVHUload ptr mem)
   384  (Load <t> ptr mem) && (is32BitInt(t) && isSigned(t)) -> (MOVWload ptr mem)
   385  (Load <t> ptr mem) && (is32BitInt(t) && !isSigned(t)) -> (MOVWUload ptr mem)
   386  (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVDload ptr mem)
   387  (Load <t> ptr mem) && is32BitFloat(t) -> (FMOVSload ptr mem)
   388  (Load <t> ptr mem) && is64BitFloat(t) -> (FMOVDload ptr mem)
   389  
   390  // stores
   391  (Store {t} ptr val mem) && t.(*types.Type).Size() == 1 -> (MOVBstore ptr val mem)
   392  (Store {t} ptr val mem) && t.(*types.Type).Size() == 2 -> (MOVHstore ptr val mem)
   393  (Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && !is32BitFloat(val.Type) -> (MOVWstore ptr val mem)
   394  (Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && !is64BitFloat(val.Type) -> (MOVDstore ptr val mem)
   395  (Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && is32BitFloat(val.Type) -> (FMOVSstore ptr val mem)
   396  (Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && is64BitFloat(val.Type) -> (FMOVDstore ptr val mem)
   397  
   398  // zeroing
   399  (Zero [0] _ mem) -> mem
   400  (Zero [1] ptr mem) -> (MOVBstore ptr (MOVDconst [0]) mem)
   401  (Zero [2] ptr mem) -> (MOVHstore ptr (MOVDconst [0]) mem)
   402  (Zero [4] ptr mem) -> (MOVWstore ptr (MOVDconst [0]) mem)
   403  (Zero [8] ptr mem) -> (MOVDstore ptr (MOVDconst [0]) mem)
   404  
   405  (Zero [3] ptr mem) ->
   406  	(MOVBstore [2] ptr (MOVDconst [0])
   407  		(MOVHstore ptr (MOVDconst [0]) mem))
   408  (Zero [5] ptr mem) ->
   409  	(MOVBstore [4] ptr (MOVDconst [0])
   410  		(MOVWstore ptr (MOVDconst [0]) mem))
   411  (Zero [6] ptr mem) ->
   412  	(MOVHstore [4] ptr (MOVDconst [0])
   413  		(MOVWstore ptr (MOVDconst [0]) mem))
   414  (Zero [7] ptr mem) ->
   415  	(MOVBstore [6] ptr (MOVDconst [0])
   416  		(MOVHstore [4] ptr (MOVDconst [0])
   417  			(MOVWstore ptr (MOVDconst [0]) mem)))
   418  (Zero [9] ptr mem) ->
   419  	(MOVBstore [8] ptr (MOVDconst [0])
   420  		(MOVDstore ptr (MOVDconst [0]) mem))
   421  (Zero [10] ptr mem) ->
   422  	(MOVHstore [8] ptr (MOVDconst [0])
   423  		(MOVDstore ptr (MOVDconst [0]) mem))
   424  (Zero [11] ptr mem) ->
   425  	(MOVBstore [10] ptr (MOVDconst [0])
   426  		(MOVHstore [8] ptr (MOVDconst [0])
   427  			(MOVDstore ptr (MOVDconst [0]) mem)))
   428  (Zero [12] ptr mem) ->
   429  	(MOVWstore [8] ptr (MOVDconst [0])
   430  		(MOVDstore ptr (MOVDconst [0]) mem))
   431  (Zero [13] ptr mem) ->
   432  	(MOVBstore [12] ptr (MOVDconst [0])
   433  		(MOVWstore [8] ptr (MOVDconst [0])
   434  			(MOVDstore ptr (MOVDconst [0]) mem)))
   435  (Zero [14] ptr mem) ->
   436  	(MOVHstore [12] ptr (MOVDconst [0])
   437  		(MOVWstore [8] ptr (MOVDconst [0])
   438  			(MOVDstore ptr (MOVDconst [0]) mem)))
   439  (Zero [15] ptr mem) ->
   440  	(MOVBstore [14] ptr (MOVDconst [0])
   441  		(MOVHstore [12] ptr (MOVDconst [0])
   442  			(MOVWstore [8] ptr (MOVDconst [0])
   443  				(MOVDstore ptr (MOVDconst [0]) mem))))
   444  (Zero [16] ptr mem) ->
   445  	(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)
   446  
   447  (Zero [32] ptr mem) ->
   448  	(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
   449  		(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))
   450  
   451  (Zero [48] ptr mem) ->
   452  	(STP [32] ptr (MOVDconst [0]) (MOVDconst [0])
   453  		(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
   454  			(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)))
   455  
   456  (Zero [64] ptr mem) ->
   457  	(STP [48] ptr (MOVDconst [0]) (MOVDconst [0])
   458  		(STP [32] ptr (MOVDconst [0]) (MOVDconst [0])
   459  			(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
   460  				(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))))
   461  
   462  // strip off fractional word zeroing: for s > 16 that is not a multiple of 16, zero the first s-s%16 bytes, then zero the final 8 bytes (remainder <= 8) or 16 bytes (remainder > 8) ending at offset s; this tail store may overlap bytes that were already zeroed
   463  (Zero [s] ptr mem) && s%16 != 0 && s%16 <= 8 && s > 16 ->
   464  	(Zero [8]
   465  		(OffPtr <ptr.Type> ptr [s-8])
   466  		(Zero [s-s%16] ptr mem))
   467  (Zero [s] ptr mem) && s%16 != 0 && s%16 > 8 && s > 16 ->
   468  	(Zero [16]
   469  		(OffPtr <ptr.Type> ptr [s-16])
   470  		(Zero [s-s%16] ptr mem))
   471  
   472  // medium zeroing uses a duff device
   473  // 4, 16, and 64 are magic constants, see runtime/mkduff.go
   474  (Zero [s] ptr mem)
   475  	&& s%16 == 0 && s > 64 && s <= 16*64
   476  	&& !config.noDuffDevice ->
   477  	(DUFFZERO [4 * (64 - s/16)] ptr mem)
   478  
   479  // large zeroing uses a loop: applies to multiples of 16 bytes that are larger than 16*64 or when the duff device is disabled; LoweredZero receives ptr and ptr+s-16, the address of the last 16-byte chunk
   480  (Zero [s] ptr mem)
   481  	&& s%16 == 0 && (s > 16*64 || config.noDuffDevice) ->
   482  	(LoweredZero
   483  		ptr
   484  		(ADDconst <ptr.Type> [s-16] ptr)
   485  		mem)
   486  
   487  // moves
   488  (Move [0] _ _ mem) -> mem
   489  (Move [1] dst src mem) -> (MOVBstore dst (MOVBUload src mem) mem)
   490  (Move [2] dst src mem) -> (MOVHstore dst (MOVHUload src mem) mem)
   491  (Move [4] dst src mem) -> (MOVWstore dst (MOVWUload src mem) mem)
   492  (Move [8] dst src mem) -> (MOVDstore dst (MOVDload src mem) mem)
   493  
   494  (Move [3] dst src mem) ->
   495  	(MOVBstore [2] dst (MOVBUload [2] src mem)
   496  		(MOVHstore dst (MOVHUload src mem) mem))
   497  (Move [5] dst src mem) ->
   498  	(MOVBstore [4] dst (MOVBUload [4] src mem)
   499  		(MOVWstore dst (MOVWUload src mem) mem))
   500  (Move [6] dst src mem) ->
   501  	(MOVHstore [4] dst (MOVHUload [4] src mem)
   502  		(MOVWstore dst (MOVWUload src mem) mem))
   503  (Move [7] dst src mem) ->
   504  	(MOVBstore [6] dst (MOVBUload [6] src mem)
   505  		(MOVHstore [4] dst (MOVHUload [4] src mem)
   506  			(MOVWstore dst (MOVWUload src mem) mem)))
   507  (Move [12] dst src mem) ->
   508  	(MOVWstore [8] dst (MOVWUload [8] src mem)
   509  		(MOVDstore dst (MOVDload src mem) mem))
   510  (Move [16] dst src mem) ->
   511  	(MOVDstore [8] dst (MOVDload [8] src mem)
   512  		(MOVDstore dst (MOVDload src mem) mem))
   513  (Move [24] dst src mem) ->
   514  	(MOVDstore [16] dst (MOVDload [16] src mem)
   515  		(MOVDstore [8] dst (MOVDload [8] src mem)
   516  			(MOVDstore dst (MOVDload src mem) mem)))
   517  
   518  // strip off fractional word move: for s > 8 that is not a multiple of 8, move the first s-s%8 bytes, then move the remaining s%8 bytes starting at offset s-s%8 in both dst and src
   519  (Move [s] dst src mem) && s%8 != 0 && s > 8 ->
   520  	(Move [s%8]
   521  		(OffPtr <dst.Type> dst [s-s%8])
   522  		(OffPtr <src.Type> src [s-s%8])
   523  		(Move [s-s%8] dst src mem))
   524  
   525  // medium move uses a duff device
   526  (Move [s] dst src mem)
   527  	&& s > 32 && s <= 16*64 && s%16 == 8
   528  	&& !config.noDuffDevice ->
   529  	(MOVDstore [s-8] dst (MOVDload [s-8] src mem)
   530  		(DUFFCOPY <types.TypeMem> [8*(64-(s-8)/16)] dst src mem))
   531  (Move [s] dst src mem)
   532  	&& s > 32 && s <= 16*64 && s%16 == 0
   533  	&& !config.noDuffDevice ->
   534  	(DUFFCOPY [8 * (64 - s/16)] dst src mem)
   535  // In the DUFFCOPY offsets above, 8 is the number of bytes needed to encode this instruction pair:
   536  //
   537  // LDP.P   16(R16), (R26, R27)
   538  // STP.P   (R26, R27), 16(R17)
   539  //
   540  // and 64 is the number of these blocks. See runtime/duff_arm64.s:duffcopy
   541  
   542  // large move uses a loop: applies to multiples of 8 bytes larger than 24; the third argument of LoweredMove is src+s-8, the address of the last 8-byte word of the source
   543  (Move [s] dst src mem)
   544  	&& s > 24 && s%8 == 0 ->
   545  	(LoweredMove
   546  		dst
   547  		src
   548  		(ADDconst <src.Type> src [s-8])
   549  		mem)
   550  
   551  // calls
   552  (StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
   553  (ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
   554  (InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)
   555  
   556  // checks: IsNonNil compares the pointer against 0; the bounds checks use unsigned comparisons (idx < len for IsInBounds, idx <= len for IsSliceInBounds)
   557  (NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
   558  (IsNonNil ptr) -> (NotEqual (CMPconst [0] ptr))
   559  (IsInBounds idx len) -> (LessThanU (CMP idx len))
   560  (IsSliceInBounds idx len) -> (LessEqualU (CMP idx len))
   561  
   562  // pseudo-ops
   563  (GetClosurePtr) -> (LoweredGetClosurePtr)
   564  (GetCallerSP) -> (LoweredGetCallerSP)
   565  (GetCallerPC) -> (LoweredGetCallerPC)
   566  
   567  // Absorb pseudo-ops into blocks: an If controlled by a flag-based comparison becomes the matching conditional block kind, controlled directly by the flags value cc. The generic (If cond yes no) rule that follows this group handles any other control value with an NZ block.
   568  (If (Equal cc) yes no) -> (EQ cc yes no)
   569  (If (NotEqual cc) yes no) -> (NE cc yes no)
   570  (If (LessThan cc) yes no) -> (LT cc yes no)
   571  (If (LessThanU cc) yes no) -> (ULT cc yes no)
   572  (If (LessEqual cc) yes no) -> (LE cc yes no)
   573  (If (LessEqualU cc) yes no) -> (ULE cc yes no)
   574  (If (GreaterThan cc) yes no) -> (GT cc yes no)
   575  (If (GreaterThanU cc) yes no) -> (UGT cc yes no)
   576  (If (GreaterEqual cc) yes no) -> (GE cc yes no)
   577  (If (GreaterEqualU cc) yes no) -> (UGE cc yes no)
   578  (If (LessThanF cc) yes no) -> (FLT cc yes no)
   579  (If (LessEqualF cc) yes no) -> (FLE cc yes no)
   580  (If (GreaterThanF cc) yes no) -> (FGT cc yes no)
   581  (If (GreaterEqualF cc) yes no) -> (FGE cc yes no)
   582  
   583  (If cond yes no) -> (NZ cond yes no)
   584  
   585  // atomic intrinsics
   586  // Note: these ops do not accept offset.
        // Loads and stores map one-to-one onto LDAR*/STLR* ops. The pointer
        // variants (AtomicLoadPtr, AtomicStorePtrNoWB) use the same ops as the
        // 64-bit variants.
   587  (AtomicLoad8   ptr mem) -> (LDARB ptr mem)
   588  (AtomicLoad32  ptr mem) -> (LDARW ptr mem)
   589  (AtomicLoad64  ptr mem) -> (LDAR  ptr mem)
   590  (AtomicLoadPtr ptr mem) -> (LDAR  ptr mem)
   591  
   592  (AtomicStore8       ptr val mem) -> (STLRB ptr val mem)
   593  (AtomicStore32      ptr val mem) -> (STLRW ptr val mem)
   594  (AtomicStore64      ptr val mem) -> (STLR  ptr val mem)
   595  (AtomicStorePtrNoWB ptr val mem) -> (STLR  ptr val mem)
   596  
        // Exchange, add and compare-and-swap each map to a LoweredAtomic* op of
        // the same width with the same arguments.
   597  (AtomicExchange32 ptr val mem) -> (LoweredAtomicExchange32 ptr val mem)
   598  (AtomicExchange64 ptr val mem) -> (LoweredAtomicExchange64 ptr val mem)
   599  
   600  (AtomicAdd32 ptr val mem) -> (LoweredAtomicAdd32 ptr val mem)
   601  (AtomicAdd64 ptr val mem) -> (LoweredAtomicAdd64 ptr val mem)
   602  
   603  (AtomicCompareAndSwap32 ptr old new_ mem) -> (LoweredAtomicCas32 ptr old new_ mem)
   604  (AtomicCompareAndSwap64 ptr old new_ mem) -> (LoweredAtomicCas64 ptr old new_ mem)
   605  
   606  // Currently the updated value is not used, but we need a register to temporarily hold it.
        // Only the Select1 component of the lowered op's result is kept.
   607  (AtomicAnd8 ptr val mem) -> (Select1 (LoweredAtomicAnd8 ptr val mem))
   608  (AtomicOr8  ptr val mem) -> (Select1 (LoweredAtomicOr8  ptr val mem))
   609  
        // The *Variant adds map to their own LoweredAtomicAdd*Variant ops.
   610  (AtomicAdd32Variant ptr val mem) -> (LoweredAtomicAdd32Variant ptr val mem)
   611  (AtomicAdd64Variant ptr val mem) -> (LoweredAtomicAdd64Variant ptr val mem)
   612  
   613  // Write barrier.
        // WB is lowered to LoweredWB with the same aux {fn} and arguments.
   614  (WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)
   615  
        // PanicBounds is lowered to one of three variants (A, B, C), chosen by the
        // value of boundsABI(kind) (0, 1, 2). kind and the arguments are passed
        // through unchanged.
   616  (PanicBounds [kind] x y mem) && boundsABI(kind) == 0 -> (LoweredPanicBoundsA [kind] x y mem)
   617  (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 -> (LoweredPanicBoundsB [kind] x y mem)
   618  (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 -> (LoweredPanicBoundsC [kind] x y mem)
   619  
   620  // Optimizations
   621  
   622  // Absorb boolean tests into block
        // An NZ block controlled by a comparison pseudo-op becomes the matching
        // flag-based block kind, controlled directly by the pseudo-op's argument
        // cc. The mapping is the same as for the If rules earlier in this file.
   623  (NZ (Equal cc) yes no) -> (EQ cc yes no)
   624  (NZ (NotEqual cc) yes no) -> (NE cc yes no)
   625  (NZ (LessThan cc) yes no) -> (LT cc yes no)
   626  (NZ (LessThanU cc) yes no) -> (ULT cc yes no)
   627  (NZ (LessEqual cc) yes no) -> (LE cc yes no)
   628  (NZ (LessEqualU cc) yes no) -> (ULE cc yes no)
   629  (NZ (GreaterThan cc) yes no) -> (GT cc yes no)
   630  (NZ (GreaterThanU cc) yes no) -> (UGT cc yes no)
   631  (NZ (GreaterEqual cc) yes no) -> (GE cc yes no)
   632  (NZ (GreaterEqualU cc) yes no) -> (UGE cc yes no)
   633  (NZ (LessThanF cc) yes no) -> (FLT cc yes no)
   634  (NZ (LessEqualF cc) yes no) -> (FLE cc yes no)
   635  (NZ (GreaterThanF cc) yes no) -> (FGT cc yes no)
   636  (NZ (GreaterEqualF cc) yes no) -> (FGE cc yes no)
   637  
        // A compare-against-zero of an AND/ANDconst result is replaced by a
        // TST-family op on the AND's operands, keeping the same block kind.
        // The Uses == 1 condition restricts this to AND values with exactly one
        // use. Four operand shapes are covered:
        //   CMPWconst [0] (ANDconst [c] y) -> TSTWconst [c] y
        //   CMPconst  [0] (AND x y)        -> TST x y
        //   CMPWconst [0] (AND x y)        -> TSTW x y
        //   CMPconst  [0] (ANDconst [c] y) -> TSTconst [c] y
   638  (EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (EQ (TSTWconst [c] y) yes no)
   639  (NE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (NE (TSTWconst [c] y) yes no)
   640  (LT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (LT (TSTWconst [c] y) yes no)
   641  (LE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (LE (TSTWconst [c] y) yes no)
   642  (GT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (GT (TSTWconst [c] y) yes no)
   643  (GE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (GE (TSTWconst [c] y) yes no)
   644  
   645  (EQ (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (EQ (TST x y) yes no)
   646  (NE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (NE (TST x y) yes no)
   647  (LT (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (LT (TST x y) yes no)
   648  (LE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (LE (TST x y) yes no)
   649  (GT (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (GT (TST x y) yes no)
   650  (GE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (GE (TST x y) yes no)
   651  
   652  (EQ (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (EQ (TSTW x y) yes no)
   653  (NE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (NE (TSTW x y) yes no)
   654  (LT (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (LT (TSTW x y) yes no)
   655  (LE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (LE (TSTW x y) yes no)
   656  (GT (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (GT (TSTW x y) yes no)
   657  (GE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (GE (TSTW x y) yes no)
   658  
   659  (EQ (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (EQ (TSTconst [c] y) yes no)
   660  (NE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (NE (TSTconst [c] y) yes no)
   661  (LT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (LT (TSTconst [c] y) yes no)
   662  (LE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (LE (TSTconst [c] y) yes no)
   663  (GT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (GT (TSTconst [c] y) yes no)
   664  (GE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (GE (TSTconst [c] y) yes no)
   665  
        // Fold a compare-against-zero of a single-use ADD/ADDconst/MADD/MSUB result,
        // or a compare against a single-use NEG, into a CMN/CMP on the operands.
        //
        // Only the EQ and NE forms are folded. The folded CMN/CMP sets the
        // overflow flag (V) from the add/subtract it performs, whereas the
        // original compare of the already-wrapped result against zero never
        // overflows. LT/LE/GT/GE read V, so folding them takes the wrong branch
        // whenever that add/subtract overflows (e.g. x+y with x = y = MaxInt64
        // wraps to a negative value, yet CMN x y evaluates LT as false). The same
        // holds for CMP x (NEG y) when y is the minimum integer. EQ and NE depend
        // only on the Z flag and are unaffected.
        //
        // Signed LT/GE against zero are still covered by the sign-bit TBZ/TBNZ
        // rules later in this file. Restoring the signed folds requires block
        // kinds that ignore the overflow flag.
        // NOTE: the generated rewrite code must be regenerated after this change.
   666  (EQ (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 -> (EQ (CMNconst [c] y) yes no)
   667  (NE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 -> (NE (CMNconst [c] y) yes no)
   672  
   673  (EQ (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 -> (EQ (CMNWconst [c] y) yes no)
   674  (NE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 -> (NE (CMNWconst [c] y) yes no)
   679  
   680  (EQ (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 -> (EQ (CMN x y) yes no)
   681  (NE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 -> (NE (CMN x y) yes no)
   686  
   687  (EQ (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 -> (EQ (CMNW x y) yes no)
   688  (NE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 -> (NE (CMNW x y) yes no)
   693  
   694  (EQ (CMP x z:(NEG y)) yes no) && z.Uses == 1 -> (EQ (CMN x y) yes no)
   695  (NE (CMP x z:(NEG y)) yes no) && z.Uses == 1 -> (NE (CMN x y) yes no)
   700  
   701  (EQ (CMPW x z:(NEG y)) yes no) && z.Uses == 1 -> (EQ (CMNW x y) yes no)
   702  (NE (CMPW x z:(NEG y)) yes no) && z.Uses == 1 -> (NE (CMNW x y) yes no)
   707  
        // An EQ/NE compare of any other value against zero becomes a
        // zero/non-zero block on that value (Z/NZ for 64-bit, ZW/NZW for 32-bit).
   708  (EQ (CMPconst [0] x) yes no) -> (Z x yes no)
   709  (NE (CMPconst [0] x) yes no) -> (NZ x yes no)
   710  (EQ (CMPWconst [0] x) yes no) -> (ZW x yes no)
   711  (NE (CMPWconst [0] x) yes no) -> (NZW x yes no)
   712  
        // MADD/MSUB results compared against zero: split into a separate MUL and
        // compare a with the product (CMN for MADD, CMP for MSUB).
   713  (EQ (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 -> (EQ (CMN a (MUL <x.Type> x y)) yes no)
   714  (NE (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 -> (NE (CMN a (MUL <x.Type> x y)) yes no)
   719  
   720  (EQ (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 -> (EQ (CMP a (MUL <x.Type> x y)) yes no)
   721  (NE (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 -> (NE (CMP a (MUL <x.Type> x y)) yes no)
   726  
   727  (EQ (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (EQ (CMNW a (MULW <x.Type> x y)) yes no)
   728  (NE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (NE (CMNW a (MULW <x.Type> x y)) yes no)
   733  
   734  (EQ (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (EQ (CMPW a (MULW <x.Type> x y)) yes no)
   735  (NE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (NE (CMPW a (MULW <x.Type> x y)) yes no)
   740  
   741  // Absorb bit-tests into block
        // A zero/non-zero test of (ANDconst [c] x), or an EQ/NE on TSTconst [c] x,
        // becomes TBZ/TBNZ on x when oneBit(c) holds; the aux value is ntz(c).
        // For the 32-bit forms (ZW/NZW, TSTWconst) c is first reduced with
        // int64(uint32(c)) before both the oneBit check and the ntz call.
   742  (Z  (ANDconst [c] x) yes no) && oneBit(c) -> (TBZ  {ntz(c)} x yes no)
   743  (NZ (ANDconst [c] x) yes no) && oneBit(c) -> (TBNZ {ntz(c)} x yes no)
   744  (ZW  (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBZ  {ntz(int64(uint32(c)))} x yes no)
   745  (NZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBNZ {ntz(int64(uint32(c)))} x yes no)
   746  (EQ (TSTconst [c] x) yes no) && oneBit(c) -> (TBZ  {ntz(c)} x yes no)
   747  (NE (TSTconst [c] x) yes no) && oneBit(c) -> (TBNZ {ntz(c)} x yes no)
   748  (EQ (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBZ  {ntz(int64(uint32(c)))} x yes no)
   749  (NE (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBNZ {ntz(int64(uint32(c)))} x yes no)
   750  
   751  // Test sign-bit for signed comparisons against zero
        // GE against zero becomes TBZ and LT becomes TBNZ, on bit 31 for the
        // 32-bit compare (CMPWconst) and bit 63 for the 64-bit compare (CMPconst).
   752  (GE (CMPWconst [0] x) yes no) -> (TBZ  {int64(31)} x yes no)
   753  (GE (CMPconst [0] x) yes no) -> (TBZ  {int64(63)} x yes no)
   754  (LT (CMPWconst [0] x) yes no) -> (TBNZ  {int64(31)} x yes no)
   755  (LT (CMPconst [0] x) yes no) -> (TBNZ  {int64(63)} x yes no)
   756  
   757  // fold offset into address
        // An ADDconst applied to a MOVDaddr is merged into the MOVDaddr's offset.
   758  (ADDconst [off1] (MOVDaddr [off2] {sym} ptr)) -> (MOVDaddr [off1+off2] {sym} ptr)
   759  
   760  // fold address into load/store
        // A load whose address is (ADDconst [off2] ptr) absorbs off2 into its own
        // offset and addresses ptr directly. Conditions: the summed offset must
        // satisfy is32Bit, and the rule is skipped when ptr is SB and
        // config.ctxt.Flag_shared is set.
   761  (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   762  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   763  	(MOVBload [off1+off2] {sym} ptr mem)
   764  (MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   765  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   766  	(MOVBUload [off1+off2] {sym} ptr mem)
   767  (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   768  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   769  	(MOVHload [off1+off2] {sym} ptr mem)
   770  (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   771  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   772  	(MOVHUload [off1+off2] {sym} ptr mem)
   773  (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   774  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   775  	(MOVWload [off1+off2] {sym} ptr mem)
   776  (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   777  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   778  	(MOVWUload [off1+off2] {sym} ptr mem)
   779  (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   780  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   781  	(MOVDload [off1+off2] {sym} ptr mem)
   782  (FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   783  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   784  	(FMOVSload [off1+off2] {sym} ptr mem)
   785  (FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   786  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   787  	(FMOVDload [off1+off2] {sym} ptr mem)
   788  
   789  // register indexed load
        // A load with zero offset and no symbol whose address is (ADD ptr idx)
        // becomes the corresponding *loadidx op taking ptr and idx separately.
   790  (MOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVDloadidx ptr idx mem)
   791  (MOVWUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVWUloadidx ptr idx mem)
   792  (MOVWload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVWloadidx ptr idx mem)
   793  (MOVHUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVHUloadidx ptr idx mem)
   794  (MOVHload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVHloadidx ptr idx mem)
   795  (MOVBUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVBUloadidx ptr idx mem)
   796  (MOVBload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVBloadidx ptr idx mem)
   797  (FMOVSload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (FMOVSloadidx ptr idx mem)
   798  (FMOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (FMOVDloadidx ptr idx mem)
        // When either address operand of an indexed load is a constant, go back to
        // the plain load with that constant as the offset. Both operand orders are
        // matched.
        // NOTE(review): unlike the ADDconst folding rules, c is not checked with
        // is32Bit here; confirm the offset is always representable.
   799  (MOVDloadidx ptr (MOVDconst [c]) mem) -> (MOVDload [c] ptr mem)
   800  (MOVDloadidx (MOVDconst [c]) ptr mem) -> (MOVDload [c] ptr mem)
   801  (MOVWUloadidx ptr (MOVDconst [c]) mem) -> (MOVWUload [c] ptr mem)
   802  (MOVWUloadidx (MOVDconst [c]) ptr mem) -> (MOVWUload [c] ptr mem)
   803  (MOVWloadidx ptr (MOVDconst [c]) mem) -> (MOVWload [c] ptr mem)
   804  (MOVWloadidx (MOVDconst [c]) ptr mem) -> (MOVWload [c] ptr mem)
   805  (MOVHUloadidx ptr (MOVDconst [c]) mem) -> (MOVHUload [c] ptr mem)
   806  (MOVHUloadidx (MOVDconst [c]) ptr mem) -> (MOVHUload [c] ptr mem)
   807  (MOVHloadidx ptr (MOVDconst [c]) mem) -> (MOVHload [c] ptr mem)
   808  (MOVHloadidx (MOVDconst [c]) ptr mem) -> (MOVHload [c] ptr mem)
   809  (MOVBUloadidx ptr (MOVDconst [c]) mem) -> (MOVBUload [c] ptr mem)
   810  (MOVBUloadidx (MOVDconst [c]) ptr mem) -> (MOVBUload [c] ptr mem)
   811  (MOVBloadidx ptr (MOVDconst [c]) mem) -> (MOVBload [c] ptr mem)
   812  (MOVBloadidx (MOVDconst [c]) ptr mem) -> (MOVBload [c] ptr mem)
   813  (FMOVSloadidx ptr (MOVDconst [c]) mem) -> (FMOVSload [c] ptr mem)
   814  (FMOVSloadidx (MOVDconst [c]) ptr mem) -> (FMOVSload [c] ptr mem)
   815  (FMOVDloadidx ptr (MOVDconst [c]) mem) -> (FMOVDload [c] ptr mem)
   816  (FMOVDloadidx (MOVDconst [c]) ptr mem) -> (FMOVDload [c] ptr mem)
   817  
   818  // shifted register indexed load
        // A zero-offset, symbol-free load from (ADDshiftLL [s] ptr idx) becomes the
        // scaled indexed load whose suffix (idx8/idx4/idx2) matches the shift
        // s = 3/2/1 and the access width.
   819  (MOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil -> (MOVDloadidx8 ptr idx mem)
   820  (MOVWUload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil -> (MOVWUloadidx4 ptr idx mem)
   821  (MOVWload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil -> (MOVWloadidx4 ptr idx mem)
   822  (MOVHUload [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil -> (MOVHUloadidx2 ptr idx mem)
   823  (MOVHload [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil -> (MOVHloadidx2 ptr idx mem)
        // An unscaled indexed load whose index is a matching left shift (or, for
        // halfwords, idx+idx) becomes the scaled form.
   824  (MOVDloadidx ptr (SLLconst [3] idx) mem) -> (MOVDloadidx8 ptr idx mem)
   825  (MOVWloadidx ptr (SLLconst [2] idx) mem) -> (MOVWloadidx4 ptr idx mem)
   826  (MOVWUloadidx ptr (SLLconst [2] idx) mem) -> (MOVWUloadidx4 ptr idx mem)
   827  (MOVHloadidx ptr (SLLconst [1] idx) mem) -> (MOVHloadidx2 ptr idx mem)
   828  (MOVHUloadidx ptr (SLLconst [1] idx) mem) -> (MOVHUloadidx2 ptr idx mem)
   829  (MOVHloadidx ptr (ADD idx idx) mem) -> (MOVHloadidx2 ptr idx mem)
   830  (MOVHUloadidx ptr (ADD idx idx) mem) -> (MOVHUloadidx2 ptr idx mem)
        // Same, with the shifted index appearing as the first operand.
        // NOTE(review): there is no (SLLconst [1] idx) first-operand rule for
        // MOVHloadidx/MOVHUloadidx here, although the store rules have one;
        // confirm whether the omission is intentional.
   831  (MOVDloadidx (SLLconst [3] idx) ptr mem) -> (MOVDloadidx8 ptr idx mem)
   832  (MOVWloadidx (SLLconst [2] idx) ptr mem) -> (MOVWloadidx4 ptr idx mem)
   833  (MOVWUloadidx (SLLconst [2] idx) ptr mem) -> (MOVWUloadidx4 ptr idx mem)
   834  (MOVHloadidx (ADD idx idx) ptr mem) -> (MOVHloadidx2 ptr idx mem)
   835  (MOVHUloadidx (ADD idx idx) ptr mem) -> (MOVHUloadidx2 ptr idx mem)
        // A scaled indexed load with a constant index becomes a plain load whose
        // offset is the constant shifted left by the scale.
   836  (MOVDloadidx8 ptr (MOVDconst [c]) mem) -> (MOVDload [c<<3] ptr mem)
   837  (MOVWUloadidx4 ptr (MOVDconst [c]) mem) -> (MOVWUload [c<<2] ptr mem)
   838  (MOVWloadidx4 ptr (MOVDconst [c]) mem) -> (MOVWload [c<<2] ptr mem)
   839  (MOVHUloadidx2 ptr (MOVDconst [c]) mem) -> (MOVHUload [c<<1] ptr mem)
   840  (MOVHloadidx2 ptr (MOVDconst [c]) mem) -> (MOVHload [c<<1] ptr mem)
   841  
        // Fold an ADDconst address into a store (including STP and the storezero
        // ops): off2 is added to the store's own offset and ptr is addressed
        // directly. Conditions: the summed offset must satisfy is32Bit, and the
        // rule is skipped when ptr is SB and config.ctxt.Flag_shared is set.
   842  (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
   843  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   844  	(MOVBstore [off1+off2] {sym} ptr val mem)
   845  (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
   846  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   847  	(MOVHstore [off1+off2] {sym} ptr val mem)
   848  (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
   849  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   850  	(MOVWstore [off1+off2] {sym} ptr val mem)
   851  (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
   852  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   853  	(MOVDstore [off1+off2] {sym} ptr val mem)
   854  (STP [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem) && is32Bit(off1+off2)
   855  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   856  	(STP [off1+off2] {sym} ptr val1 val2 mem)
   857  (FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
   858  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   859  	(FMOVSstore [off1+off2] {sym} ptr val mem)
   860  (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
   861  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   862  	(FMOVDstore [off1+off2] {sym} ptr val mem)
   863  (MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   864  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   865  	(MOVBstorezero [off1+off2] {sym} ptr mem)
   866  (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   867  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   868  	(MOVHstorezero [off1+off2] {sym} ptr mem)
   869  (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   870  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   871  	(MOVWstorezero [off1+off2] {sym} ptr mem)
   872  (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   873  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   874  	(MOVDstorezero [off1+off2] {sym} ptr mem)
   875  (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   876  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   877  	(MOVQstorezero [off1+off2] {sym} ptr mem)
   878  
   879  // register indexed store
        // A store with zero offset and no symbol whose address is (ADD ptr idx)
        // becomes the corresponding *storeidx op taking ptr and idx separately.
   880  (MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVDstoreidx ptr idx val mem)
   881  (MOVWstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVWstoreidx ptr idx val mem)
   882  (MOVHstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVHstoreidx ptr idx val mem)
   883  (MOVBstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVBstoreidx ptr idx val mem)
   884  (FMOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (FMOVDstoreidx ptr idx val mem)
   885  (FMOVSstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (FMOVSstoreidx ptr idx val mem)
        // When either address operand of an indexed store is a constant, go back
        // to the plain store with that constant as the offset and the other
        // operand as the base. Both operand orders are matched.
        // NOTE(review): c is not checked with is32Bit here; confirm the offset is
        // always representable.
   886  (MOVDstoreidx ptr (MOVDconst [c]) val mem) -> (MOVDstore [c] ptr val mem)
   887  (MOVDstoreidx (MOVDconst [c]) idx val mem) -> (MOVDstore [c] idx val mem)
   888  (MOVWstoreidx ptr (MOVDconst [c]) val mem) -> (MOVWstore [c] ptr val mem)
   889  (MOVWstoreidx (MOVDconst [c]) idx val mem) -> (MOVWstore [c] idx val mem)
   890  (MOVHstoreidx ptr (MOVDconst [c]) val mem) -> (MOVHstore [c] ptr val mem)
   891  (MOVHstoreidx (MOVDconst [c]) idx val mem) -> (MOVHstore [c] idx val mem)
   892  (MOVBstoreidx ptr (MOVDconst [c]) val mem) -> (MOVBstore [c] ptr val mem)
   893  (MOVBstoreidx (MOVDconst [c]) idx val mem) -> (MOVBstore [c] idx val mem)
   894  (FMOVDstoreidx ptr (MOVDconst [c]) val mem) -> (FMOVDstore [c] ptr val mem)
   895  (FMOVDstoreidx (MOVDconst [c]) idx val mem) -> (FMOVDstore [c] idx val mem)
   896  (FMOVSstoreidx ptr (MOVDconst [c]) val mem) -> (FMOVSstore [c] ptr val mem)
   897  (FMOVSstoreidx (MOVDconst [c]) idx val mem) -> (FMOVSstore [c] idx val mem)
   898  
   899  // shifted register indexed store
        // A zero-offset, symbol-free store to (ADDshiftLL [s] ptr idx) becomes the
        // scaled indexed store whose suffix (idx8/idx4/idx2) matches the shift
        // s = 3/2/1 and the access width.
   900  (MOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil -> (MOVDstoreidx8 ptr idx val mem)
   901  (MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil -> (MOVWstoreidx4 ptr idx val mem)
   902  (MOVHstore [off] {sym} (ADDshiftLL [1] ptr idx) val mem) && off == 0 && sym == nil -> (MOVHstoreidx2 ptr idx val mem)
        // An unscaled indexed store whose index is a matching left shift (or, for
        // halfwords, idx+idx) becomes the scaled form; the shifted index may be
        // either address operand.
   903  (MOVDstoreidx ptr (SLLconst [3] idx) val mem) -> (MOVDstoreidx8 ptr idx val mem)
   904  (MOVWstoreidx ptr (SLLconst [2] idx) val mem) -> (MOVWstoreidx4 ptr idx val mem)
   905  (MOVHstoreidx ptr (SLLconst [1] idx) val mem) -> (MOVHstoreidx2 ptr idx val mem)
   906  (MOVHstoreidx ptr (ADD idx idx) val mem) -> (MOVHstoreidx2 ptr idx val mem)
   907  (MOVDstoreidx (SLLconst [3] idx) ptr val mem) -> (MOVDstoreidx8 ptr idx val mem)
   908  (MOVWstoreidx (SLLconst [2] idx) ptr val mem) -> (MOVWstoreidx4 ptr idx val mem)
   909  (MOVHstoreidx (SLLconst [1] idx) ptr val mem) -> (MOVHstoreidx2 ptr idx val mem)
   910  (MOVHstoreidx (ADD idx idx) ptr val mem) -> (MOVHstoreidx2 ptr idx val mem)
        // A scaled indexed store with a constant index becomes a plain store whose
        // offset is the constant shifted left by the scale.
   911  (MOVDstoreidx8 ptr (MOVDconst [c]) val mem) -> (MOVDstore [c<<3] ptr val mem)
   912  (MOVWstoreidx4 ptr (MOVDconst [c]) val mem) -> (MOVWstore [c<<2] ptr val mem)
   913  (MOVHstoreidx2 ptr (MOVDconst [c]) val mem) -> (MOVHstore [c<<1] ptr val mem)
   914  
        // Fold a MOVDaddr address into a load: the two offsets are added and the
        // two symbols combined with mergeSym, and the load addresses ptr directly.
        // Conditions: canMergeSym(sym1,sym2) holds, the summed offset satisfies
        // is32Bit, and the rule is skipped when ptr is SB and
        // config.ctxt.Flag_shared is set.
   915  (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   916  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   917  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   918  	(MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   919  (MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   920  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   921  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   922  	(MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   923  (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   924  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   925  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   926  	(MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   927  (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   928  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   929  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   930  	(MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   931  (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   932  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   933  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   934  	(MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   935  (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   936  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   937  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   938  	(MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   939  (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   940  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   941  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   942  	(MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   943  (FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   944  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   945  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   946  	(FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   947  (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   948  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   949  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   950  	(FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   951  
        // Fold a MOVDaddr address into a store (including STP and the storezero
        // ops): the two offsets are added and the two symbols combined with
        // mergeSym, and the store addresses ptr directly.
        // Conditions: canMergeSym(sym1,sym2) holds, the summed offset satisfies
        // is32Bit, and the rule is skipped when ptr is SB and
        // config.ctxt.Flag_shared is set.
   952  (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   953  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   954  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   955  	(MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   956  (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   957  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   958  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   959  	(MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   960  (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   961  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   962  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   963  	(MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   964  (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   965  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   966  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   967  	(MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   968  (STP [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem)
   969  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   970  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   971  	(STP [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem)
   972  (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   973  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   974  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   975  	(FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   976  (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   977  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   978  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   979  	(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   980  (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   981  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   982  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   983  	(MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   984  (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   985  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   986  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   987  	(MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   988  (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   989  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   990  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   991  	(MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   992  (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   993  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   994  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   995  	(MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   996  (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   997  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   998  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   999  	(MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
  1000  
  1001  // store zero
        // A store of the constant 0 becomes the matching *storezero op, which takes
        // no value operand. An STP of two zero constants becomes MOVQstorezero.
  1002  (MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVBstorezero [off] {sym} ptr mem)
  1003  (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVHstorezero [off] {sym} ptr mem)
  1004  (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVWstorezero [off] {sym} ptr mem)
  1005  (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVDstorezero [off] {sym} ptr mem)
  1006  (STP [off] {sym} ptr (MOVDconst [0]) (MOVDconst [0]) mem) -> (MOVQstorezero [off] {sym} ptr mem)
  1007  
  1008  // register indexed store zero
        // A zero-offset, symbol-free storezero to (ADD ptr idx) becomes the
        // *storezeroidx form.
  1009  (MOVDstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVDstorezeroidx ptr idx mem)
  1010  (MOVWstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVWstorezeroidx ptr idx mem)
  1011  (MOVHstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVHstorezeroidx ptr idx mem)
  1012  (MOVBstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVBstorezeroidx ptr idx mem)
        // An indexed store of the constant 0 becomes the indexed storezero op.
  1013  (MOVDstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVDstorezeroidx ptr idx mem)
  1014  (MOVWstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVWstorezeroidx ptr idx mem)
  1015  (MOVHstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVHstorezeroidx ptr idx mem)
  1016  (MOVBstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVBstorezeroidx ptr idx mem)
        // When either address operand is a constant, go back to the plain
        // storezero with that constant as the offset. Both operand orders are
        // matched.
        // NOTE(review): c is not checked with is32Bit here; confirm the offset is
        // always representable.
  1017  (MOVDstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVDstorezero [c] ptr mem)
  1018  (MOVDstorezeroidx (MOVDconst [c]) idx mem) -> (MOVDstorezero [c] idx mem)
  1019  (MOVWstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVWstorezero [c] ptr mem)
  1020  (MOVWstorezeroidx (MOVDconst [c]) idx mem) -> (MOVWstorezero [c] idx mem)
  1021  (MOVHstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVHstorezero [c] ptr mem)
  1022  (MOVHstorezeroidx (MOVDconst [c]) idx mem) -> (MOVHstorezero [c] idx mem)
  1023  (MOVBstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVBstorezero [c] ptr mem)
  1024  (MOVBstorezeroidx (MOVDconst [c]) idx mem) -> (MOVBstorezero [c] idx mem)
  1025  
  1026  // shifted register indexed store zero
        // A zero-offset, symbol-free storezero to (ADDshiftLL [s] ptr idx) becomes
        // the scaled indexed storezero whose suffix (idx8/idx4/idx2) matches the
        // shift s = 3/2/1 and the access width.
  1027  (MOVDstorezero [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil -> (MOVDstorezeroidx8 ptr idx mem)
  1028  (MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil -> (MOVWstorezeroidx4 ptr idx mem)
  1029  (MOVHstorezero [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil -> (MOVHstorezeroidx2 ptr idx mem)
        // An unscaled indexed storezero whose index is a matching left shift (or,
        // for halfwords, idx+idx) becomes the scaled form; the shifted index may
        // be either address operand.
  1030  (MOVDstorezeroidx ptr (SLLconst [3] idx) mem) -> (MOVDstorezeroidx8 ptr idx mem)
  1031  (MOVWstorezeroidx ptr (SLLconst [2] idx) mem) -> (MOVWstorezeroidx4 ptr idx mem)
  1032  (MOVHstorezeroidx ptr (SLLconst [1] idx) mem) -> (MOVHstorezeroidx2 ptr idx mem)
  1033  (MOVHstorezeroidx ptr (ADD idx idx) mem) -> (MOVHstorezeroidx2 ptr idx mem)
  1034  (MOVDstorezeroidx (SLLconst [3] idx) ptr mem) -> (MOVDstorezeroidx8 ptr idx mem)
  1035  (MOVWstorezeroidx (SLLconst [2] idx) ptr mem) -> (MOVWstorezeroidx4 ptr idx mem)
  1036  (MOVHstorezeroidx (SLLconst [1] idx) ptr mem) -> (MOVHstorezeroidx2 ptr idx mem)
  1037  (MOVHstorezeroidx (ADD idx idx) ptr mem) -> (MOVHstorezeroidx2 ptr idx mem)
        // A scaled indexed store of the constant 0 becomes the scaled indexed
        // storezero op.
  1038  (MOVDstoreidx8 ptr idx (MOVDconst [0]) mem) -> (MOVDstorezeroidx8 ptr idx mem)
  1039  (MOVWstoreidx4 ptr idx (MOVDconst [0]) mem) -> (MOVWstorezeroidx4 ptr idx mem)
  1040  (MOVHstoreidx2 ptr idx (MOVDconst [0]) mem) -> (MOVHstorezeroidx2 ptr idx mem)
        // A scaled indexed storezero with a constant index becomes a plain
        // storezero whose offset is the constant shifted left by the scale.
  1041  (MOVDstorezeroidx8 ptr (MOVDconst [c]) mem) -> (MOVDstorezero [c<<3] ptr mem)
  1042  (MOVWstorezeroidx4 ptr (MOVDconst [c]) mem) -> (MOVWstorezero [c<<2] ptr mem)
  1043  (MOVHstorezeroidx2 ptr (MOVDconst [c]) mem) -> (MOVHstorezero [c<<1] ptr mem)
  1044  
  1045  // replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
  1046  // these seem to have bad interaction with other rules, resulting in slower code
  1047  //(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBreg x)
  1048  //(MOVBUload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBUreg x)
  1049  //(MOVHload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHreg x)
  1050  //(MOVHUload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHUreg x)
  1051  //(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWreg x)
  1052  //(MOVWUload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWUreg x)
  1053  //(MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
  1054  //(FMOVSload [off] {sym} ptr (FMOVSstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
  1055  //(FMOVDload [off] {sym} ptr (FMOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
  1056  
  1057  (MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0]) // a load whose memory argument is a same-width store-zero to the same sym, offset and pointer is the constant 0
  1058  (MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
  1059  (MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
  1060  (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
  1061  (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
  1062  (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
  1063  (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
  1064  
  1065  (MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _)) // register-indexed forms: ptr and idx may match the store's operands in either order
  1066  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
  1067  (MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
  1068  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
  1069  (MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
  1070  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
  1071  (MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
  1072  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
  1073  (MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
  1074  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
  1075  (MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
  1076  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
  1077  (MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
  1078  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
  1079  
  1080  (MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0]) // scaled-index forms: only the positional match (ptr with ptr2, idx with idx2) is accepted
  1081  (MOVHUloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
  1082  (MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
  1083  (MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
  1084  (MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
  1085  
  1086  // don't extend after proper load: an extension applied to a load of the same or narrower width (listed pairs only) is replaced by a plain register move
  1087  (MOVBreg x:(MOVBload _ _)) -> (MOVDreg x)
  1088  (MOVBUreg x:(MOVBUload _ _)) -> (MOVDreg x)
  1089  (MOVHreg x:(MOVBload _ _)) -> (MOVDreg x)
  1090  (MOVHreg x:(MOVBUload _ _)) -> (MOVDreg x)
  1091  (MOVHreg x:(MOVHload _ _)) -> (MOVDreg x)
  1092  (MOVHUreg x:(MOVBUload _ _)) -> (MOVDreg x)
  1093  (MOVHUreg x:(MOVHUload _ _)) -> (MOVDreg x)
  1094  (MOVWreg x:(MOVBload _ _)) -> (MOVDreg x)
  1095  (MOVWreg x:(MOVBUload _ _)) -> (MOVDreg x)
  1096  (MOVWreg x:(MOVHload _ _)) -> (MOVDreg x)
  1097  (MOVWreg x:(MOVHUload _ _)) -> (MOVDreg x)
  1098  (MOVWreg x:(MOVWload _ _)) -> (MOVDreg x)
  1099  (MOVWUreg x:(MOVBUload _ _)) -> (MOVDreg x)
  1100  (MOVWUreg x:(MOVHUload _ _)) -> (MOVDreg x)
  1101  (MOVWUreg x:(MOVWUload _ _)) -> (MOVDreg x)
  1102  (MOVBreg x:(MOVBloadidx _  _ _)) -> (MOVDreg x) // same pairs for register-indexed loads
  1103  (MOVBUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
  1104  (MOVHreg x:(MOVBloadidx _ _ _)) -> (MOVDreg x)
  1105  (MOVHreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
  1106  (MOVHreg x:(MOVHloadidx _ _ _)) -> (MOVDreg x)
  1107  (MOVHUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
  1108  (MOVHUreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x)
  1109  (MOVWreg x:(MOVBloadidx _ _ _)) -> (MOVDreg x)
  1110  (MOVWreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
  1111  (MOVWreg x:(MOVHloadidx _ _ _)) -> (MOVDreg x)
  1112  (MOVWreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x)
  1113  (MOVWreg x:(MOVWloadidx _ _ _)) -> (MOVDreg x)
  1114  (MOVWUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
  1115  (MOVWUreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x)
  1116  (MOVWUreg x:(MOVWUloadidx _ _ _)) -> (MOVDreg x)
  1117  (MOVHreg x:(MOVHloadidx2 _ _ _)) -> (MOVDreg x) // same pairs for scaled-index loads
  1118  (MOVHUreg x:(MOVHUloadidx2 _ _ _)) -> (MOVDreg x)
  1119  (MOVWreg x:(MOVHloadidx2 _ _ _)) -> (MOVDreg x)
  1120  (MOVWreg x:(MOVHUloadidx2 _ _ _)) -> (MOVDreg x)
  1121  (MOVWreg x:(MOVWloadidx4 _ _ _)) -> (MOVDreg x)
  1122  (MOVWUreg x:(MOVHUloadidx2 _ _ _)) -> (MOVDreg x)
  1123  (MOVWUreg x:(MOVWUloadidx4 _ _ _)) -> (MOVDreg x)
  1124  
  1125  // fold double extensions: an extension applied to a value already extended from the same or a narrower width is replaced by a plain register move
  1126  (MOVBreg x:(MOVBreg _)) -> (MOVDreg x)
  1127  (MOVBUreg x:(MOVBUreg _)) -> (MOVDreg x)
  1128  (MOVHreg x:(MOVBreg _)) -> (MOVDreg x)
  1129  (MOVHreg x:(MOVBUreg _)) -> (MOVDreg x)
  1130  (MOVHreg x:(MOVHreg _)) -> (MOVDreg x)
  1131  (MOVHUreg x:(MOVBUreg _)) -> (MOVDreg x)
  1132  (MOVHUreg x:(MOVHUreg _)) -> (MOVDreg x)
  1133  (MOVWreg x:(MOVBreg _)) -> (MOVDreg x)
  1134  (MOVWreg x:(MOVBUreg _)) -> (MOVDreg x)
  1135  (MOVWreg x:(MOVHreg _)) -> (MOVDreg x)
  1136  (MOVWreg x:(MOVHUreg _)) -> (MOVDreg x) // a 16-bit zero-extended value is unchanged by a 32-bit sign extension (this line was a duplicate of the MOVHreg rule above)
  1137  (MOVWreg x:(MOVWreg _)) -> (MOVDreg x)
  1138  (MOVWUreg x:(MOVBUreg _)) -> (MOVDreg x)
  1139  (MOVWUreg x:(MOVHUreg _)) -> (MOVDreg x)
  1140  (MOVWUreg x:(MOVWUreg _)) -> (MOVDreg x)
  1141  
  1142  // don't extend before store: a store of width W is given the unextended value when its operand is an extension from W or wider
  1143  (MOVBstore [off] {sym} ptr (MOVBreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
  1144  (MOVBstore [off] {sym} ptr (MOVBUreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
  1145  (MOVBstore [off] {sym} ptr (MOVHreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
  1146  (MOVBstore [off] {sym} ptr (MOVHUreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
  1147  (MOVBstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
  1148  (MOVBstore [off] {sym} ptr (MOVWUreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
  1149  (MOVHstore [off] {sym} ptr (MOVHreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
  1150  (MOVHstore [off] {sym} ptr (MOVHUreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
  1151  (MOVHstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
  1152  (MOVHstore [off] {sym} ptr (MOVWUreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
  1153  (MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
  1154  (MOVWstore [off] {sym} ptr (MOVWUreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
  1155  (MOVBstoreidx ptr idx (MOVBreg x) mem) -> (MOVBstoreidx ptr idx x mem) // same for register-indexed stores
  1156  (MOVBstoreidx ptr idx (MOVBUreg x) mem) -> (MOVBstoreidx ptr idx x mem)
  1157  (MOVBstoreidx ptr idx (MOVHreg x) mem) -> (MOVBstoreidx ptr idx x mem)
  1158  (MOVBstoreidx ptr idx (MOVHUreg x) mem) -> (MOVBstoreidx ptr idx x mem)
  1159  (MOVBstoreidx ptr idx (MOVWreg x) mem) -> (MOVBstoreidx ptr idx x mem)
  1160  (MOVBstoreidx ptr idx (MOVWUreg x) mem) -> (MOVBstoreidx ptr idx x mem)
  1161  (MOVHstoreidx ptr idx (MOVHreg x) mem) -> (MOVHstoreidx ptr idx x mem)
  1162  (MOVHstoreidx ptr idx (MOVHUreg x) mem) -> (MOVHstoreidx ptr idx x mem)
  1163  (MOVHstoreidx ptr idx (MOVWreg x) mem) -> (MOVHstoreidx ptr idx x mem)
  1164  (MOVHstoreidx ptr idx (MOVWUreg x) mem) -> (MOVHstoreidx ptr idx x mem)
  1165  (MOVWstoreidx ptr idx (MOVWreg x) mem) -> (MOVWstoreidx ptr idx x mem)
  1166  (MOVWstoreidx ptr idx (MOVWUreg x) mem) -> (MOVWstoreidx ptr idx x mem)
  1167  (MOVHstoreidx2 ptr idx (MOVHreg x) mem) -> (MOVHstoreidx2 ptr idx x mem) // same for scaled-index stores
  1168  (MOVHstoreidx2 ptr idx (MOVHUreg x) mem) -> (MOVHstoreidx2 ptr idx x mem)
  1169  (MOVHstoreidx2 ptr idx (MOVWreg x) mem) -> (MOVHstoreidx2 ptr idx x mem)
  1170  (MOVHstoreidx2 ptr idx (MOVWUreg x) mem) -> (MOVHstoreidx2 ptr idx x mem)
  1171  (MOVWstoreidx4 ptr idx (MOVWreg x) mem) -> (MOVWstoreidx4 ptr idx x mem)
  1172  (MOVWstoreidx4 ptr idx (MOVWUreg x) mem) -> (MOVWstoreidx4 ptr idx x mem)
  1173  
  1174  // if the operand of a register move has only one use, reuse the same register without emitting an instruction
  1175  // MOVDnop doesn't emit an instruction; it exists only to ensure the type.
  1176  (MOVDreg x) && x.Uses == 1 -> (MOVDnop x)
  1177  
  1178  // fold constant into arithmetic ops: a MOVDconst second operand selects the *const form of the op
  1179  (ADD x (MOVDconst [c])) -> (ADDconst [c] x)
  1180  (SUB x (MOVDconst [c])) -> (SUBconst [c] x)
  1181  (AND x (MOVDconst [c])) -> (ANDconst [c] x)
  1182  (OR  x (MOVDconst [c])) -> (ORconst  [c] x)
  1183  (XOR x (MOVDconst [c])) -> (XORconst [c] x)
  1184  (TST x (MOVDconst [c])) -> (TSTconst [c] x)
  1185  (TSTW x (MOVDconst [c])) -> (TSTWconst [c] x)
  1186  (CMN x (MOVDconst [c])) -> (CMNconst [c] x)
  1187  (CMNW x (MOVDconst [c])) -> (CMNWconst [c] x)
  1188  (BIC x (MOVDconst [c])) -> (ANDconst [^c] x) // the and-not/xor-not/or-not forms fold the complemented constant
  1189  (EON x (MOVDconst [c])) -> (XORconst [^c] x)
  1190  (ORN x (MOVDconst [c])) -> (ORconst  [^c] x)
  1191  
  1192  (SLL x (MOVDconst [c])) -> (SLLconst x [c&63]) // Note: I don't think we ever generate bad constant shifts (i.e. c>=64)
  1193  (SRL x (MOVDconst [c])) -> (SRLconst x [c&63])
  1194  (SRA x (MOVDconst [c])) -> (SRAconst x [c&63])
  1195  
  1196  (CMP x (MOVDconst [c])) -> (CMPconst [c] x)
  1197  (CMP (MOVDconst [c]) x) -> (InvertFlags (CMPconst [c] x)) // constant on the left: compare with operands swapped, then invert the flags
  1198  (CMPW x (MOVDconst [c])) -> (CMPWconst [int64(int32(c))] x) // the 32-bit compare keeps only the sign-extended low 32 bits of the constant
  1199  (CMPW (MOVDconst [c]) x) -> (InvertFlags (CMPWconst [int64(int32(c))] x))
  1200  
  1201  // mul-neg -> mneg: a negated product, or a product with one negated operand, becomes a single multiply-negate
  1202  (NEG (MUL x y)) -> (MNEG x y)
  1203  (NEG (MULW x y)) -> (MNEGW x y)
  1204  (MUL (NEG x) y) -> (MNEG x y)
  1205  (MULW (NEG x) y) -> (MNEGW x y)
  1206  
  1207  // madd/msub: fuse an add/sub with a multiply that has no other use (l.Uses==1) into a multiply-add/multiply-subtract
  1208  (ADD a l:(MUL  x y)) && l.Uses==1 && clobber(l) -> (MADD a x y)
  1209  (SUB a l:(MUL  x y)) && l.Uses==1 && clobber(l) -> (MSUB a x y)
  1210  (ADD a l:(MNEG x y)) && l.Uses==1 && clobber(l) -> (MSUB a x y)
  1211  (SUB a l:(MNEG x y)) && l.Uses==1 && clobber(l) -> (MADD a x y)
  1212  
  1213  (ADD a l:(MULW  x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MADDW a x y) // 32-bit forms are used only when the addend is not 8 bytes wide
  1214  (SUB a l:(MULW  x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MSUBW a x y)
  1215  (ADD a l:(MNEGW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MSUBW a x y)
  1216  (SUB a l:(MNEGW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MADDW a x y)
  1217  
  1218  // optimize ADCSflags, SBCSflags and friends: drop the sequence that re-materializes a carry/borrow into flags when the original flags value (or a known zero) is available
  1219  (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] (ADCzerocarry <typ.UInt64> c)))) -> (ADCSflags x y c)
  1220  (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] (MOVDconst [0])))) -> (ADDSflags x y) // carry-in is the constant 0: plain flag-setting add
  1221  (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags (NEG <typ.UInt64> (NGCzerocarry <typ.UInt64> bo))))) -> (SBCSflags x y bo)
  1222  (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags (MOVDconst [0])))) -> (SUBSflags x y) // borrow-in is the constant 0: plain flag-setting subtract
  1223  
  1224  // mul by constant: strength-reduce multiplication by -1, 0, 1, 2^n, 2^n+1, 2^n-1 and 3/5/7/9 times 2^n into negate, shift and shifted-add forms
  1225  (MUL x (MOVDconst [-1])) -> (NEG x)
  1226  (MUL _ (MOVDconst [0])) -> (MOVDconst [0])
  1227  (MUL x (MOVDconst [1])) -> x
  1228  (MUL x (MOVDconst [c])) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
  1229  (MUL x (MOVDconst [c])) && isPowerOfTwo(c-1) && c >= 3 -> (ADDshiftLL x x [log2(c-1)]) // x + x<<n
  1230  (MUL x (MOVDconst [c])) && isPowerOfTwo(c+1) && c >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)]) // -x + x<<n
  1231  (MUL x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
  1232  (MUL x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
  1233  (MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
  1234  (MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
  1235  
  1236  (MULW x (MOVDconst [c])) && int32(c)==-1 -> (NEG x) // 32-bit forms test the constant through int32(c) / is32Bit(c)
  1237  (MULW _ (MOVDconst [c])) && int32(c)==0 -> (MOVDconst [0])
  1238  (MULW x (MOVDconst [c])) && int32(c)==1 -> x
  1239  (MULW x (MOVDconst [c])) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
  1240  (MULW x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (ADDshiftLL x x [log2(c-1)])
  1241  (MULW x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
  1242  (MULW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
  1243  (MULW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
  1244  (MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
  1245  (MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
  1246  
  1247  // mneg by constant: same constant classes as mul by constant, producing the negated product
  1248  (MNEG x (MOVDconst [-1])) -> x
  1249  (MNEG _ (MOVDconst [0])) -> (MOVDconst [0])
  1250  (MNEG x (MOVDconst [1])) -> (NEG x)
  1251  (MNEG x (MOVDconst [c])) && isPowerOfTwo(c) -> (NEG (SLLconst <x.Type> [log2(c)] x))
  1252  (MNEG x (MOVDconst [c])) && isPowerOfTwo(c-1) && c >= 3 -> (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1253  (MNEG x (MOVDconst [c])) && isPowerOfTwo(c+1) && c >= 7 -> (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
  1254  (MNEG x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2])) // x - x<<2 is -3x, so no separate NEG is needed
  1255  (MNEG x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
  1256  (MNEG x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3])) // x - x<<3 is -7x
  1257  (MNEG x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
  1258  
  1259  (MNEGW x (MOVDconst [c])) && int32(c)==-1 -> x // 32-bit forms test the constant through int32(c) / is32Bit(c)
  1260  (MNEGW _ (MOVDconst [c])) && int32(c)==0 -> (MOVDconst [0])
  1261  (MNEGW x (MOVDconst [c])) && int32(c)==1 -> (NEG x)
  1262  (MNEGW x (MOVDconst [c])) && isPowerOfTwo(c) -> (NEG (SLLconst <x.Type> [log2(c)] x))
  1263  (MNEGW x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1264  (MNEGW x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
  1265  (MNEGW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
  1266  (MNEGW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
  1267  (MNEGW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
  1268  (MNEGW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
  1269  
  1270  (MADD a x (MOVDconst [-1])) -> (SUB a x) // madd by constant (a + x*c): constant as the last operand
  1271  (MADD a _ (MOVDconst [0])) -> a
  1272  (MADD a x (MOVDconst [1])) -> (ADD a x)
  1273  (MADD a x (MOVDconst [c])) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
  1274  (MADD a x (MOVDconst [c])) && isPowerOfTwo(c-1) && c>=3 -> (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1275  (MADD a x (MOVDconst [c])) && isPowerOfTwo(c+1) && c>=7 -> (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)])) // a - (x - x<<n)
  1276  (MADD a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)]) // a - (-3x)<<n
  1277  (MADD a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
  1278  (MADD a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)]) // a - (-7x)<<n
  1279  (MADD a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
  1280  
  1281  (MADD a (MOVDconst [-1]) x) -> (SUB a x) // same rules with the constant as the middle operand
  1282  (MADD a (MOVDconst [0]) _) -> a
  1283  (MADD a (MOVDconst [1]) x) -> (ADD a x)
  1284  (MADD a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
  1285  (MADD a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c>=3 -> (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1286  (MADD a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c>=7 -> (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
  1287  (MADD a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
  1288  (MADD a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
  1289  (MADD a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
  1290  (MADD a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
  1291  
  1292  (MADDW a x (MOVDconst [c])) && int32(c)==-1 -> (SUB a x) // 32-bit forms test the constant through int32(c) / is32Bit(c)
  1293  (MADDW a _ (MOVDconst [c])) && int32(c)==0 -> a
  1294  (MADDW a x (MOVDconst [c])) && int32(c)==1 -> (ADD a x)
  1295  (MADDW a x (MOVDconst [c])) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
  1296  (MADDW a x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c)>=3 -> (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1297  (MADDW a x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c)>=7 -> (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
  1298  (MADDW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
  1299  (MADDW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
  1300  (MADDW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
  1301  (MADDW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
  1302  
  1303  (MADDW a (MOVDconst [c]) x) && int32(c)==-1 -> (SUB a x) // 32-bit forms with the constant as the middle operand
  1304  (MADDW a (MOVDconst [c]) _) && int32(c)==0 -> a
  1305  (MADDW a (MOVDconst [c]) x) && int32(c)==1 -> (ADD a x)
  1306  (MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
  1307  (MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c)>=3 -> (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1308  (MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c)>=7 -> (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
  1309  (MADDW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
  1310  (MADDW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
  1311  (MADDW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
  1312  (MADDW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
  1313  
  1314  (MSUB a x (MOVDconst [-1])) -> (ADD a x) // msub by constant (a - x*c): constant as the last operand
  1315  (MSUB a _ (MOVDconst [0])) -> a
  1316  (MSUB a x (MOVDconst [1])) -> (SUB a x)
  1317  (MSUB a x (MOVDconst [c])) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
  1318  (MSUB a x (MOVDconst [c])) && isPowerOfTwo(c-1) && c>=3 -> (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1319  (MSUB a x (MOVDconst [c])) && isPowerOfTwo(c+1) && c>=7 -> (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)])) // a + (x - x<<n)
  1320  (MSUB a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)]) // a + (-3x)<<n
  1321  (MSUB a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
  1322  (MSUB a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)]) // a + (-7x)<<n
  1323  (MSUB a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
  1324  
  1325  (MSUB a (MOVDconst [-1]) x) -> (ADD a x) // same rules with the constant as the middle operand
  1326  (MSUB a (MOVDconst [0]) _) -> a
  1327  (MSUB a (MOVDconst [1]) x) -> (SUB a x)
  1328  (MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
  1329  (MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c>=3 -> (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1330  (MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c>=7 -> (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
  1331  (MSUB a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
  1332  (MSUB a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
  1333  (MSUB a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
  1334  (MSUB a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
  1335  
  1336  (MSUBW a x (MOVDconst [c])) && int32(c)==-1 -> (ADD a x) // 32-bit forms test the constant through int32(c) / is32Bit(c)
  1337  (MSUBW a _ (MOVDconst [c])) && int32(c)==0 -> a
  1338  (MSUBW a x (MOVDconst [c])) && int32(c)==1 -> (SUB a x)
  1339  (MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
  1340  (MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c)>=3 -> (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1341  (MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c)>=7 -> (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
  1342  (MSUBW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
  1343  (MSUBW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
  1344  (MSUBW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
  1345  (MSUBW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
  1346  
  1347  (MSUBW a (MOVDconst [c]) x) && int32(c)==-1 -> (ADD a x) // 32-bit forms with the constant as the middle operand
  1348  (MSUBW a (MOVDconst [c]) _) && int32(c)==0 -> a
  1349  (MSUBW a (MOVDconst [c]) x) && int32(c)==1 -> (SUB a x)
  1350  (MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
  1351  (MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c)>=3 -> (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1352  (MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c)>=7 -> (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
  1353  (MSUBW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
  1354  (MSUBW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
  1355  (MSUBW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
  1356  (MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
  1357  
  1358  // div by constant: unsigned division by 1 is the identity, by a power of two a logical right shift; unsigned modulus by a power of two is a mask
  1359  (UDIV x (MOVDconst [1])) -> x
  1360  (UDIV x (MOVDconst [c])) && isPowerOfTwo(c) -> (SRLconst [log2(c)] x)
  1361  (UDIVW x (MOVDconst [c])) && uint32(c)==1 -> x
  1362  (UDIVW x (MOVDconst [c])) && isPowerOfTwo(c) && is32Bit(c) -> (SRLconst [log2(c)] (MOVWUreg <v.Type> x)) // zero-extend first: SRLconst shifts all 64 bits, and Div32u passes x to UDIVW without extension, so non-zero upper bits would leak into the low 32-bit result
  1363  (UMOD _ (MOVDconst [1])) -> (MOVDconst [0])
  1364  (UMOD x (MOVDconst [c])) && isPowerOfTwo(c) -> (ANDconst [c-1] x)
  1365  (UMODW _ (MOVDconst [c])) && uint32(c)==1 -> (MOVDconst [0])
  1366  (UMODW x (MOVDconst [c])) && isPowerOfTwo(c) && is32Bit(c) -> (ANDconst [c-1] x) // the mask c-1 already clears every bit above the low 32
  1367  
  1368  // generic simplifications: algebraic identities on identical operands, and folding of NEG/MVN operands into the combined op
  1369  (ADD x (NEG y)) -> (SUB x y)
  1370  (SUB x x) -> (MOVDconst [0])
  1371  (AND x x) -> x
  1372  (OR  x x) -> x
  1373  (XOR x x) -> (MOVDconst [0])
  1374  (BIC x x) -> (MOVDconst [0])
  1375  (EON x x) -> (MOVDconst [-1])
  1376  (ORN x x) -> (MOVDconst [-1])
  1377  (AND x (MVN y)) -> (BIC x y)
  1378  (XOR x (MVN y)) -> (EON x y)
  1379  (OR  x (MVN y)) -> (ORN x y)
  1380  (CSEL {cc} x (MOVDconst [0]) flag) -> (CSEL0 {cc} x flag) // select between x and constant 0
  1381  (CSEL {cc} (MOVDconst [0]) y flag) -> (CSEL0 {arm64Negate(cc.(Op))} y flag) // zero in the first arm: negate the condition so the zero lands in the second arm
  1382  (SUB x (SUB y z)) -> (SUB (ADD <v.Type> x z) y) // x-(y-z) == (x+z)-y
  1383  (SUB (SUB x y) z) -> (SUB x (ADD <y.Type> y z)) // (x-y)-z == x-(y+z)
  1384  
  1385  // remove redundant *const ops: identity constants return the operand, absorbing constants return the constant
  1386  (ADDconst [0]  x) -> x
  1387  (SUBconst [0]  x) -> x
  1388  (ANDconst [0]  _) -> (MOVDconst [0])
  1389  (ANDconst [-1] x) -> x
  1390  (ORconst  [0]  x) -> x
  1391  (ORconst  [-1] _) -> (MOVDconst [-1])
  1392  (XORconst [0]  x) -> x
  1393  (XORconst [-1] x) -> (MVN x) // xor with all ones is bitwise not
  1394  
  1395  // generic constant folding: evaluate ops whose operands are constants at compile time, and merge nested *const ops
  1396  (ADDconst [c] (MOVDconst [d]))  -> (MOVDconst [c+d])
  1397  (ADDconst [c] (ADDconst [d] x)) -> (ADDconst [c+d] x)
  1398  (ADDconst [c] (SUBconst [d] x)) -> (ADDconst [c-d] x)
  1399  (SUBconst [c] (MOVDconst [d]))  -> (MOVDconst [d-c])
  1400  (SUBconst [c] (SUBconst [d] x)) -> (ADDconst [-c-d] x)
  1401  (SUBconst [c] (ADDconst [d] x)) -> (ADDconst [-c+d] x)
  1402  (SLLconst [c] (MOVDconst [d]))  -> (MOVDconst [d<<uint64(c)])
  1403  (SRLconst [c] (MOVDconst [d]))  -> (MOVDconst [int64(uint64(d)>>uint64(c))]) // logical shift: shift as unsigned
  1404  (SRAconst [c] (MOVDconst [d]))  -> (MOVDconst [d>>uint64(c)]) // arithmetic shift: shift as signed
  1405  (MUL   (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c*d])
  1406  (MULW  (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [int64(int32(c)*int32(d))]) // W forms compute in 32 bits, then sign-extend the result
  1407  (MNEG  (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [-c*d])
  1408  (MNEGW (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [-int64(int32(c)*int32(d))])
  1409  (MADD  (MOVDconst [c]) x y) -> (ADDconst [c] (MUL   <x.Type> x y)) // constant addend: split into a multiply plus ADDconst
  1410  (MADDW (MOVDconst [c]) x y) -> (ADDconst [c] (MULW  <x.Type> x y))
  1411  (MSUB  (MOVDconst [c]) x y) -> (ADDconst [c] (MNEG  <x.Type> x y))
  1412  (MSUBW (MOVDconst [c]) x y) -> (ADDconst [c] (MNEGW <x.Type> x y))
  1413  (MADD  a (MOVDconst [c]) (MOVDconst [d])) -> (ADDconst [c*d] a) // both multiplicands constant: fold the product
  1414  (MADDW a (MOVDconst [c]) (MOVDconst [d])) -> (ADDconst [int64(int32(c)*int32(d))] a)
  1415  (MSUB  a (MOVDconst [c]) (MOVDconst [d])) -> (SUBconst [c*d] a)
  1416  (MSUBW a (MOVDconst [c]) (MOVDconst [d])) -> (SUBconst [int64(int32(c)*int32(d))] a)
  1417  (DIV   (MOVDconst [c]) (MOVDconst [d])) && d != 0 -> (MOVDconst [c/d]) // divisor guards: the fold evaluates c/d and c%d inside the compiler, which must not divide by zero
  1418  (UDIV  (MOVDconst [c]) (MOVDconst [d])) && d != 0 -> (MOVDconst [int64(uint64(c)/uint64(d))])
  1419  (DIVW  (MOVDconst [c]) (MOVDconst [d])) && int32(d) != 0 -> (MOVDconst [int64(int32(c)/int32(d))]) // W forms divide by the low 32 bits, so those are what must be non-zero
  1420  (UDIVW (MOVDconst [c]) (MOVDconst [d])) && uint32(d) != 0 -> (MOVDconst [int64(uint32(c)/uint32(d))])
  1421  (MOD   (MOVDconst [c]) (MOVDconst [d])) && d != 0 -> (MOVDconst [c%d])
  1422  (UMOD  (MOVDconst [c]) (MOVDconst [d])) && d != 0 -> (MOVDconst [int64(uint64(c)%uint64(d))])
  1423  (MODW  (MOVDconst [c]) (MOVDconst [d])) && int32(d) != 0 -> (MOVDconst [int64(int32(c)%int32(d))])
  1424  (UMODW (MOVDconst [c]) (MOVDconst [d])) && uint32(d) != 0 -> (MOVDconst [int64(uint32(c)%uint32(d))])
  1425  (ANDconst [c] (MOVDconst [d]))  -> (MOVDconst [c&d]) // constant folding of logical ops and extensions
  1426  (ANDconst [c] (ANDconst [d] x)) -> (ANDconst [c&d] x)
  1427  (ANDconst [c] (MOVWUreg x)) -> (ANDconst [c&(1<<32-1)] x) // a zero-extension under/over an AND is folded into the mask
  1428  (ANDconst [c] (MOVHUreg x)) -> (ANDconst [c&(1<<16-1)] x)
  1429  (ANDconst [c] (MOVBUreg x)) -> (ANDconst [c&(1<<8-1)] x)
  1430  (MOVWUreg (ANDconst [c] x)) -> (ANDconst [c&(1<<32-1)] x)
  1431  (MOVHUreg (ANDconst [c] x)) -> (ANDconst [c&(1<<16-1)] x)
  1432  (MOVBUreg (ANDconst [c] x)) -> (ANDconst [c&(1<<8-1)] x)
  1433  (ORconst  [c] (MOVDconst [d]))  -> (MOVDconst [c|d])
  1434  (ORconst  [c] (ORconst [d] x))  -> (ORconst [c|d] x)
  1435  (XORconst [c] (MOVDconst [d]))  -> (MOVDconst [c^d])
  1436  (XORconst [c] (XORconst [d] x)) -> (XORconst [c^d] x)
  1437  (MVN (MOVDconst [c])) -> (MOVDconst [^c])
  1438  (NEG (MOVDconst [c])) -> (MOVDconst [-c])
  1439  (MOVBreg  (MOVDconst [c])) -> (MOVDconst [int64(int8(c))]) // extensions of constants: truncate to the source width, then sign- or zero-extend
  1440  (MOVBUreg (MOVDconst [c])) -> (MOVDconst [int64(uint8(c))])
  1441  (MOVHreg  (MOVDconst [c])) -> (MOVDconst [int64(int16(c))])
  1442  (MOVHUreg (MOVDconst [c])) -> (MOVDconst [int64(uint16(c))])
  1443  (MOVWreg  (MOVDconst [c])) -> (MOVDconst [int64(int32(c))])
  1444  (MOVWUreg (MOVDconst [c])) -> (MOVDconst [int64(uint32(c))])
  1445  (MOVDreg  (MOVDconst [c])) -> (MOVDconst [c])
  1446  
  1447  // constant comparisons: when both sides are constants, replace the compare with the flag constant encoding the signed (LT/GT) and unsigned (ULT/UGT) outcome
  1448  (CMPconst  (MOVDconst [x]) [y]) && x==y -> (FlagEQ)
  1449  (CMPconst  (MOVDconst [x]) [y]) && x<y && uint64(x)<uint64(y) -> (FlagLT_ULT)
  1450  (CMPconst  (MOVDconst [x]) [y]) && x<y && uint64(x)>uint64(y) -> (FlagLT_UGT)
  1451  (CMPconst  (MOVDconst [x]) [y]) && x>y && uint64(x)<uint64(y) -> (FlagGT_ULT)
  1452  (CMPconst  (MOVDconst [x]) [y]) && x>y && uint64(x)>uint64(y) -> (FlagGT_UGT)
  1453  (CMPWconst (MOVDconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ) // W forms compare only the low 32 bits
  1454  (CMPWconst (MOVDconst [x]) [y]) && int32(x)<int32(y) && uint32(x)<uint32(y) -> (FlagLT_ULT)
  1455  (CMPWconst (MOVDconst [x]) [y]) && int32(x)<int32(y) && uint32(x)>uint32(y) -> (FlagLT_UGT)
  1456  (CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) && uint32(x)<uint32(y) -> (FlagGT_ULT)
  1457  (CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) && uint32(x)>uint32(y) -> (FlagGT_UGT)
  1458  (TSTconst  (MOVDconst [x]) [y]) && int64(x&y)==0 -> (FlagEQ) // TST: flags are chosen from the sign of x&y
  1459  (TSTconst  (MOVDconst [x]) [y]) && int64(x&y)<0  -> (FlagLT_UGT)
  1460  (TSTconst  (MOVDconst [x]) [y]) && int64(x&y)>0  -> (FlagGT_UGT)
  1461  (TSTWconst (MOVDconst [x]) [y]) && int32(x&y)==0 -> (FlagEQ)
  1462  (TSTWconst (MOVDconst [x]) [y]) && int32(x&y)<0  -> (FlagLT_UGT)
  1463  (TSTWconst (MOVDconst [x]) [y]) && int32(x&y)>0  -> (FlagGT_UGT)
  1464  (CMNconst  (MOVDconst [x]) [y]) && int64(x)==int64(-y) -> (FlagEQ) // CMN: x is compared against the negated constant -y
  1465  (CMNconst  (MOVDconst [x]) [y]) && int64(x)<int64(-y) && uint64(x)<uint64(-y) -> (FlagLT_ULT)
  1466  (CMNconst  (MOVDconst [x]) [y]) && int64(x)<int64(-y) && uint64(x)>uint64(-y) -> (FlagLT_UGT)
  1467  (CMNconst  (MOVDconst [x]) [y]) && int64(x)>int64(-y) && uint64(x)<uint64(-y) -> (FlagGT_ULT)
  1468  (CMNconst  (MOVDconst [x]) [y]) && int64(x)>int64(-y) && uint64(x)>uint64(-y) -> (FlagGT_UGT)
  1469  (CMNWconst (MOVDconst [x]) [y]) && int32(x)==int32(-y) -> (FlagEQ)
  1470  (CMNWconst  (MOVDconst [x]) [y]) && int32(x)<int32(-y) && uint32(x)<uint32(-y) -> (FlagLT_ULT)
  1471  (CMNWconst  (MOVDconst [x]) [y]) && int32(x)<int32(-y) && uint32(x)>uint32(-y) -> (FlagLT_UGT)
  1472  (CMNWconst  (MOVDconst [x]) [y]) && int32(x)>int32(-y) && uint32(x)<uint32(-y) -> (FlagGT_ULT)
  1473  (CMNWconst  (MOVDconst [x]) [y]) && int32(x)>int32(-y) && uint32(x)>uint32(-y) -> (FlagGT_UGT)
  1474  
  1475  
  1476  // other known comparisons: the operand is bounded above (by a zero-extension width,
        // an AND mask, or a logical right shift) and the constant exceeds that bound, so the
        // operand is always less than the constant, both signed and unsigned.
  1477  (CMPconst (MOVBUreg _) [c]) && 0xff < c -> (FlagLT_ULT)
  1478  (CMPconst (MOVHUreg _) [c]) && 0xffff < c -> (FlagLT_ULT)
  1479  (CMPconst (MOVWUreg _) [c]) && 0xffffffff < c -> (FlagLT_ULT)
  1480  (CMPconst (ANDconst _ [m]) [n]) && 0 <= m && m < n -> (FlagLT_ULT)
  1481  (CMPconst (SRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 63 && (1<<uint64(64-c)) <= uint64(n) -> (FlagLT_ULT)
  1482  (CMPWconst (MOVBUreg _) [c]) && 0xff < int32(c) -> (FlagLT_ULT)
  1483  (CMPWconst (MOVHUreg _) [c]) && 0xffff < int32(c) -> (FlagLT_ULT)
  1484  
  1485  // absorb flag constants into branches: a conditional block controlled by a flag
        // constant always takes the same successor. (First yes no) is produced when the
        // condition holds for those flags, (First no yes) when it does not.
  1486  (EQ (FlagEQ) yes no) -> (First yes no)
  1487  (EQ (FlagLT_ULT) yes no) -> (First no yes)
  1488  (EQ (FlagLT_UGT) yes no) -> (First no yes)
  1489  (EQ (FlagGT_ULT) yes no) -> (First no yes)
  1490  (EQ (FlagGT_UGT) yes no) -> (First no yes)
  1491  
  1492  (NE (FlagEQ) yes no) -> (First no yes)
  1493  (NE (FlagLT_ULT) yes no) -> (First yes no)
  1494  (NE (FlagLT_UGT) yes no) -> (First yes no)
  1495  (NE (FlagGT_ULT) yes no) -> (First yes no)
  1496  (NE (FlagGT_UGT) yes no) -> (First yes no)
  1497  
  1498  (LT (FlagEQ) yes no) -> (First no yes)
  1499  (LT (FlagLT_ULT) yes no) -> (First yes no)
  1500  (LT (FlagLT_UGT) yes no) -> (First yes no)
  1501  (LT (FlagGT_ULT) yes no) -> (First no yes)
  1502  (LT (FlagGT_UGT) yes no) -> (First no yes)
  1503  
  1504  (LE (FlagEQ) yes no) -> (First yes no)
  1505  (LE (FlagLT_ULT) yes no) -> (First yes no)
  1506  (LE (FlagLT_UGT) yes no) -> (First yes no)
  1507  (LE (FlagGT_ULT) yes no) -> (First no yes)
  1508  (LE (FlagGT_UGT) yes no) -> (First no yes)
  1509  
  1510  (GT (FlagEQ) yes no) -> (First no yes)
  1511  (GT (FlagLT_ULT) yes no) -> (First no yes)
  1512  (GT (FlagLT_UGT) yes no) -> (First no yes)
  1513  (GT (FlagGT_ULT) yes no) -> (First yes no)
  1514  (GT (FlagGT_UGT) yes no) -> (First yes no)
  1515  
  1516  (GE (FlagEQ) yes no) -> (First yes no)
  1517  (GE (FlagLT_ULT) yes no) -> (First no yes)
  1518  (GE (FlagLT_UGT) yes no) -> (First no yes)
  1519  (GE (FlagGT_ULT) yes no) -> (First yes no)
  1520  (GE (FlagGT_UGT) yes no) -> (First yes no)
  1521  
  1522  (ULT (FlagEQ) yes no) -> (First no yes)
  1523  (ULT (FlagLT_ULT) yes no) -> (First yes no)
  1524  (ULT (FlagLT_UGT) yes no) -> (First no yes)
  1525  (ULT (FlagGT_ULT) yes no) -> (First yes no)
  1526  (ULT (FlagGT_UGT) yes no) -> (First no yes)
  1527  
  1528  (ULE (FlagEQ) yes no) -> (First yes no)
  1529  (ULE (FlagLT_ULT) yes no) -> (First yes no)
  1530  (ULE (FlagLT_UGT) yes no) -> (First no yes)
  1531  (ULE (FlagGT_ULT) yes no) -> (First yes no)
  1532  (ULE (FlagGT_UGT) yes no) -> (First no yes)
  1533  
  1534  (UGT (FlagEQ) yes no) -> (First no yes)
  1535  (UGT (FlagLT_ULT) yes no) -> (First no yes)
  1536  (UGT (FlagLT_UGT) yes no) -> (First yes no)
  1537  (UGT (FlagGT_ULT) yes no) -> (First no yes)
  1538  (UGT (FlagGT_UGT) yes no) -> (First yes no)
  1539  
  1540  (UGE (FlagEQ) yes no) -> (First yes no)
  1541  (UGE (FlagLT_ULT) yes no) -> (First no yes)
  1542  (UGE (FlagLT_UGT) yes no) -> (First yes no)
  1543  (UGE (FlagGT_ULT) yes no) -> (First no yes)
  1544  (UGE (FlagGT_UGT) yes no) -> (First yes no)
  1545  
        // Z/NZ branch on a constant being zero/nonzero; ZW/NZW test only its low 32 bits.
  1546  (Z (MOVDconst [0]) yes no) -> (First yes no)
  1547  (Z (MOVDconst [c]) yes no) && c != 0 -> (First no yes)
  1548  (NZ (MOVDconst [0]) yes no) -> (First no yes)
  1549  (NZ (MOVDconst [c]) yes no) && c != 0 -> (First yes no)
  1550  (ZW (MOVDconst [c]) yes no) && int32(c) == 0 -> (First yes no)
  1551  (ZW (MOVDconst [c]) yes no) && int32(c) != 0 -> (First no yes)
  1552  (NZW (MOVDconst [c]) yes no) && int32(c) == 0 -> (First no yes)
  1553  (NZW (MOVDconst [c]) yes no) && int32(c) != 0 -> (First yes no)
  1554  
  1555  // absorb InvertFlags into branches: InvertFlags wraps a comparison whose operands
        // were swapped, so ordered conditions flip direction (LT<->GT, LE<->GE, likewise
        // for the unsigned and floating-point forms) while EQ and NE are unaffected.
  1556  (LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
  1557  (GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
  1558  (LE (InvertFlags cmp) yes no) -> (GE cmp yes no)
  1559  (GE (InvertFlags cmp) yes no) -> (LE cmp yes no)
  1560  (ULT (InvertFlags cmp) yes no) -> (UGT cmp yes no)
  1561  (UGT (InvertFlags cmp) yes no) -> (ULT cmp yes no)
  1562  (ULE (InvertFlags cmp) yes no) -> (UGE cmp yes no)
  1563  (UGE (InvertFlags cmp) yes no) -> (ULE cmp yes no)
  1564  (EQ (InvertFlags cmp) yes no) -> (EQ cmp yes no)
  1565  (NE (InvertFlags cmp) yes no) -> (NE cmp yes no)
  1566  (FLT (InvertFlags cmp) yes no) -> (FGT cmp yes no)
  1567  (FGT (InvertFlags cmp) yes no) -> (FLT cmp yes no)
  1568  (FLE (InvertFlags cmp) yes no) -> (FGE cmp yes no)
  1569  (FGE (InvertFlags cmp) yes no) -> (FLE cmp yes no)
  1570  
  1571  // absorb InvertFlags into CSEL(0): drop the InvertFlags wrapper and rewrite the
        // condition code with arm64Invert instead.
        // NOTE(review): arm64Invert is defined elsewhere; presumably it applies the same
        // LT<->GT style flip that the InvertFlags branch rules in this file use -- confirm.
  1572  (CSEL {cc} x y (InvertFlags cmp)) -> (CSEL {arm64Invert(cc.(Op))} x y cmp)
  1573  (CSEL0 {cc} x (InvertFlags cmp)) -> (CSEL0 {arm64Invert(cc.(Op))} x cmp)
  1574  
  1575  // absorb flag constants into boolean values: a condition materialized from a flag
        // constant is itself constant: 1 when the condition holds for those flags, 0 otherwise.
  1576  (Equal (FlagEQ)) -> (MOVDconst [1])
  1577  (Equal (FlagLT_ULT)) -> (MOVDconst [0])
  1578  (Equal (FlagLT_UGT)) -> (MOVDconst [0])
  1579  (Equal (FlagGT_ULT)) -> (MOVDconst [0])
  1580  (Equal (FlagGT_UGT)) -> (MOVDconst [0])
  1581  
  1582  (NotEqual (FlagEQ)) -> (MOVDconst [0])
  1583  (NotEqual (FlagLT_ULT)) -> (MOVDconst [1])
  1584  (NotEqual (FlagLT_UGT)) -> (MOVDconst [1])
  1585  (NotEqual (FlagGT_ULT)) -> (MOVDconst [1])
  1586  (NotEqual (FlagGT_UGT)) -> (MOVDconst [1])
  1587  
  1588  (LessThan (FlagEQ)) -> (MOVDconst [0])
  1589  (LessThan (FlagLT_ULT)) -> (MOVDconst [1])
  1590  (LessThan (FlagLT_UGT)) -> (MOVDconst [1])
  1591  (LessThan (FlagGT_ULT)) -> (MOVDconst [0])
  1592  (LessThan (FlagGT_UGT)) -> (MOVDconst [0])
  1593  
  1594  (LessThanU (FlagEQ)) -> (MOVDconst [0])
  1595  (LessThanU (FlagLT_ULT)) -> (MOVDconst [1])
  1596  (LessThanU (FlagLT_UGT)) -> (MOVDconst [0])
  1597  (LessThanU (FlagGT_ULT)) -> (MOVDconst [1])
  1598  (LessThanU (FlagGT_UGT)) -> (MOVDconst [0])
  1599  
  1600  (LessEqual (FlagEQ)) -> (MOVDconst [1])
  1601  (LessEqual (FlagLT_ULT)) -> (MOVDconst [1])
  1602  (LessEqual (FlagLT_UGT)) -> (MOVDconst [1])
  1603  (LessEqual (FlagGT_ULT)) -> (MOVDconst [0])
  1604  (LessEqual (FlagGT_UGT)) -> (MOVDconst [0])
  1605  
  1606  (LessEqualU (FlagEQ)) -> (MOVDconst [1])
  1607  (LessEqualU (FlagLT_ULT)) -> (MOVDconst [1])
  1608  (LessEqualU (FlagLT_UGT)) -> (MOVDconst [0])
  1609  (LessEqualU (FlagGT_ULT)) -> (MOVDconst [1])
  1610  (LessEqualU (FlagGT_UGT)) -> (MOVDconst [0])
  1611  
  1612  (GreaterThan (FlagEQ)) -> (MOVDconst [0])
  1613  (GreaterThan (FlagLT_ULT)) -> (MOVDconst [0])
  1614  (GreaterThan (FlagLT_UGT)) -> (MOVDconst [0])
  1615  (GreaterThan (FlagGT_ULT)) -> (MOVDconst [1])
  1616  (GreaterThan (FlagGT_UGT)) -> (MOVDconst [1])
  1617  
  1618  (GreaterThanU (FlagEQ)) -> (MOVDconst [0])
  1619  (GreaterThanU (FlagLT_ULT)) -> (MOVDconst [0])
  1620  (GreaterThanU (FlagLT_UGT)) -> (MOVDconst [1])
  1621  (GreaterThanU (FlagGT_ULT)) -> (MOVDconst [0])
  1622  (GreaterThanU (FlagGT_UGT)) -> (MOVDconst [1])
  1623  
  1624  (GreaterEqual (FlagEQ)) -> (MOVDconst [1])
  1625  (GreaterEqual (FlagLT_ULT)) -> (MOVDconst [0])
  1626  (GreaterEqual (FlagLT_UGT)) -> (MOVDconst [0])
  1627  (GreaterEqual (FlagGT_ULT)) -> (MOVDconst [1])
  1628  (GreaterEqual (FlagGT_UGT)) -> (MOVDconst [1])
  1629  
  1630  (GreaterEqualU (FlagEQ)) -> (MOVDconst [1])
  1631  (GreaterEqualU (FlagLT_ULT)) -> (MOVDconst [0])
  1632  (GreaterEqualU (FlagLT_UGT)) -> (MOVDconst [1])
  1633  (GreaterEqualU (FlagGT_ULT)) -> (MOVDconst [0])
  1634  (GreaterEqualU (FlagGT_UGT)) -> (MOVDconst [1])
  1635  
  1636  // absorb InvertFlags into boolean values: strip the InvertFlags wrapper and use the
        // mirrored condition (Less<->Greater); Equal and NotEqual stay as they are.
  1637  (Equal (InvertFlags x)) -> (Equal x)
  1638  (NotEqual (InvertFlags x)) -> (NotEqual x)
  1639  (LessThan (InvertFlags x)) -> (GreaterThan x)
  1640  (LessThanU (InvertFlags x)) -> (GreaterThanU x)
  1641  (GreaterThan (InvertFlags x)) -> (LessThan x)
  1642  (GreaterThanU (InvertFlags x)) -> (LessThanU x)
  1643  (LessEqual (InvertFlags x)) -> (GreaterEqual x)
  1644  (LessEqualU (InvertFlags x)) -> (GreaterEqualU x)
  1645  (GreaterEqual (InvertFlags x)) -> (LessEqual x)
  1646  (GreaterEqualU (InvertFlags x)) -> (LessEqualU x)
  1647  (LessThanF (InvertFlags x)) -> (GreaterThanF x)
  1648  (LessEqualF (InvertFlags x)) -> (GreaterEqualF x)
  1649  (GreaterThanF (InvertFlags x)) -> (LessThanF x)
  1650  (GreaterEqualF (InvertFlags x)) -> (LessEqualF x)
  1651  
  1652  // Boolean-generating instructions always zero the upper bits
  1653  // of the register, so a boolean-typed value needs no zero-extension.
  1654  (MOVBUreg x) && x.Type.IsBoolean() -> (MOVDreg x)
  1655  
  1656  // absorb flag constants into conditional instructions: ccARM64Eval(cc, flag) > 0 picks
        // the first operand, < 0 picks the second (the constant 0 for CSEL0).
        // NOTE(review): ccARM64Eval is defined elsewhere; presumably it returns 0 when flag is
        // not a flag constant, in which case none of these rules fire -- confirm.
  1657  (CSEL {cc} x _ flag) && ccARM64Eval(cc, flag) > 0 -> x
  1658  (CSEL {cc} _ y flag) && ccARM64Eval(cc, flag) < 0 -> y
  1659  (CSEL0 {cc} x flag) && ccARM64Eval(cc, flag) > 0 -> x
  1660  (CSEL0 {cc} _ flag) && ccARM64Eval(cc, flag) < 0 -> (MOVDconst [0])
  1661  
  1662  // absorb flags back into boolean CSEL: the selector compares a boolean value with 0.
        // When flagArg(boolval) is non-nil, select directly on that flags value instead:
        // for NotEqual the condition becomes boolval's own op, for Equal its negation
        // (arm64Negate).
        // NOTE(review): flagArg is defined elsewhere; presumably it returns the flags
        // argument boolval was computed from, or nil if there is none -- confirm.
  1663  (CSEL {cc} x y (CMPWconst [0] boolval)) && cc.(Op) == OpARM64NotEqual && flagArg(boolval) != nil ->
  1664        (CSEL {boolval.Op} x y flagArg(boolval))
  1665  (CSEL {cc} x y (CMPWconst [0] boolval)) && cc.(Op) == OpARM64Equal && flagArg(boolval) != nil ->
  1666        (CSEL {arm64Negate(boolval.Op)} x y flagArg(boolval))
  1667  (CSEL0 {cc} x (CMPWconst [0] boolval)) && cc.(Op) == OpARM64NotEqual && flagArg(boolval) != nil ->
  1668        (CSEL0 {boolval.Op} x flagArg(boolval))
  1669  (CSEL0 {cc} x (CMPWconst [0] boolval)) && cc.(Op) == OpARM64Equal && flagArg(boolval) != nil ->
  1670        (CSEL0 {arm64Negate(boolval.Op)} x flagArg(boolval))
  1671  
  1672  // absorb shifts into ops: a constant shift feeding an operand is folded into the
        // *shiftLL / *shiftRL / *shiftRA form of the instruction, which applies the shift [c]
        // to its last register operand. For CMP, a shift on the first operand is handled by
        // swapping the operands and wrapping the result in InvertFlags.
        // NOTE(review): clobberIfDead is defined elsewhere; presumably it lets the rule fire
        // only when the shift has no other use -- confirm.
  1673  (NEG x:(SLLconst [c] y)) && clobberIfDead(x) -> (NEGshiftLL [c] y)
  1674  (NEG x:(SRLconst [c] y)) && clobberIfDead(x) -> (NEGshiftRL [c] y)
  1675  (NEG x:(SRAconst [c] y)) && clobberIfDead(x) -> (NEGshiftRA [c] y)
  1676  (MVN x:(SLLconst [c] y)) && clobberIfDead(x) -> (MVNshiftLL [c] y)
  1677  (MVN x:(SRLconst [c] y)) && clobberIfDead(x) -> (MVNshiftRL [c] y)
  1678  (MVN x:(SRAconst [c] y)) && clobberIfDead(x) -> (MVNshiftRA [c] y)
  1679  (ADD x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (ADDshiftLL x0 y [c])
  1680  (ADD x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (ADDshiftRL x0 y [c])
  1681  (ADD x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (ADDshiftRA x0 y [c])
  1682  (SUB x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (SUBshiftLL x0 y [c])
  1683  (SUB x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (SUBshiftRL x0 y [c])
  1684  (SUB x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (SUBshiftRA x0 y [c])
  1685  (AND x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (ANDshiftLL x0 y [c])
  1686  (AND x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (ANDshiftRL x0 y [c])
  1687  (AND x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (ANDshiftRA x0 y [c])
  1688  (OR  x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (ORshiftLL  x0 y [c]) // useful for combined load
  1689  (OR  x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (ORshiftRL  x0 y [c])
  1690  (OR  x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (ORshiftRA  x0 y [c])
  1691  (XOR x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (XORshiftLL x0 y [c])
  1692  (XOR x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (XORshiftRL x0 y [c])
  1693  (XOR x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (XORshiftRA x0 y [c])
  1694  (BIC x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (BICshiftLL x0 y [c])
  1695  (BIC x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (BICshiftRL x0 y [c])
  1696  (BIC x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (BICshiftRA x0 y [c])
  1697  (ORN x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (ORNshiftLL x0 y [c])
  1698  (ORN x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (ORNshiftRL x0 y [c])
  1699  (ORN x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (ORNshiftRA x0 y [c])
  1700  (EON x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (EONshiftLL x0 y [c])
  1701  (EON x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (EONshiftRL x0 y [c])
  1702  (EON x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (EONshiftRA x0 y [c])
  1703  (CMP x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (CMPshiftLL x0 y [c])
  1704  (CMP x0:(SLLconst [c] y) x1) && clobberIfDead(x0) -> (InvertFlags (CMPshiftLL x1 y [c]))
  1705  (CMP x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (CMPshiftRL x0 y [c])
  1706  (CMP x0:(SRLconst [c] y) x1) && clobberIfDead(x0) -> (InvertFlags (CMPshiftRL x1 y [c]))
  1707  (CMP x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (CMPshiftRA x0 y [c])
  1708  (CMP x0:(SRAconst [c] y) x1) && clobberIfDead(x0) -> (InvertFlags (CMPshiftRA x1 y [c]))
  1709  (CMN x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (CMNshiftLL x0 y [c])
  1710  (CMN x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (CMNshiftRL x0 y [c])
  1711  (CMN x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (CMNshiftRA x0 y [c])
  1712  (TST x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (TSTshiftLL x0 y [c])
  1713  (TST x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (TSTshiftRL x0 y [c])
  1714  (TST x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (TSTshiftRA x0 y [c])
  1715  
  1716  // prefer *const ops to *shift ops: when the unshifted (first) operand is a constant,
        // perform the shift as a separate op and use the immediate form of the instruction.
        // For CMP the operands end up swapped, so the result is wrapped in InvertFlags.
  1717  (ADDshiftLL (MOVDconst [c]) x [d]) -> (ADDconst [c] (SLLconst <x.Type> x [d]))
  1718  (ADDshiftRL (MOVDconst [c]) x [d]) -> (ADDconst [c] (SRLconst <x.Type> x [d]))
  1719  (ADDshiftRA (MOVDconst [c]) x [d]) -> (ADDconst [c] (SRAconst <x.Type> x [d]))
  1720  (ANDshiftLL (MOVDconst [c]) x [d]) -> (ANDconst [c] (SLLconst <x.Type> x [d]))
  1721  (ANDshiftRL (MOVDconst [c]) x [d]) -> (ANDconst [c] (SRLconst <x.Type> x [d]))
  1722  (ANDshiftRA (MOVDconst [c]) x [d]) -> (ANDconst [c] (SRAconst <x.Type> x [d]))
  1723  (ORshiftLL  (MOVDconst [c]) x [d]) -> (ORconst  [c] (SLLconst <x.Type> x [d]))
  1724  (ORshiftRL  (MOVDconst [c]) x [d]) -> (ORconst  [c] (SRLconst <x.Type> x [d]))
  1725  (ORshiftRA  (MOVDconst [c]) x [d]) -> (ORconst  [c] (SRAconst <x.Type> x [d]))
  1726  (XORshiftLL (MOVDconst [c]) x [d]) -> (XORconst [c] (SLLconst <x.Type> x [d]))
  1727  (XORshiftRL (MOVDconst [c]) x [d]) -> (XORconst [c] (SRLconst <x.Type> x [d]))
  1728  (XORshiftRA (MOVDconst [c]) x [d]) -> (XORconst [c] (SRAconst <x.Type> x [d]))
  1729  (CMPshiftLL (MOVDconst [c]) x [d]) -> (InvertFlags (CMPconst [c] (SLLconst <x.Type> x [d])))
  1730  (CMPshiftRL (MOVDconst [c]) x [d]) -> (InvertFlags (CMPconst [c] (SRLconst <x.Type> x [d])))
  1731  (CMPshiftRA (MOVDconst [c]) x [d]) -> (InvertFlags (CMPconst [c] (SRAconst <x.Type> x [d])))
  1732  (CMNshiftLL (MOVDconst [c]) x [d]) -> (CMNconst [c] (SLLconst <x.Type> x [d]))
  1733  (CMNshiftRL (MOVDconst [c]) x [d]) -> (CMNconst [c] (SRLconst <x.Type> x [d]))
  1734  (CMNshiftRA (MOVDconst [c]) x [d]) -> (CMNconst [c] (SRAconst <x.Type> x [d]))
  1735  (TSTshiftLL (MOVDconst [c]) x [d]) -> (TSTconst [c] (SLLconst <x.Type> x [d]))
  1736  (TSTshiftRL (MOVDconst [c]) x [d]) -> (TSTconst [c] (SRLconst <x.Type> x [d]))
  1737  (TSTshiftRA (MOVDconst [c]) x [d]) -> (TSTconst [c] (SRAconst <x.Type> x [d]))
  1738  
  1739  // constant folding in *shift ops: the shifted operand is a constant, so the shift is
        // evaluated in the rule. LL and RL shift the value as uint64 (logical shift); RA shifts
        // the signed value (arithmetic shift). BIC/ORN/EON fold to AND/OR/XOR with the
        // complemented shifted constant.
  1740  (MVNshiftLL (MOVDconst [c]) [d]) -> (MOVDconst [^int64(uint64(c)<<uint64(d))])
  1741  (MVNshiftRL (MOVDconst [c]) [d]) -> (MOVDconst [^int64(uint64(c)>>uint64(d))])
  1742  (MVNshiftRA (MOVDconst [c]) [d]) -> (MOVDconst [^(c>>uint64(d))])
  1743  (NEGshiftLL (MOVDconst [c]) [d]) -> (MOVDconst [-int64(uint64(c)<<uint64(d))])
  1744  (NEGshiftRL (MOVDconst [c]) [d]) -> (MOVDconst [-int64(uint64(c)>>uint64(d))])
  1745  (NEGshiftRA (MOVDconst [c]) [d]) -> (MOVDconst [-(c>>uint64(d))])
  1746  (ADDshiftLL x (MOVDconst [c]) [d]) -> (ADDconst x [int64(uint64(c)<<uint64(d))])
  1747  (ADDshiftRL x (MOVDconst [c]) [d]) -> (ADDconst x [int64(uint64(c)>>uint64(d))])
  1748  (ADDshiftRA x (MOVDconst [c]) [d]) -> (ADDconst x [c>>uint64(d)])
  1749  (SUBshiftLL x (MOVDconst [c]) [d]) -> (SUBconst x [int64(uint64(c)<<uint64(d))])
  1750  (SUBshiftRL x (MOVDconst [c]) [d]) -> (SUBconst x [int64(uint64(c)>>uint64(d))])
  1751  (SUBshiftRA x (MOVDconst [c]) [d]) -> (SUBconst x [c>>uint64(d)])
  1752  (ANDshiftLL x (MOVDconst [c]) [d]) -> (ANDconst x [int64(uint64(c)<<uint64(d))])
  1753  (ANDshiftRL x (MOVDconst [c]) [d]) -> (ANDconst x [int64(uint64(c)>>uint64(d))])
  1754  (ANDshiftRA x (MOVDconst [c]) [d]) -> (ANDconst x [c>>uint64(d)])
  1755  (ORshiftLL  x (MOVDconst [c]) [d]) -> (ORconst  x [int64(uint64(c)<<uint64(d))])
  1756  (ORshiftRL  x (MOVDconst [c]) [d]) -> (ORconst  x [int64(uint64(c)>>uint64(d))])
  1757  (ORshiftRA  x (MOVDconst [c]) [d]) -> (ORconst  x [c>>uint64(d)])
  1758  (XORshiftLL x (MOVDconst [c]) [d]) -> (XORconst x [int64(uint64(c)<<uint64(d))])
  1759  (XORshiftRL x (MOVDconst [c]) [d]) -> (XORconst x [int64(uint64(c)>>uint64(d))])
  1760  (XORshiftRA x (MOVDconst [c]) [d]) -> (XORconst x [c>>uint64(d)])
  1761  (BICshiftLL x (MOVDconst [c]) [d]) -> (ANDconst x [^int64(uint64(c)<<uint64(d))])
  1762  (BICshiftRL x (MOVDconst [c]) [d]) -> (ANDconst x [^int64(uint64(c)>>uint64(d))])
  1763  (BICshiftRA x (MOVDconst [c]) [d]) -> (ANDconst x [^(c>>uint64(d))])
  1764  (ORNshiftLL x (MOVDconst [c]) [d]) -> (ORconst  x [^int64(uint64(c)<<uint64(d))])
  1765  (ORNshiftRL x (MOVDconst [c]) [d]) -> (ORconst  x [^int64(uint64(c)>>uint64(d))])
  1766  (ORNshiftRA x (MOVDconst [c]) [d]) -> (ORconst  x [^(c>>uint64(d))])
  1767  (EONshiftLL x (MOVDconst [c]) [d]) -> (XORconst x [^int64(uint64(c)<<uint64(d))])
  1768  (EONshiftRL x (MOVDconst [c]) [d]) -> (XORconst x [^int64(uint64(c)>>uint64(d))])
  1769  (EONshiftRA x (MOVDconst [c]) [d]) -> (XORconst x [^(c>>uint64(d))])
  1770  (CMPshiftLL x (MOVDconst [c]) [d]) -> (CMPconst x [int64(uint64(c)<<uint64(d))])
  1771  (CMPshiftRL x (MOVDconst [c]) [d]) -> (CMPconst x [int64(uint64(c)>>uint64(d))])
  1772  (CMPshiftRA x (MOVDconst [c]) [d]) -> (CMPconst x [c>>uint64(d)])
  1773  (CMNshiftLL x (MOVDconst [c]) [d]) -> (CMNconst x [int64(uint64(c)<<uint64(d))])
  1774  (CMNshiftRL x (MOVDconst [c]) [d]) -> (CMNconst x [int64(uint64(c)>>uint64(d))])
  1775  (CMNshiftRA x (MOVDconst [c]) [d]) -> (CMNconst x [c>>uint64(d)])
  1776  (TSTshiftLL x (MOVDconst [c]) [d]) -> (TSTconst x [int64(uint64(c)<<uint64(d))])
  1777  (TSTshiftRL x (MOVDconst [c]) [d]) -> (TSTconst x [int64(uint64(c)>>uint64(d))])
  1778  (TSTshiftRA x (MOVDconst [c]) [d]) -> (TSTconst x [c>>uint64(d)])
  1779  
  1780  // simplification with *shift ops
        // A *shift op applies its shift to the LAST register operand: (OPshiftLL a b [d]) is
        // a OP (b<<d). For both sides to be the same value, the explicitly shifted value must
        // be the FIRST operand and the bare x the last: (x<<c) OP (x<<d) with c==d.
        // (With the operands the other way round the pattern would be x OP ((x<<c)<<d), which
        // does not simplify.) Then SUB/XOR/BIC give 0, AND/OR give the shifted value y, and
        // EON/ORN give all ones.
  1781  (SUBshiftLL (SLLconst x [c]) x [d]) && c==d -> (MOVDconst [0])
  1782  (SUBshiftRL (SRLconst x [c]) x [d]) && c==d -> (MOVDconst [0])
  1783  (SUBshiftRA (SRAconst x [c]) x [d]) && c==d -> (MOVDconst [0])
  1784  (ANDshiftLL y:(SLLconst x [c]) x [d]) && c==d -> y
  1785  (ANDshiftRL y:(SRLconst x [c]) x [d]) && c==d -> y
  1786  (ANDshiftRA y:(SRAconst x [c]) x [d]) && c==d -> y
  1787  (ORshiftLL  y:(SLLconst x [c]) x [d]) && c==d -> y
  1788  (ORshiftRL  y:(SRLconst x [c]) x [d]) && c==d -> y
  1789  (ORshiftRA  y:(SRAconst x [c]) x [d]) && c==d -> y
  1790  (XORshiftLL (SLLconst x [c]) x [d]) && c==d -> (MOVDconst [0])
  1791  (XORshiftRL (SRLconst x [c]) x [d]) && c==d -> (MOVDconst [0])
  1792  (XORshiftRA (SRAconst x [c]) x [d]) && c==d -> (MOVDconst [0])
  1793  (BICshiftLL (SLLconst x [c]) x [d]) && c==d -> (MOVDconst [0])
  1794  (BICshiftRL (SRLconst x [c]) x [d]) && c==d -> (MOVDconst [0])
  1795  (BICshiftRA (SRAconst x [c]) x [d]) && c==d -> (MOVDconst [0])
  1796  (EONshiftLL (SLLconst x [c]) x [d]) && c==d -> (MOVDconst [-1])
  1797  (EONshiftRL (SRLconst x [c]) x [d]) && c==d -> (MOVDconst [-1])
  1798  (EONshiftRA (SRAconst x [c]) x [d]) && c==d -> (MOVDconst [-1])
  1799  (ORNshiftLL (SLLconst x [c]) x [d]) && c==d -> (MOVDconst [-1])
  1800  (ORNshiftRL (SRLconst x [c]) x [d]) && c==d -> (MOVDconst [-1])
  1801  (ORNshiftRA (SRAconst x [c]) x [d]) && c==d -> (MOVDconst [-1])
  1802  
  1803  // Generate rotates with const shift: x<<c combined with x>>(64-c) (or the 32-bit
        // equivalent) is a rotate. The combining op may be ADD, OR or XOR. A left rotate by c
        // is emitted as a right rotate by 64-c (32-c for the W form).
  1804  (ADDshiftLL [c] (SRLconst x [64-c]) x) -> (RORconst [64-c] x)
  1805  ( ORshiftLL [c] (SRLconst x [64-c]) x) -> (RORconst [64-c] x)
  1806  (XORshiftLL [c] (SRLconst x [64-c]) x) -> (RORconst [64-c] x)
  1807  (ADDshiftRL [c] (SLLconst x [64-c]) x) -> (RORconst [   c] x)
  1808  ( ORshiftRL [c] (SLLconst x [64-c]) x) -> (RORconst [   c] x)
  1809  (XORshiftRL [c] (SLLconst x [64-c]) x) -> (RORconst [   c] x)
  1810  
  1811  (ADDshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
  1812  	-> (RORWconst [32-c] x)
  1813  ( ORshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
  1814  	-> (RORWconst [32-c] x)
  1815  (XORshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
  1816  	-> (RORWconst [32-c] x)
  1817  (ADDshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [c] x)
  1818  ( ORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [c] x)
  1819  (XORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [c] x)
  1820  
        // Two successive constant rotates merge into one; the amounts add modulo the width.
  1821  (RORconst [c] (RORconst [d] x)) -> (RORconst [(c+d)&63] x)
  1822  (RORWconst [c] (RORWconst [d] x)) -> (RORWconst [(c+d)&31] x)
  1823  
  1824  // Generate rotates with non-const shift.
  1825  // These rules match Go source code like
  1826  //	y &= 63
  1827  //	x << y | x >> (64-y)
  1828  // "|" can also be "^" or "+".
  1829  // arm64 does not have a ROL instruction, so ROL(x, y) is replaced by ROR(x, -y).
  1830  ((ADD|OR|XOR) (SLL x (ANDconst <t> [63] y))
  1831  	(CSEL0 <typ.UInt64> {cc} (SRL <typ.UInt64> x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
  1832  		(CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))))) && cc.(Op) == OpARM64LessThanU
  1833  	-> (ROR x (NEG <t> y))
  1834  ((ADD|OR|XOR) (SRL <typ.UInt64> x (ANDconst <t> [63] y))
  1835  	(CSEL0 <typ.UInt64> {cc} (SLL x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
  1836  		(CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))))) && cc.(Op) == OpARM64LessThanU
  1837  	-> (ROR x y)
  1838  
  1839  // These rules match Go source code like
  1840  //	y &= 31
  1841  //	x << y | x >> (32-y)
  1842  // "|" can also be "^" or "+".
  1843  // arm64 does not have a ROLW instruction, so ROLW(x, y) is replaced by RORW(x, -y).
  1844  ((ADD|OR|XOR) (SLL x (ANDconst <t> [31] y))
  1845  	(CSEL0 <typ.UInt32> {cc} (SRL <typ.UInt32> (MOVWUreg x) (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
  1846  		(CMPconst [64]  (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc.(Op) == OpARM64LessThanU
  1847  	-> (RORW x (NEG <t> y))
  1848  ((ADD|OR|XOR) (SRL <typ.UInt32> (MOVWUreg x) (ANDconst <t> [31] y))
  1849  	(CSEL0 <typ.UInt32> {cc} (SLL x (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
  1850  		(CMPconst [64]  (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc.(Op) == OpARM64LessThanU
  1851  	-> (RORW x y)
  1852  
  1853  // ((x>>8) | (x<<8)) -> (REV16W x) when x is a uint16; "|" can also be "^" or "+".
  1854  ((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (UBFX <typ.UInt16> [armBFAuxInt(8, 8)] x) x) -> (REV16W x)
  1855  
  1856  // Extract from reg pair: same shape as the const-shift rotate patterns, except that the
        // two halves come from different values (x supplies the high bits shifted down, x2 is
        // shifted up by c), so the result is an EXTR of the pair instead of a rotate.
  1857  (ADDshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
  1858  ( ORshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
  1859  (XORshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
  1860  
  1861  (ADDshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
  1862  	-> (EXTRWconst [32-c] x2 x)
  1863  ( ORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
  1864  	-> (EXTRWconst [32-c] x2 x)
  1865  (XORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
  1866  	-> (EXTRWconst [32-c] x2 x)
  1867  
  1868  // Rewrite special pairs of shifts to AND: shifting left then right (or right then left)
        // by the same amount c only clears the top (or bottom) c bits.
  1869  // On ARM64 the bitmask can fit into an instruction.
  1870  (SRLconst [c] (SLLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [1<<uint(64-c)-1] x) // mask out high bits
  1871  (SLLconst [c] (SRLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [^(1<<uint(c)-1)] x) // mask out low bits
  1872  
  1873  // Special case: setting bits to 1. When every bit cleared by the AND mask c2 is set again by the OR mask c1 (c2|c1 is all ones), the AND is redundant. An example is math.Copysign(c,-1)
  1874  (ORconst [c1] (ANDconst [c2] x)) && c2|c1 == ^0  -> (ORconst [c1] x)
  1875  
  1876  // bitfield ops
        // The aux value is built with armBFAuxInt(lsb, width).
  1877  
  1878  // sbfiz
  1879  // (x << lc) >> rc, arithmetic right shift, lc > rc; a sign-extension of a left-shifted
        // value is treated the same way.
  1880  (SRAconst [rc] (SLLconst [lc] x)) && lc > rc -> (SBFIZ [armBFAuxInt(lc-rc, 64-lc)] x)
  1881  (MOVWreg (SLLconst [lc] x)) && lc < 32 -> (SBFIZ [armBFAuxInt(lc, 32-lc)] x)
  1882  (MOVHreg (SLLconst [lc] x)) && lc < 16 -> (SBFIZ [armBFAuxInt(lc, 16-lc)] x)
  1883  (MOVBreg (SLLconst [lc] x)) && lc < 8 -> (SBFIZ [armBFAuxInt(lc, 8-lc)] x)
  1884  
  1885  // sbfx
  1886  // (x << lc) >> rc, arithmetic right shift, lc <= rc; an arithmetic right shift of a
        // sign-extended value is treated the same way.
  1887  (SRAconst [rc] (SLLconst [lc] x)) && lc <= rc -> (SBFX [armBFAuxInt(rc-lc, 64-rc)] x)
  1888  (SRAconst [rc] (MOVWreg x)) && rc < 32 -> (SBFX [armBFAuxInt(rc, 32-rc)] x)
  1889  (SRAconst [rc] (MOVHreg x)) && rc < 16 -> (SBFX [armBFAuxInt(rc, 16-rc)] x)
  1890  (SRAconst [rc] (MOVBreg x)) && rc < 8 -> (SBFX [armBFAuxInt(rc, 8-rc)] x)
  1891  
  1892  // sbfiz/sbfx combinations: merge shifts into bitfield ops
        // A right shift smaller than the field's lsb just lowers the lsb; a larger one (still
        // inside the field) turns the insert into an extract of the remaining bits.
  1893  (SRAconst [sc] (SBFIZ [bfc] x)) && sc < getARM64BFlsb(bfc)
  1894  	-> (SBFIZ [armBFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
  1895  (SRAconst [sc] (SBFIZ [bfc] x)) && sc >= getARM64BFlsb(bfc)
  1896  	&& sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
  1897  	-> (SBFX [armBFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
  1898  
  1899  // ubfiz
        // NOTE(review): isARM64BFMask and arm64BFWidth are defined elsewhere; presumably the
        // former checks that the mask, shifted right by its last argument, is a contiguous
        // run of low one bits that fits in 64 bits with the shift, and the latter returns the
        // length of that run -- confirm.
  1900  // (x & ac) << sc
  1901  (SLLconst [sc] (ANDconst [ac] x)) && isARM64BFMask(sc, ac, 0)
  1902  	-> (UBFIZ [armBFAuxInt(sc, arm64BFWidth(ac, 0))] x)
  1903  (SLLconst [sc] (MOVWUreg x)) && isARM64BFMask(sc, 1<<32-1, 0) -> (UBFIZ [armBFAuxInt(sc, 32)] x)
  1904  (SLLconst [sc] (MOVHUreg x)) && isARM64BFMask(sc, 1<<16-1, 0) -> (UBFIZ [armBFAuxInt(sc, 16)] x)
  1905  (SLLconst [sc] (MOVBUreg x)) && isARM64BFMask(sc, 1<<8-1, 0) -> (UBFIZ [armBFAuxInt(sc, 8)] x)
  1906  // (x << sc) & ac
  1907  (ANDconst [ac] (SLLconst [sc] x)) && isARM64BFMask(sc, ac, sc)
  1908  	-> (UBFIZ [armBFAuxInt(sc, arm64BFWidth(ac, sc))] x)
  1909  (MOVWUreg (SLLconst [sc] x)) && isARM64BFMask(sc, 1<<32-1, sc)
  1910  	-> (UBFIZ [armBFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
  1911  (MOVHUreg (SLLconst [sc] x)) && isARM64BFMask(sc, 1<<16-1, sc)
  1912  	-> (UBFIZ [armBFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
  1913  (MOVBUreg (SLLconst [sc] x)) && isARM64BFMask(sc, 1<<8-1, sc)
  1914  	-> (UBFIZ [armBFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x)
  1915  // (x << lc) >> rc, logical right shift, lc > rc
  1916  (SRLconst [rc] (SLLconst [lc] x)) && lc > rc -> (UBFIZ [armBFAuxInt(lc-rc, 64-lc)] x)
  1917  
  1918  // ubfx
  1919  // (x >> sc) & ac; the zero-extensions act as masks of 32, 16 and 8 one bits
  1920  (ANDconst [ac] (SRLconst [sc] x)) && isARM64BFMask(sc, ac, 0)
  1921  	-> (UBFX [armBFAuxInt(sc, arm64BFWidth(ac, 0))] x)
  1922  (MOVWUreg (SRLconst [sc] x)) && isARM64BFMask(sc, 1<<32-1, 0) -> (UBFX [armBFAuxInt(sc, 32)] x)
  1923  (MOVHUreg (SRLconst [sc] x)) && isARM64BFMask(sc, 1<<16-1, 0) -> (UBFX [armBFAuxInt(sc, 16)] x)
  1924  (MOVBUreg (SRLconst [sc] x)) && isARM64BFMask(sc, 1<<8-1, 0) -> (UBFX [armBFAuxInt(sc, 8)] x)
  1925  // (x & ac) >> sc
  1926  (SRLconst [sc] (ANDconst [ac] x)) && isARM64BFMask(sc, ac, sc)
  1927  	-> (UBFX [armBFAuxInt(sc, arm64BFWidth(ac, sc))] x)
  1928  (SRLconst [sc] (MOVWUreg x)) && isARM64BFMask(sc, 1<<32-1, sc)
  1929  	-> (UBFX [armBFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
  1930  (SRLconst [sc] (MOVHUreg x)) && isARM64BFMask(sc, 1<<16-1, sc)
  1931  	-> (UBFX [armBFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
  1932  (SRLconst [sc] (MOVBUreg x)) && isARM64BFMask(sc, 1<<8-1, sc)
  1933  	-> (UBFX [armBFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x)
  1934  // (x << lc) >> rc, logical right shift, lc < rc
  1935  (SRLconst [rc] (SLLconst [lc] x)) && lc < rc -> (UBFX [armBFAuxInt(rc-lc, 64-rc)] x)
  1936  
  1937  // ubfiz/ubfx combinations: merge shifts into bitfield ops
        // A shift applied before or after a bitfield op is folded into the op's lsb and width.
  1938  (SRLconst [sc] (UBFX [bfc] x)) && sc < getARM64BFwidth(bfc)
  1939  	-> (UBFX [armBFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)] x)
  1940  (UBFX [bfc] (SRLconst [sc] x)) && sc+getARM64BFwidth(bfc)+getARM64BFlsb(bfc) < 64
  1941  	-> (UBFX [armBFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))] x)
  1942  (SLLconst [sc] (UBFIZ [bfc] x)) && sc+getARM64BFwidth(bfc)+getARM64BFlsb(bfc) < 64
  1943  	-> (UBFIZ [armBFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))] x)
  1944  (UBFIZ [bfc] (SLLconst [sc] x)) && sc < getARM64BFwidth(bfc)
  1945  	-> (UBFIZ [armBFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)] x)
  1946  // ((x << c1) >> c2) >> c3
        // Right shift of an inserted field: equal to the lsb -> plain mask; smaller -> field
        // moves down; larger (but inside the field) -> extract of the remaining bits.
  1947  (SRLconst [sc] (UBFIZ [bfc] x)) && sc == getARM64BFlsb(bfc)
  1948  	-> (ANDconst [1<<uint(getARM64BFwidth(bfc))-1] x)
  1949  (SRLconst [sc] (UBFIZ [bfc] x)) && sc < getARM64BFlsb(bfc)
  1950  	-> (UBFIZ [armBFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
  1951  (SRLconst [sc] (UBFIZ [bfc] x)) && sc > getARM64BFlsb(bfc)
  1952  	&& sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
  1953  	-> (UBFX [armBFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
  1954  // ((x << c1) << c2) >> c3
        // Extract from a left-shifted value: shift equal to the lsb -> plain mask; smaller ->
        // extract from a lower lsb; larger (but inside the field) -> insert of the remaining bits.
  1955  (UBFX [bfc] (SLLconst [sc] x)) && sc == getARM64BFlsb(bfc)
  1956  	-> (ANDconst [1<<uint(getARM64BFwidth(bfc))-1] x)
  1957  (UBFX [bfc] (SLLconst [sc] x)) && sc < getARM64BFlsb(bfc)
  1958  	-> (UBFX [armBFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
  1959  (UBFX [bfc] (SLLconst [sc] x)) && sc > getARM64BFlsb(bfc)
  1960  	&& sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
  1961  	-> (UBFIZ [armBFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
  1962  
  1963  // bfi: OR of a field taken from one value, placed at lsb, with another value whose bits
        // at exactly that position were cleared by the AND mask ac.
  1964  (OR (UBFIZ [bfc] x) (ANDconst [ac] y))
  1965  	&& ac == ^((1<<uint(getARM64BFwidth(bfc))-1) << uint(getARM64BFlsb(bfc)))
  1966  	-> (BFI [bfc] y x)
  1967  (ORshiftRL [rc] (ANDconst [ac] x) (SLLconst [lc] y))
  1968  	&& lc > rc && ac == ^((1<<uint(64-lc)-1) << uint64(lc-rc))
  1969  	-> (BFI [armBFAuxInt(lc-rc, 64-lc)] x y)
  1970  // bfxil: OR of a field extracted from one value (landing in the low bits) with another
        // value whose low bits of the same width were cleared.
  1971  (OR (UBFX [bfc] x) (ANDconst [ac] y)) && ac == ^(1<<uint(getARM64BFwidth(bfc))-1)
  1972  	-> (BFXIL [bfc] y x)
  1973  (ORshiftLL [sc] (UBFX [bfc] x) (SRLconst [sc] y)) && sc == getARM64BFwidth(bfc)
  1974  	-> (BFXIL [bfc] y x)
  1975  (ORshiftRL [rc] (ANDconst [ac] y) (SLLconst [lc] x)) && lc < rc && ac == ^((1<<uint(64-rc)-1))
  1976  	-> (BFXIL [armBFAuxInt(rc-lc, 64-rc)] y x)
  1977  
  1978  // do combined loads
        // Each rule requires that every matched load and MOVDnop has exactly one use and that
        // mergePoint(b, ...) is non-nil; the matched values are then clobbered and a single
        // wider load is emitted.
        // NOTE(review): mergePoint is defined elsewhere; presumably the "@mergePoint(...)"
        // prefix places the new load in the block it returns -- confirm.
  1979  // little endian loads
  1980  // b[0] | b[1]<<8 -> load 16-bit
        // Variant 1: both bytes loaded from the same base p at offsets i0 and i0+1.
  1981  (ORshiftLL <t> [8]
  1982  	y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))
  1983  	y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
  1984  	&& i1 == i0+1
  1985  	&& x0.Uses == 1 && x1.Uses == 1
  1986  	&& y0.Uses == 1 && y1.Uses == 1
  1987  	&& mergePoint(b,x0,x1) != nil
  1988  	&& clobber(x0) && clobber(x1)
  1989  	&& clobber(y0) && clobber(y1)
  1990  	-> @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        // Variant 2: first byte via an indexed load, second via offset 1 from (ADD ptr1 idx1),
        // where ptr1/idx1 are the same pointer and index in either order.
  1991  (ORshiftLL <t> [8]
  1992  	y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))
  1993  	y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  1994  	&& s == nil
  1995  	&& x0.Uses == 1 && x1.Uses == 1
  1996  	&& y0.Uses == 1 && y1.Uses == 1
  1997  	&& mergePoint(b,x0,x1) != nil
  1998  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  1999  	&& clobber(x0) && clobber(x1)
  2000  	&& clobber(y0) && clobber(y1)
  2001  	-> @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
        // Variant 3: both bytes via indexed loads, at idx and idx+1.
  2002  (ORshiftLL <t> [8]
  2003  	y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))
  2004  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2005  	&& x0.Uses == 1 && x1.Uses == 1
  2006  	&& y0.Uses == 1 && y1.Uses == 1
  2007  	&& mergePoint(b,x0,x1) != nil
  2008  	&& clobber(x0) && clobber(x1)
  2009  	&& clobber(y0) && clobber(y1)
  2010  	-> @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr idx mem)
  2011  
  2012  // b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit
        // In this group the low 16 bits are already a halfword load (x0); the rules fold
        // the byte loads for b[2] and b[3] into it. All matched loads, MOVDnop wrappers
        // and the inner ORshiftLL (o0) must have Uses == 1 and are clobbered.
        // Offset form: same {s} p mem, byte offsets i0+2 and i0+3.
  2013  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  2014  	            x0:(MOVHUload [i0] {s} p mem)
  2015  	y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem)))
  2016  	y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
  2017  	&& i2 == i0+2
  2018  	&& i3 == i0+3
  2019  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  2020  	&& y1.Uses == 1 && y2.Uses == 1
  2021  	&& o0.Uses == 1
  2022  	&& mergePoint(b,x0,x1,x2) != nil
  2023  	&& clobber(x0) && clobber(x1) && clobber(x2)
  2024  	&& clobber(y1) && clobber(y2)
  2025  	&& clobber(o0)
  2026  	-> @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        // Mixed form: x0 is indexed (ptr0, idx0); bytes 2 and 3 are loaded at offsets 2 and 3
        // from an ADD of the same two operands (either order); p must be the same pointer as p1.
  2027  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  2028  	            x0:(MOVHUloadidx ptr0 idx0 mem)
  2029  	y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADD ptr1 idx1) mem)))
  2030  	y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
  2031  	&& s == nil
  2032  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  2033  	&& y1.Uses == 1 && y2.Uses == 1
  2034  	&& o0.Uses == 1
  2035  	&& mergePoint(b,x0,x1,x2) != nil
  2036  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2037  	&& isSamePtr(p1, p)
  2038  	&& clobber(x0) && clobber(x1) && clobber(x2)
  2039  	&& clobber(y1) && clobber(y2)
  2040  	&& clobber(o0)
  2041  	-> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 idx0 mem)
        // Indexed form: bytes 2 and 3 use the same ptr with indexes (ADDconst [2] idx) and (ADDconst [3] idx).
  2042  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  2043  	            x0:(MOVHUloadidx ptr idx mem)
  2044  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
  2045  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  2046  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  2047  	&& y1.Uses == 1 && y2.Uses == 1
  2048  	&& o0.Uses == 1
  2049  	&& mergePoint(b,x0,x1,x2) != nil
  2050  	&& clobber(x0) && clobber(x1) && clobber(x2)
  2051  	&& clobber(y1) && clobber(y2)
  2052  	&& clobber(o0)
  2053  	-> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr idx mem)
        // Scaled form: x0 is a MOVHUloadidx2 and the byte loads go through (ADDshiftLL [1] ptr1 idx1);
        // the result is an unscaled MOVWUloadidx whose index is (SLLconst [1] idx0).
  2054  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  2055  	            x0:(MOVHUloadidx2 ptr0 idx0 mem)
  2056  	y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADDshiftLL [1] ptr1 idx1) mem)))
  2057  	y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
  2058  	&& s == nil
  2059  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  2060  	&& y1.Uses == 1 && y2.Uses == 1
  2061  	&& o0.Uses == 1
  2062  	&& mergePoint(b,x0,x1,x2) != nil
  2063  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2064  	&& isSamePtr(p1, p)
  2065  	&& clobber(x0) && clobber(x1) && clobber(x2)
  2066  	&& clobber(y1) && clobber(y2)
  2067  	&& clobber(o0)
  2068  	-> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 (SLLconst <idx0.Type> [1] idx0) mem)
  2069  
  2070  // b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit
        // In this group the low 32 bits are already a word load (x0); the rules fold the
        // byte loads for b[4]..b[7] into it. All matched loads, MOVDnop wrappers and the
        // inner ORshiftLL values (o0..o2) must have Uses == 1 and are clobbered.
        // Offset form: same {s} p mem, byte offsets i0+4 .. i0+7.
  2071  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2072  	            x0:(MOVWUload [i0] {s} p mem)
  2073  	y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem)))
  2074  	y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem)))
  2075  	y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem)))
  2076  	y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
  2077  	&& i4 == i0+4
  2078  	&& i5 == i0+5
  2079  	&& i6 == i0+6
  2080  	&& i7 == i0+7
  2081  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2082  	&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2083  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2084  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2085  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
  2086  	&& clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
  2087  	&& clobber(o0) && clobber(o1) && clobber(o2)
  2088  	-> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        // Mixed form: x0 is indexed (ptr0, idx0); bytes 4..7 are loaded at offsets 4..7 from an
        // ADD of the same two operands (either order); p must be the same pointer as p1.
  2089  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2090  	            x0:(MOVWUloadidx ptr0 idx0 mem)
  2091  	y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADD ptr1 idx1) mem)))
  2092  	y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
  2093  	y3:(MOVDnop x3:(MOVBUload [6] {s} p mem)))
  2094  	y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
  2095  	&& s == nil
  2096  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2097  	&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2098  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2099  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2100  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2101  	&& isSamePtr(p1, p)
  2102  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
  2103  	&& clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
  2104  	&& clobber(o0) && clobber(o1) && clobber(o2)
  2105  	-> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 idx0 mem)
        // Scaled form: x0 is a MOVWUloadidx4 and the byte loads go through (ADDshiftLL [2] ptr1 idx1);
        // the result is an unscaled MOVDloadidx whose index is (SLLconst [2] idx0).
  2106  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2107  	            x0:(MOVWUloadidx4 ptr0 idx0 mem)
  2108  	y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADDshiftLL [2] ptr1 idx1) mem)))
  2109  	y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
  2110  	y3:(MOVDnop x3:(MOVBUload [6] {s} p mem)))
  2111  	y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
  2112  	&& s == nil
  2113  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2114  	&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2115  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2116  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2117  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2118  	&& isSamePtr(p1, p)
  2119  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
  2120  	&& clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
  2121  	&& clobber(o0) && clobber(o1) && clobber(o2)
  2122  	-> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 (SLLconst <idx0.Type> [2] idx0) mem)
        // Indexed form: bytes 4..7 use the same ptr with indexes (ADDconst [4..7] idx).
  2123  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2124  	            x0:(MOVWUloadidx ptr idx mem)
  2125  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
  2126  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
  2127  	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
  2128  	y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
  2129  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2130  	&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2131  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2132  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2133  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
  2134  	&& clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
  2135  	&& clobber(o0) && clobber(o1) && clobber(o2)
  2136  	-> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr idx mem)
  2137  
  2138  // b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 32-bit
        // OR-rooted form built from four single-byte loads: the top byte enters through
        // (SLLconst [24]) and the lowest byte is the unshifted second operand of the OR.
        // All loads, MOVDnop wrappers, o0, o1 and s0 must have Uses == 1 and are clobbered.
        // Offset form: same {s} p mem, byte offsets i0 .. i0+3.
  2139  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
  2140  	y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem)))
  2141  	y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem)))
  2142  	y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem)))
  2143  	y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)))
  2144  	&& i1 == i0+1
  2145  	&& i2 == i0+2
  2146  	&& i3 == i0+3
  2147  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2148  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2149  	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  2150  	&& mergePoint(b,x0,x1,x2,x3) != nil
  2151  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2152  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2153  	&& clobber(o0) && clobber(o1) && clobber(s0)
  2154  	-> @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        // Mixed form: byte 0 is an indexed load (ptr0, idx0); bytes 1..3 are loaded at offsets 1..3
        // from an ADD of the same two operands (either order); p must be the same pointer as p1.
  2155  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
  2156  	y0:(MOVDnop x0:(MOVBUload [3] {s} p mem)))
  2157  	y1:(MOVDnop x1:(MOVBUload [2] {s} p mem)))
  2158  	y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  2159  	y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem)))
  2160  	&& s == nil
  2161  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2162  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2163  	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  2164  	&& mergePoint(b,x0,x1,x2,x3) != nil
  2165  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2166  	&& isSamePtr(p1, p)
  2167  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2168  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2169  	&& clobber(o0) && clobber(o1) && clobber(s0)
  2170  	-> @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
        // Indexed form: bytes 1..3 use the same ptr with indexes (ADDconst [1..3] idx).
  2171  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
  2172  	y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  2173  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
  2174  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2175  	y3:(MOVDnop x3:(MOVBUloadidx ptr idx mem)))
  2176  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2177  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2178  	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  2179  	&& mergePoint(b,x0,x1,x2,x3) != nil
  2180  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2181  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2182  	&& clobber(o0) && clobber(o1) && clobber(s0)
  2183  	-> @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr idx mem)
  2184  
  2185  // b[7]<<56 | b[6]<<48 | b[5]<<40 | b[4]<<32 | b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 64-bit
        // OR-rooted form built from eight single-byte loads: the top byte enters through
        // (SLLconst [56]) and the lowest byte is the unshifted second operand of the OR.
        // All loads, MOVDnop wrappers, o0..o5 and s0 must have Uses == 1 and are clobbered.
        // Offset form: same {s} p mem, byte offsets i0 .. i0+7.
  2186  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
  2187  	y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem)))
  2188  	y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem)))
  2189  	y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem)))
  2190  	y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem)))
  2191  	y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem)))
  2192  	y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem)))
  2193  	y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem)))
  2194  	y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)))
  2195  	&& i1 == i0+1
  2196  	&& i2 == i0+2
  2197  	&& i3 == i0+3
  2198  	&& i4 == i0+4
  2199  	&& i5 == i0+5
  2200  	&& i6 == i0+6
  2201  	&& i7 == i0+7
  2202  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2203  	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  2204  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2205  	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  2206  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  2207  	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  2208  	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  2209  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2210  	&& clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
  2211  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2212  	&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
  2213  	&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
  2214  	&& clobber(o4) && clobber(o5) && clobber(s0)
  2215  	-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        // Mixed form: byte 0 is an indexed load (ptr0, idx0); bytes 1..7 are loaded at offsets 1..7
        // from an ADD of the same two operands (either order); p must be the same pointer as p1.
  2216  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
  2217  	y0:(MOVDnop x0:(MOVBUload [7] {s} p mem)))
  2218  	y1:(MOVDnop x1:(MOVBUload [6] {s} p mem)))
  2219  	y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
  2220  	y3:(MOVDnop x3:(MOVBUload [4] {s} p mem)))
  2221  	y4:(MOVDnop x4:(MOVBUload [3] {s} p mem)))
  2222  	y5:(MOVDnop x5:(MOVBUload [2] {s} p mem)))
  2223  	y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  2224  	y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)))
  2225  	&& s == nil
  2226  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2227  	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  2228  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2229  	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  2230  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  2231  	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  2232  	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  2233  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2234  	&& isSamePtr(p1, p)
  2235  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2236  	&& clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
  2237  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2238  	&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
  2239  	&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
  2240  	&& clobber(o4) && clobber(o5) && clobber(s0)
  2241  	-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
        // Indexed form: bytes 1..7 use the same ptr with indexes (ADDconst [1..7] idx).
  2242  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
  2243  	y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
  2244  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
  2245  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
  2246  	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
  2247  	y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  2248  	y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
  2249  	y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2250  	y7:(MOVDnop x7:(MOVBUloadidx ptr idx mem)))
  2251  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2252  	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  2253  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2254  	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  2255  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  2256  	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  2257  	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  2258  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2259  	&& clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
  2260  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2261  	&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
  2262  	&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
  2263  	&& clobber(o4) && clobber(o5) && clobber(s0)
  2264  	-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr idx mem)
  2265  
  2266  // big endian loads
  2267  // b[1] | b[0]<<8 -> load 16-bit, reverse
        // Same shape as a two-byte combine, but the byte at the higher address is the
        // unshifted operand, so the combined halfword load is wrapped in REV16W.
        // Offset form: same {s} p mem, offsets i0 and i0+1. The result keeps [i0] {s} p on
        // the MOVHUload itself; this rule does not build an OffPtr.
  2268  (ORshiftLL <t> [8]
  2269  	y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem))
  2270  	y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem)))
  2271  	&& i1 == i0+1
  2272  	&& x0.Uses == 1 && x1.Uses == 1
  2273  	&& y0.Uses == 1 && y1.Uses == 1
  2274  	&& mergePoint(b,x0,x1) != nil
  2275  	&& clobber(x0) && clobber(x1)
  2276  	&& clobber(y0) && clobber(y1)
  2277  	-> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
        // Mixed form: byte 0 is an indexed load (ptr0, idx0); byte 1 is loaded at offset 1
        // from (ADD ptr1 idx1) with s == nil. The ADD operands may match in either order.
  2278  (ORshiftLL <t> [8]
  2279  	y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))
  2280  	y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem)))
  2281  	&& s == nil
  2282  	&& x0.Uses == 1 && x1.Uses == 1
  2283  	&& y0.Uses == 1 && y1.Uses == 1
  2284  	&& mergePoint(b,x0,x1) != nil
  2285  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2286  	&& clobber(x0) && clobber(x1)
  2287  	&& clobber(y0) && clobber(y1)
  2288  	-> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
        // Indexed form: byte 1 uses the same ptr with index (ADDconst [1] idx).
  2289  (ORshiftLL <t> [8]
  2290  	y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [1] idx) mem))
  2291  	y1:(MOVDnop x1:(MOVBUloadidx ptr idx mem)))
  2292  	&& x0.Uses == 1 && x1.Uses == 1
  2293  	&& y0.Uses == 1 && y1.Uses == 1
  2294  	&& mergePoint(b,x0,x1) != nil
  2295  	&& clobber(x0) && clobber(x1)
  2296  	&& clobber(y0) && clobber(y1)
  2297  	-> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr idx mem))
  2298  
  2299  // b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit, reverse
        // In this group the low 16 bits are already a byte-reversed halfword load
        // (REV16W of x0, located 2 bytes past the start); the rules fold the byte loads
        // for b[1] and b[0] into it and emit REVW of one word load. All matched loads,
        // their REV16W/MOVDnop wrappers and o0 must have Uses == 1 and are clobbered.
        // Offset form: same {s} p mem, offsets i0, i0+1 and i0+2.
  2300  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  2301  	y0:(REV16W  x0:(MOVHUload [i2] {s} p mem))
  2302  	y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
  2303  	y2:(MOVDnop x2:(MOVBUload [i0] {s} p mem)))
  2304  	&& i1 == i0+1
  2305  	&& i2 == i0+2
  2306  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  2307  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
  2308  	&& o0.Uses == 1
  2309  	&& mergePoint(b,x0,x1,x2) != nil
  2310  	&& clobber(x0) && clobber(x1) && clobber(x2)
  2311  	&& clobber(y0) && clobber(y1) && clobber(y2)
  2312  	&& clobber(o0)
  2313  	-> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        // Mixed form: byte 0 is an indexed load (ptr0, idx0); the other loads use offsets 1 and 2
        // from an ADD of the same two operands (either order); p must be the same pointer as p1.
  2314  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  2315  	y0:(REV16W  x0:(MOVHUload [2] {s} p mem))
  2316  	y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  2317  	y2:(MOVDnop x2:(MOVBUloadidx ptr0 idx0 mem)))
  2318  	&& s == nil
  2319  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  2320  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
  2321  	&& o0.Uses == 1
  2322  	&& mergePoint(b,x0,x1,x2) != nil
  2323  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2324  	&& isSamePtr(p1, p)
  2325  	&& clobber(x0) && clobber(x1) && clobber(x2)
  2326  	&& clobber(y0) && clobber(y1) && clobber(y2)
  2327  	&& clobber(o0)
  2328  	-> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
        // Indexed form: the other loads use the same ptr with indexes (ADDconst [1] idx) and (ADDconst [2] idx).
  2329  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  2330  	y0:(REV16W  x0:(MOVHUloadidx ptr (ADDconst [2] idx) mem))
  2331  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2332  	y2:(MOVDnop x2:(MOVBUloadidx ptr idx mem)))
  2333  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  2334  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
  2335  	&& o0.Uses == 1
  2336  	&& mergePoint(b,x0,x1,x2) != nil
  2337  	&& clobber(x0) && clobber(x1) && clobber(x2)
  2338  	&& clobber(y0) && clobber(y1) && clobber(y2)
  2339  	&& clobber(o0)
  2340  	-> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
  2341  
  2342  // b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit, reverse
        // In this group the low 32 bits are already a byte-reversed word load (REVW of x0,
        // located 4 bytes past the start); the rules fold the byte loads for b[3]..b[0]
        // into it and emit REV of one doubleword load. All matched loads, their
        // REVW/MOVDnop wrappers and o0..o2 must have Uses == 1 and are clobbered.
        // Offset form: same {s} p mem, offsets i0 .. i0+4.
  2343  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2344  	y0:(REVW    x0:(MOVWUload [i4] {s} p mem))
  2345  	y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem)))
  2346  	y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
  2347  	y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem)))
  2348  	y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem)))
  2349  	&& i1 == i0+1
  2350  	&& i2 == i0+2
  2351  	&& i3 == i0+3
  2352  	&& i4 == i0+4
  2353  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2354  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2355  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2356  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2357  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
  2358  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
  2359  	&& clobber(o0) && clobber(o1) && clobber(o2)
  2360  	-> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        // Mixed form: byte 0 is an indexed load (ptr0, idx0); the other loads use offsets 1..4
        // from an ADD of the same two operands (either order); p must be the same pointer as p1.
  2361  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2362  	y0:(REVW    x0:(MOVWUload [4] {s} p mem))
  2363  	y1:(MOVDnop x1:(MOVBUload [3] {s} p mem)))
  2364  	y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
  2365  	y3:(MOVDnop x3:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  2366  	y4:(MOVDnop x4:(MOVBUloadidx ptr0 idx0 mem)))
  2367  	&& s == nil
  2368  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2369  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2370  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2371  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2372  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2373  	&& isSamePtr(p1, p)
  2374  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
  2375  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
  2376  	&& clobber(o0) && clobber(o1) && clobber(o2)
  2377  	-> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
        // Indexed form: the other loads use the same ptr with indexes (ADDconst [1..4] idx).
  2378  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2379  	y0:(REVW    x0:(MOVWUloadidx ptr (ADDconst [4] idx) mem))
  2380  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  2381  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
  2382  	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2383  	y4:(MOVDnop x4:(MOVBUloadidx ptr idx mem)))
  2384  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2385  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2386  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2387  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2388  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
  2389  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
  2390  	&& clobber(o0) && clobber(o1) && clobber(o2)
  2391  	-> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr idx mem))
  2392  
  2393  // b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] -> load 32-bit, reverse
        // OR-rooted form built from four single-byte loads: the first byte enters through
        // (SLLconst [24]) and the last byte is the unshifted second operand of the OR, so
        // the combined word load is wrapped in REVW. All loads, MOVDnop wrappers, o0, o1
        // and s0 must have Uses == 1 and are clobbered.
        // Offset form: same {s} p mem, byte offsets i0 .. i0+3.
  2394  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
  2395  	y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)))
  2396  	y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
  2397  	y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
  2398  	y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
  2399  	&& i1 == i0+1
  2400  	&& i2 == i0+2
  2401  	&& i3 == i0+3
  2402  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2403  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2404  	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  2405  	&& mergePoint(b,x0,x1,x2,x3) != nil
  2406  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2407  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2408  	&& clobber(o0) && clobber(o1) && clobber(s0)
  2409  	-> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        // Mixed form: byte 0 is an indexed load (ptr0, idx0); bytes 1..3 are loaded at offsets 1..3
        // from an ADD of the same two operands (either order); p must be the same pointer as p1.
  2410  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
  2411  	y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)))
  2412  	y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  2413  	y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
  2414  	y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
  2415  	&& s == nil
  2416  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2417  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2418  	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  2419  	&& mergePoint(b,x0,x1,x2,x3) != nil
  2420  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2421  	&& isSamePtr(p1, p)
  2422  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2423  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2424  	&& clobber(o0) && clobber(o1) && clobber(s0)
  2425  	-> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
        // Indexed form: bytes 1..3 use the same ptr with indexes (ADDconst [1..3] idx).
  2426  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
  2427  	y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)))
  2428  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2429  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
  2430  	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  2431  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2432  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2433  	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  2434  	&& mergePoint(b,x0,x1,x2,x3) != nil
  2435  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2436  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2437  	&& clobber(o0) && clobber(o1) && clobber(s0)
  2438  	-> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
  2439  
  2440  // b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] -> load 64-bit, reverse
        // OR-rooted form built from eight single-byte loads: the first byte enters through
        // (SLLconst [56]) and the last byte is the unshifted second operand of the OR, so
        // the combined doubleword load is wrapped in REV. All loads, MOVDnop wrappers,
        // o0..o5 and s0 must have Uses == 1 and are clobbered.
        // Offset form: same {s} p mem, byte offsets i0 .. i0+7.
  2441  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
  2442  	y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)))
  2443  	y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
  2444  	y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
  2445  	y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
  2446  	y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem)))
  2447  	y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem)))
  2448  	y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem)))
  2449  	y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
  2450  	&& i1 == i0+1
  2451  	&& i2 == i0+2
  2452  	&& i3 == i0+3
  2453  	&& i4 == i0+4
  2454  	&& i5 == i0+5
  2455  	&& i6 == i0+6
  2456  	&& i7 == i0+7
  2457  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2458  	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  2459  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2460  	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  2461  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  2462  	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  2463  	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  2464  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2465  	&& clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
  2466  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2467  	&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
  2468  	&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
  2469  	&& clobber(o4) && clobber(o5) && clobber(s0)
  2470  	-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        // Mixed form: byte 0 is an indexed load (ptr0, idx0); bytes 1..7 are loaded at offsets 1..7
        // from an ADD of the same two operands (either order); p must be the same pointer as p1.
  2471  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
  2472  	y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)))
  2473  	y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  2474  	y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
  2475  	y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
  2476  	y4:(MOVDnop x4:(MOVBUload [4] {s} p mem)))
  2477  	y5:(MOVDnop x5:(MOVBUload [5] {s} p mem)))
  2478  	y6:(MOVDnop x6:(MOVBUload [6] {s} p mem)))
  2479  	y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)))
  2480  	&& s == nil
  2481  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2482  	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  2483  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2484  	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  2485  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  2486  	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  2487  	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  2488  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2489  	&& isSamePtr(p1, p)
  2490  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2491  	&& clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
  2492  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2493  	&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
  2494  	&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
  2495  	&& clobber(o4) && clobber(o5) && clobber(s0)
  2496  	-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
        // Indexed form: bytes 1..7 use the same ptr with indexes (ADDconst [1..7] idx).
  2497  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
  2498  	y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)))
  2499  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2500  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
  2501  	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  2502  	y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
  2503  	y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
  2504  	y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
  2505  	y7:(MOVDnop x7:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
  2506  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2507  	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  2508  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2509  	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  2510  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  2511  	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  2512  	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  2513  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2514  	&& clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
  2515  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2516  	&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
  2517  	&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
  2518  	&& clobber(o4) && clobber(o5) && clobber(s0)
  2519  	-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr idx mem))
  2520  
  2521  // Combine zero stores into larger (unaligned) stores.
  2522  (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
  2523  	&& x.Uses == 1
  2524  	&& areAdjacentOffsets(i,j,1)
  2525  	&& is32Bit(min(i,j))
  2526  	&& isSamePtr(ptr0, ptr1)
  2527  	&& clobber(x)
  2528  	-> (MOVHstorezero [min(i,j)] {s} ptr0 mem)
  2529  (MOVBstorezero [1] {s} (ADD ptr0 idx0) x:(MOVBstorezeroidx ptr1 idx1 mem))
  2530  	&& x.Uses == 1
  2531  	&& s == nil
  2532  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2533  	&& clobber(x)
  2534  	-> (MOVHstorezeroidx ptr1 idx1 mem)
  2535  (MOVBstorezeroidx ptr (ADDconst [1] idx) x:(MOVBstorezeroidx ptr idx mem))
  2536  	&& x.Uses == 1
  2537  	&& clobber(x)
  2538  	-> (MOVHstorezeroidx ptr idx mem)
  // Merge two halfword zero stores into a single word zero store.
  // The inner store x must have exactly one use and is clobbered on success.
  //
  // Offset form: same symbol and base pointer, offsets accepted by
  // areAdjacentOffsets(i,j,2); the merged store goes to min(i,j).
  2539  (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
  2540  	&& x.Uses == 1
  2541  	&& areAdjacentOffsets(i,j,2)
  2542  	&& is32Bit(min(i,j))
  2543  	&& isSamePtr(ptr0, ptr1)
  2544  	&& clobber(x)
  2545  	-> (MOVWstorezero [min(i,j)] {s} ptr0 mem)
  // Mixed form: outer store at (ptr0+idx0)+2 with no symbol, inner store
  // indexed by ptr1+idx1; the ADD operands may match in either order.
  2546  (MOVHstorezero [2] {s} (ADD ptr0 idx0) x:(MOVHstorezeroidx ptr1 idx1 mem))
  2547  	&& x.Uses == 1
  2548  	&& s == nil
  2549  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2550  	&& clobber(x)
  2551  	-> (MOVWstorezeroidx ptr1 idx1 mem)
  // Indexed form: the outer index is idx+2 and the inner index is idx.
  2552  (MOVHstorezeroidx ptr (ADDconst [2] idx) x:(MOVHstorezeroidx ptr idx mem))
  2553  	&& x.Uses == 1
  2554  	&& clobber(x)
  2555  	-> (MOVWstorezeroidx ptr idx mem)
  // Scaled-index form: the inner store is MOVHstorezeroidx2 and the outer
  // address is (ADDshiftLL [1] ptr0 idx0)+2. Operands must match in order
  // (no swapped alternative here). The result is an unscaled MOVWstorezeroidx
  // whose index is rebuilt explicitly as (SLLconst [1] idx1).
  2556  (MOVHstorezero [2] {s} (ADDshiftLL [1] ptr0 idx0) x:(MOVHstorezeroidx2 ptr1 idx1 mem))
  2557  	&& x.Uses == 1
  2558  	&& s == nil
  2559  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2560  	&& clobber(x)
  2561  	-> (MOVWstorezeroidx ptr1 (SLLconst <idx1.Type> [1] idx1) mem)
  // Merge two word zero stores into a single doubleword zero store.
  // The inner store x must have exactly one use and is clobbered on success.
  //
  // Offset form: same symbol and base pointer, offsets accepted by
  // areAdjacentOffsets(i,j,4); the merged store goes to min(i,j).
  2562  (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
  2563  	&& x.Uses == 1
  2564  	&& areAdjacentOffsets(i,j,4)
  2565  	&& is32Bit(min(i,j))
  2566  	&& isSamePtr(ptr0, ptr1)
  2567  	&& clobber(x)
  2568  	-> (MOVDstorezero [min(i,j)] {s} ptr0 mem)
  // Mixed form: outer store at (ptr0+idx0)+4 with no symbol, inner store
  // indexed by ptr1+idx1; the ADD operands may match in either order.
  2569  (MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem))
  2570  	&& x.Uses == 1
  2571  	&& s == nil
  2572  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2573  	&& clobber(x)
  2574  	-> (MOVDstorezeroidx ptr1 idx1 mem)
  // Indexed form: the outer index is idx+4 and the inner index is idx.
  2575  (MOVWstorezeroidx ptr (ADDconst [4] idx) x:(MOVWstorezeroidx ptr idx mem))
  2576  	&& x.Uses == 1
  2577  	&& clobber(x)
  2578  	-> (MOVDstorezeroidx ptr idx mem)
  // Scaled-index form: the inner store is MOVWstorezeroidx4 and the outer
  // address is (ADDshiftLL [2] ptr0 idx0)+4. Operands must match in order.
  // The result is an unscaled MOVDstorezeroidx whose index is rebuilt
  // explicitly as (SLLconst [2] idx1).
  2579  (MOVWstorezero [4] {s} (ADDshiftLL [2] ptr0 idx0) x:(MOVWstorezeroidx4 ptr1 idx1 mem))
  2580  	&& x.Uses == 1
  2581  	&& s == nil
  2582  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2583  	&& clobber(x)
  2584  	-> (MOVDstorezeroidx ptr1 (SLLconst <idx1.Type> [2] idx1) mem)
  // Merge two doubleword zero stores into a single MOVQstorezero.
  // The inner store x must have exactly one use and is clobbered on success.
  //
  // Offset form: same symbol and base pointer, offsets accepted by
  // areAdjacentOffsets(i,j,8); the merged store goes to min(i,j).
  2585  (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
  2586  	&& x.Uses == 1
  2587  	&& areAdjacentOffsets(i,j,8)
  2588  	&& is32Bit(min(i,j))
  2589  	&& isSamePtr(ptr0, ptr1)
  2590  	&& clobber(x)
  2591  	-> (MOVQstorezero [min(i,j)] {s} ptr0 mem)
  // Mixed form: outer store at p0+8 where p0 = ptr0+idx0, inner store indexed
  // by ptr1+idx1 (ADD operands may match in either order). Unlike the narrower
  // widths, the result is not an indexed op: it reuses the already computed
  // address p0 with offset 0.
  2592  (MOVDstorezero [8] {s} p0:(ADD ptr0 idx0) x:(MOVDstorezeroidx ptr1 idx1 mem))
  2593  	&& x.Uses == 1
  2594  	&& s == nil
  2595  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2596  	&& clobber(x)
  2597  	-> (MOVQstorezero [0] {s} p0 mem)
  // Scaled-index form: outer store at p0+8 where p0 = ADDshiftLL [3] ptr0 idx0,
  // inner store is MOVDstorezeroidx8 on the same (ptr, idx), matched in order.
  // The result again stores through p0 with offset 0.
  2598  (MOVDstorezero [8] {s} p0:(ADDshiftLL [3] ptr0 idx0) x:(MOVDstorezeroidx8 ptr1 idx1 mem))
  2599  	&& x.Uses == 1
  2600  	&& s == nil
  2601  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2602  	&& clobber(x)
  2603  	-> (MOVQstorezero [0] {s} p0 mem)
  2604  
  2605  // Combine stores into larger (unaligned) stores.
  // Merge two adjacent byte stores of the low 16 bits of w into one halfword
  // store of w. The inner store x (lower address) writes w itself and the
  // outer store (one byte higher) writes w shifted right by 8, so the bytes
  // are already in the order a plain halfword store of w produces.
  // x must have exactly one use; it is clobbered when the rule fires.
  //
  // The rules below differ only in how "w >> 8" is spelled
  // (SRLconst, UBFX, or SRLconst of a MOVDreg copy) and in addressing form:
  //   - offset form:  [i] / [i-1] on the same symbol and pointer
  //   - mixed form:   (ADD ptr0 idx0)+1 over an indexed inner store, s == nil,
  //                   ADD operands accepted in either order
  //   - indexed form: idx+1 over idx
  //
  // High byte written as (SRLconst [8] w).
  2606  (MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
  2607  	&& x.Uses == 1
  2608  	&& isSamePtr(ptr0, ptr1)
  2609  	&& clobber(x)
  2610  	-> (MOVHstore [i-1] {s} ptr0 w mem)
  2611  (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] w) x:(MOVBstoreidx ptr1 idx1 w mem))
  2612  	&& x.Uses == 1
  2613  	&& s == nil
  2614  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2615  	&& clobber(x)
  2616  	-> (MOVHstoreidx ptr1 idx1 w mem)
  2617  (MOVBstoreidx ptr (ADDconst [1] idx) (SRLconst [8] w) x:(MOVBstoreidx ptr idx w mem))
  2618  	&& x.Uses == 1
  2619  	&& clobber(x)
  2620  	-> (MOVHstoreidx ptr idx w mem)
  // High byte written as (UBFX [armBFAuxInt(8, 8)] w).
  // NOTE(review): armBFAuxInt arguments are presumably (lsb, width) - confirm.
  2621  (MOVBstore [i] {s} ptr0 (UBFX [armBFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
  2622  	&& x.Uses == 1
  2623  	&& isSamePtr(ptr0, ptr1)
  2624  	&& clobber(x)
  2625  	-> (MOVHstore [i-1] {s} ptr0 w mem)
  2626  (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(8, 8)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
  2627  	&& x.Uses == 1
  2628  	&& s == nil
  2629  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2630  	&& clobber(x)
  2631  	-> (MOVHstoreidx ptr1 idx1 w mem)
  // High byte written as (UBFX [armBFAuxInt(8, 24)] w).
  2632  (MOVBstore [i] {s} ptr0 (UBFX [armBFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
  2633  	&& x.Uses == 1
  2634  	&& isSamePtr(ptr0, ptr1)
  2635  	&& clobber(x)
  2636  	-> (MOVHstore [i-1] {s} ptr0 w mem)
  2637  (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(8, 24)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
  2638  	&& x.Uses == 1
  2639  	&& s == nil
  2640  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2641  	&& clobber(x)
  2642  	-> (MOVHstoreidx ptr1 idx1 w mem)
  // High byte written as (SRLconst [8] (MOVDreg w)); the inner store still
  // writes w directly, and the merged store writes w without the MOVDreg.
  2643  (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
  2644  	&& x.Uses == 1
  2645  	&& isSamePtr(ptr0, ptr1)
  2646  	&& clobber(x)
  2647  	-> (MOVHstore [i-1] {s} ptr0 w mem)
  2648  (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w mem))
  2649  	&& x.Uses == 1
  2650  	&& s == nil
  2651  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2652  	&& clobber(x)
  2653  	-> (MOVHstoreidx ptr1 idx1 w mem)
  // Merge two adjacent byte stores taken from the middle of w into one
  // halfword store. The inner store x (lower address) writes w0, which is w
  // shifted right by j-8; the outer store (one byte higher) writes w shifted
  // right by j. The merged halfword store writes w0, the value that was going
  // to the lower address. x must have exactly one use and is clobbered.
  //
  // Shifts written as SRLconst [j] / SRLconst [j-8].
  2654  (MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem))
  2655  	&& x.Uses == 1
  2656  	&& isSamePtr(ptr0, ptr1)
  2657  	&& clobber(x)
  2658  	-> (MOVHstore [i-1] {s} ptr0 w0 mem)
  2659  (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] w) mem))
  2660  	&& x.Uses == 1
  2661  	&& s == nil
  2662  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2663  	&& clobber(x)
  2664  	-> (MOVHstoreidx ptr1 idx1 w0 mem)
  // Shifts written as UBFX. Both bitfields must satisfy width == 32 - lsb
  // (the extracted field runs up to bit 31), and the inner field's lsb must be
  // exactly 8 less than the outer field's lsb.
  2665  (MOVBstore [i] {s} ptr0 (UBFX [bfc] w) x:(MOVBstore [i-1] {s} ptr1 w0:(UBFX [bfc2] w) mem))
  2666  	&& x.Uses == 1
  2667  	&& isSamePtr(ptr0, ptr1)
  2668  	&& getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc)
  2669  	&& getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2)
  2670  	&& getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8
  2671  	&& clobber(x)
  2672  	-> (MOVHstore [i-1] {s} ptr0 w0 mem)
  2673  (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [bfc] w) x:(MOVBstoreidx ptr1 idx1 w0:(UBFX [bfc2] w) mem))
  2674  	&& x.Uses == 1
  2675  	&& s == nil
  2676  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2677  	&& getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc)
  2678  	&& getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2)
  2679  	&& getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8
  2680  	&& clobber(x)
  2681  	-> (MOVHstoreidx ptr1 idx1 w0 mem)
  // Shifts written as SRLconst of (MOVDreg w) on both stores.
  2682  (MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
  2683  	&& x.Uses == 1
  2684  	&& isSamePtr(ptr0, ptr1)
  2685  	&& clobber(x)
  2686  	-> (MOVHstore [i-1] {s} ptr0 w0 mem)
  2687  (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
  2688  	&& x.Uses == 1
  2689  	&& s == nil
  2690  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2691  	&& clobber(x)
  2692  	-> (MOVHstoreidx ptr1 idx1 w0 mem)
  // Merge two adjacent halfword stores of the low 32 bits of w into one word
  // store of w. The inner store x (lower address) writes w and the outer store
  // (two bytes higher) writes w shifted right by 16. x must have exactly one
  // use and is clobbered when the rule fires.
  //
  // Addressing forms: offset ([i] / [i-2]), mixed ((ADD ptr0 idx0)+2 over an
  // indexed inner store, operands in either order), indexed (idx+2 over idx),
  // and scaled ((ADDshiftLL [1] ptr0 idx0)+2 over MOVHstoreidx2, operands in
  // order; the result uses MOVWstoreidx with an explicit (SLLconst [1] idx1)).
  //
  // Upper half written as (SRLconst [16] w).
  2693  (MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
  2694  	&& x.Uses == 1
  2695  	&& isSamePtr(ptr0, ptr1)
  2696  	&& clobber(x)
  2697  	-> (MOVWstore [i-2] {s} ptr0 w mem)
  2698  (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx ptr1 idx1 w mem))
  2699  	&& x.Uses == 1
  2700  	&& s == nil
  2701  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2702  	&& clobber(x)
  2703  	-> (MOVWstoreidx ptr1 idx1 w mem)
  2704  (MOVHstoreidx ptr (ADDconst [2] idx) (SRLconst [16] w) x:(MOVHstoreidx ptr idx w mem))
  2705  	&& x.Uses == 1
  2706  	&& clobber(x)
  2707  	-> (MOVWstoreidx ptr idx w mem)
  2708  (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
  2709  	&& x.Uses == 1
  2710  	&& s == nil
  2711  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2712  	&& clobber(x)
  2713  	-> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
  // Upper half written as (UBFX [armBFAuxInt(16, 16)] w).
  2714  (MOVHstore [i] {s} ptr0 (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
  2715  	&& x.Uses == 1
  2716  	&& isSamePtr(ptr0, ptr1)
  2717  	&& clobber(x)
  2718  	-> (MOVWstore [i-2] {s} ptr0 w mem)
  2719  (MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem))
  2720  	&& x.Uses == 1
  2721  	&& s == nil
  2722  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2723  	&& clobber(x)
  2724  	-> (MOVWstoreidx ptr1 idx1 w mem)
  2725  (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
  2726  	&& x.Uses == 1
  2727  	&& s == nil
  2728  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2729  	&& clobber(x)
  2730  	-> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
  // Upper half written as (SRLconst [16] (MOVDreg w)); the merged store
  // writes w without the MOVDreg.
  2731  (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
  2732  	&& x.Uses == 1
  2733  	&& isSamePtr(ptr0, ptr1)
  2734  	&& clobber(x)
  2735  	-> (MOVWstore [i-2] {s} ptr0 w mem)
  2736  (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx ptr1 idx1 w mem))
  2737  	&& x.Uses == 1
  2738  	&& s == nil
  2739  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2740  	&& clobber(x)
  2741  	-> (MOVWstoreidx ptr1 idx1 w mem)
  2742  (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx2 ptr1 idx1 w mem))
  2743  	&& x.Uses == 1
  2744  	&& s == nil
  2745  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2746  	&& clobber(x)
  2747  	-> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
  // Merge two adjacent halfword stores taken from the middle of w into one
  // word store. The inner store x (lower address) writes w0 = w shifted right
  // by j-16; the outer store (two bytes higher) writes w shifted right by j.
  // The merged word store writes w0. x must have exactly one use and is
  // clobbered when the rule fires.
  //
  // Offset form.
  2748  (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
  2749  	&& x.Uses == 1
  2750  	&& isSamePtr(ptr0, ptr1)
  2751  	&& clobber(x)
  2752  	-> (MOVWstore [i-2] {s} ptr0 w0 mem)
  // Mixed form: (ADD ptr0 idx0)+2 over an indexed inner store; the ADD
  // operands may match in either order.
  2753  (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx ptr1 idx1 w0:(SRLconst [j-16] w) mem))
  2754  	&& x.Uses == 1
  2755  	&& s == nil
  2756  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2757  	&& clobber(x)
  2758  	-> (MOVWstoreidx ptr1 idx1 w0 mem)
  // Scaled form: (ADDshiftLL [1] ptr0 idx0)+2 over MOVHstoreidx2; the result
  // uses MOVWstoreidx with an explicit (SLLconst [1] idx1) index.
  2759  (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx2 ptr1 idx1 w0:(SRLconst [j-16] w) mem))
  2760  	&& x.Uses == 1
  2761  	&& s == nil
  2762  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2763  	&& clobber(x)
  2764  	-> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w0 mem)
  // Merge two adjacent word stores into one doubleword store.
  // The inner store x (lower address) must have exactly one use and is
  // clobbered when the rule fires.
  //
  // Fixed shift: the inner store writes w and the outer store (four bytes
  // higher) writes (SRLconst [32] w); the merged store writes w.
  // Forms: offset, mixed (ADD, operands in either order), indexed (idx+4 over
  // idx), and scaled (ADDshiftLL [2] over MOVWstoreidx4, result index rebuilt
  // as (SLLconst [2] idx1)).
  2765  (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
  2766  	&& x.Uses == 1
  2767  	&& isSamePtr(ptr0, ptr1)
  2768  	&& clobber(x)
  2769  	-> (MOVDstore [i-4] {s} ptr0 w mem)
  2770  (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem))
  2771  	&& x.Uses == 1
  2772  	&& s == nil
  2773  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2774  	&& clobber(x)
  2775  	-> (MOVDstoreidx ptr1 idx1 w mem)
  2776  (MOVWstoreidx ptr (ADDconst [4] idx) (SRLconst [32] w) x:(MOVWstoreidx ptr idx w mem))
  2777  	&& x.Uses == 1
  2778  	&& clobber(x)
  2779  	-> (MOVDstoreidx ptr idx w mem)
  2780  (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx4 ptr1 idx1 w mem))
  2781  	&& x.Uses == 1
  2782  	&& s == nil
  2783  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2784  	&& clobber(x)
  2785  	-> (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w mem)
  // General shift: the inner store writes w0 = w shifted right by j-32 and
  // the outer store writes w shifted right by j; the merged store writes w0.
  2786  (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
  2787  	&& x.Uses == 1
  2788  	&& isSamePtr(ptr0, ptr1)
  2789  	&& clobber(x)
  2790  	-> (MOVDstore [i-4] {s} ptr0 w0 mem)
  2791  (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem))
  2792  	&& x.Uses == 1
  2793  	&& s == nil
  2794  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2795  	&& clobber(x)
  2796  	-> (MOVDstoreidx ptr1 idx1 w0 mem)
  2797  (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx4 ptr1 idx1 w0:(SRLconst [j-32] w) mem))
  2798  	&& x.Uses == 1
  2799  	&& s == nil
  2800  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2801  	&& clobber(x)
  2802  	-> (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w0 mem)
  // Merge eight byte stores that write w most-significant byte first.
  // The outermost store puts w (its low byte) at the highest offset i, and
  // each nested store one byte lower writes w shifted right by a further
  // 8 bits, down to (SRLconst [56] w) at i-7. Because the byte order is the
  // reverse of what a plain doubleword store of w would write, the merged
  // store writes (REV w) at i-7. All seven inner stores must be single-use
  // and are clobbered when the rule fires.
  2803  (MOVBstore [i] {s} ptr w
  2804  	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
  2805  	x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
  2806  	x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w)
  2807  	x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w)
  2808  	x4:(MOVBstore [i-5] {s} ptr (SRLconst [40] w)
  2809  	x5:(MOVBstore [i-6] {s} ptr (SRLconst [48] w)
  2810  	x6:(MOVBstore [i-7] {s} ptr (SRLconst [56] w) mem))))))))
  2811  	&& x0.Uses == 1
  2812  	&& x1.Uses == 1
  2813  	&& x2.Uses == 1
  2814  	&& x3.Uses == 1
  2815  	&& x4.Uses == 1
  2816  	&& x5.Uses == 1
  2817  	&& x6.Uses == 1
  2818  	&& clobber(x0)
  2819  	&& clobber(x1)
  2820  	&& clobber(x2)
  2821  	&& clobber(x3)
  2822  	&& clobber(x4)
  2823  	&& clobber(x5)
  2824  	&& clobber(x6)
  2825  	-> (MOVDstore [i-7] {s} ptr (REV <w.Type> w) mem)
  // Same pattern when the base address is a sum: offsets 7..2 use p, offset 1
  // uses p1 = (ADD ptr1 idx1), and the lowest byte is an indexed store on
  // (ptr0, idx0). p1 must be the same pointer as p, and the ADD operands must
  // match (ptr0, idx0) in either order. No symbol is allowed (s == nil).
  // The result is an indexed doubleword store of (REV w).
  2826  (MOVBstore [7] {s} p w
  2827  	x0:(MOVBstore [6] {s} p (SRLconst [8] w)
  2828  	x1:(MOVBstore [5] {s} p (SRLconst [16] w)
  2829  	x2:(MOVBstore [4] {s} p (SRLconst [24] w)
  2830  	x3:(MOVBstore [3] {s} p (SRLconst [32] w)
  2831  	x4:(MOVBstore [2] {s} p (SRLconst [40] w)
  2832  	x5:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [48] w)
  2833  	x6:(MOVBstoreidx ptr0 idx0 (SRLconst [56] w) mem))))))))
  2834  	&& x0.Uses == 1
  2835  	&& x1.Uses == 1
  2836  	&& x2.Uses == 1
  2837  	&& x3.Uses == 1
  2838  	&& x4.Uses == 1
  2839  	&& x5.Uses == 1
  2840  	&& x6.Uses == 1
  2841  	&& s == nil
  2842  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2843  	&& isSamePtr(p1, p)
  2844  	&& clobber(x0)
  2845  	&& clobber(x1)
  2846  	&& clobber(x2)
  2847  	&& clobber(x3)
  2848  	&& clobber(x4)
  2849  	&& clobber(x5)
  2850  	&& clobber(x6)
  2851  	-> (MOVDstoreidx ptr0 idx0 (REV <w.Type> w) mem)
  // Merge four byte stores of a 32-bit value w whose upper bytes are extracted
  // with UBFX (armBFAuxInt(8, 24), (16, 16), (24, 8)). All three inner stores
  // must be single-use and are clobbered when the rule fires.
  //
  // Offset form, most-significant byte at the lowest offset: w's low byte is
  // stored at i and the byte extracted at bit 24 at i-3, so the merged word
  // store writes (REVW w) at i-3.
  2852  (MOVBstore [i] {s} ptr w
  2853  	x0:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w)
  2854  	x1:(MOVBstore [i-2] {s} ptr (UBFX [armBFAuxInt(16, 16)] w)
  2855  	x2:(MOVBstore [i-3] {s} ptr (UBFX [armBFAuxInt(24, 8)] w) mem))))
  2856  	&& x0.Uses == 1
  2857  	&& x1.Uses == 1
  2858  	&& x2.Uses == 1
  2859  	&& clobber(x0)
  2860  	&& clobber(x1)
  2861  	&& clobber(x2)
  2862  	-> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
  // Mixed form of the same byte order: offsets 3 and 2 use p, offset 1 uses
  // p1 = (ADD ptr1 idx1), and the lowest byte is an indexed store on
  // (ptr0, idx0). Requires s == nil, p1 same as p, and the ADD operands
  // matching (ptr0, idx0) in either order.
  2863  (MOVBstore [3] {s} p w
  2864  	x0:(MOVBstore [2] {s} p (UBFX [armBFAuxInt(8, 24)] w)
  2865  	x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (UBFX [armBFAuxInt(16, 16)] w)
  2866  	x2:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(24, 8)] w) mem))))
  2867  	&& x0.Uses == 1
  2868  	&& x1.Uses == 1
  2869  	&& x2.Uses == 1
  2870  	&& s == nil
  2871  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2872  	&& isSamePtr(p1, p)
  2873  	&& clobber(x0)
  2874  	&& clobber(x1)
  2875  	&& clobber(x2)
  2876  	-> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
  // Indexed form of the same byte order: low byte at idx+3 down to the byte
  // extracted at bit 24 at idx. Result is an indexed word store of (REVW w).
  2877  (MOVBstoreidx ptr (ADDconst [3] idx) w
  2878  	x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(8, 24)] w)
  2879  	x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(16, 16)] w)
  2880  	x2:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(24, 8)] w) mem))))
  2881  	&& x0.Uses == 1
  2882  	&& x1.Uses == 1
  2883  	&& x2.Uses == 1
  2884  	&& clobber(x0)
  2885  	&& clobber(x1)
  2886  	&& clobber(x2)
  2887  	-> (MOVWstoreidx ptr idx (REVW <w.Type> w) mem)
  // Indexed form with the opposite byte order: low byte at idx and
  // successively higher bytes at idx+1..idx+3. This already matches a plain
  // word store, so the result stores w itself with no byte reversal.
  2888  (MOVBstoreidx ptr idx w
  2889  	x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 24)] w)
  2890  	x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(16, 16)] w)
  2891  	x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [armBFAuxInt(24, 8)] w) mem))))
  2892  	&& x0.Uses == 1
  2893  	&& x1.Uses == 1
  2894  	&& x2.Uses == 1
  2895  	&& clobber(x0)
  2896  	&& clobber(x1)
  2897  	&& clobber(x2)
  2898  	-> (MOVWstoreidx ptr idx w mem)
  // Merge four byte stores that write w most-significant byte first, with the
  // upper bytes produced by SRLconst shifts of 8, 16 and 24. The low byte of w
  // lands at the highest offset and (w >> 24) at the lowest, so the merged
  // word store writes (REVW w). All three inner stores must be single-use and
  // are clobbered when the rule fires.
  //
  // Shifts applied to (MOVDreg w): offset form.
  2899  (MOVBstore [i] {s} ptr w
  2900  	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w))
  2901  	x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w))
  2902  	x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem))))
  2903  	&& x0.Uses == 1
  2904  	&& x1.Uses == 1
  2905  	&& x2.Uses == 1
  2906  	&& clobber(x0)
  2907  	&& clobber(x1)
  2908  	&& clobber(x2)
  2909  	-> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
  // Shifts applied to (MOVDreg w): mixed form. Offsets 3 and 2 use p, offset 1
  // uses p1 = (ADD ptr1 idx1), the lowest byte is indexed by (ptr0, idx0).
  // Requires s == nil, p1 same as p, ADD operands matching in either order.
  2910  (MOVBstore [3] {s} p w
  2911  	x0:(MOVBstore [2] {s} p (SRLconst [8] (MOVDreg w))
  2912  	x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] (MOVDreg w))
  2913  	x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] (MOVDreg w)) mem))))
  2914  	&& x0.Uses == 1
  2915  	&& x1.Uses == 1
  2916  	&& x2.Uses == 1
  2917  	&& s == nil
  2918  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2919  	&& isSamePtr(p1, p)
  2920  	&& clobber(x0)
  2921  	&& clobber(x1)
  2922  	&& clobber(x2)
  2923  	-> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
  // Shifts applied to w directly: offset form.
  2924  (MOVBstore [i] {s} ptr w
  2925  	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
  2926  	x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
  2927  	x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
  2928  	&& x0.Uses == 1
  2929  	&& x1.Uses == 1
  2930  	&& x2.Uses == 1
  2931  	&& clobber(x0)
  2932  	&& clobber(x1)
  2933  	&& clobber(x2)
  2934  	-> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
  // Shifts applied to w directly: mixed form (same constraints as above).
  2935  (MOVBstore [3] {s} p w
  2936  	x0:(MOVBstore [2] {s} p (SRLconst [8] w)
  2937  	x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] w)
  2938  	x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] w) mem))))
  2939  	&& x0.Uses == 1
  2940  	&& x1.Uses == 1
  2941  	&& x2.Uses == 1
  2942  	&& s == nil
  2943  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2944  	&& isSamePtr(p1, p)
  2945  	&& clobber(x0)
  2946  	&& clobber(x1)
  2947  	&& clobber(x2)
  2948  	-> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
  // Merge two byte stores that write the low 16 bits of w high byte first:
  // the outer store puts w (its low byte) at the higher address and the inner
  // store x puts (w >> 8) one byte lower. The merged halfword store therefore
  // writes (REV16W w). x must have exactly one use and is clobbered.
  //
  // High byte written as (SRLconst [8] w): offset form, then mixed form
  // ((ADD ptr1 idx1)+1 over an indexed inner store, s == nil, operands in
  // either order).
  2949  (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
  2950  	&& x.Uses == 1
  2951  	&& clobber(x)
  2952  	-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
  2953  (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] w) mem))
  2954  	&& x.Uses == 1
  2955  	&& s == nil
  2956  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2957  	&& clobber(x)
  2958  	-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
  // High byte written as (UBFX [armBFAuxInt(8, 8)] w): offset, mixed and
  // indexed (idx+1 over idx) forms.
  2959  (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 8)] w) mem))
  2960  	&& x.Uses == 1
  2961  	&& clobber(x)
  2962  	-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
  2963  (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(8, 8)] w) mem))
  2964  	&& x.Uses == 1
  2965  	&& s == nil
  2966  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2967  	&& clobber(x)
  2968  	-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
  2969  (MOVBstoreidx ptr (ADDconst [1] idx) w x:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(8, 8)] w) mem))
  2970  	&& x.Uses == 1
  2971  	&& clobber(x)
  2972  	-> (MOVHstoreidx ptr idx (REV16W <w.Type> w) mem)
  // Indexed form with the opposite byte order: w's low byte at idx and the
  // extracted high byte at idx+1. This matches a plain halfword store, so the
  // result stores w itself with no byte reversal.
  2973  (MOVBstoreidx ptr idx w x:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 8)] w) mem))
  2974  	&& x.Uses == 1
  2975  	&& clobber(x)
  2976  	-> (MOVHstoreidx ptr idx w mem)
  // Merge two byte stores that write the low 16 bits of w high byte first
  // (w's low byte at the higher address, w >> 8 one byte lower) into a single
  // halfword store of (REV16W w). The inner store x must have exactly one use
  // and is clobbered when the rule fires.
  //
  // Each variant has an offset form ([i] over [i-1] on the same ptr) and a
  // mixed form ((ADD ptr1 idx1)+1 over an indexed inner store, s == nil, with
  // the ADD operands matching (ptr0, idx0) in either order).
  //
  // High byte written as (SRLconst [8] (MOVDreg w)).
  2977  (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
  2978  	&& x.Uses == 1
  2979  	&& clobber(x)
  2980  	-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
  2981  (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
  2982  	&& x.Uses == 1
  2983  	&& s == nil
  2984  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2985  	&& clobber(x)
  2986  	-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
  // High byte written as (UBFX [armBFAuxInt(8, 24)] w).
  2987  (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w) mem))
  2988  	&& x.Uses == 1
  2989  	&& clobber(x)
  2990  	-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
  2991  (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(8, 24)] w) mem))
  2992  	&& x.Uses == 1
  2993  	&& s == nil
  2994  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2995  	&& clobber(x)
  2996  	-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
  3007  
  3008  // FP simplification
  // Fold a negation into a multiply: negating a product, or multiplying by a
  // negated first operand, becomes the negated-multiply op FNMULS/FNMULD.
  3009  (FNEGS (FMULS x y)) -> (FNMULS x y)
  3010  (FNEGD (FMULD x y)) -> (FNMULD x y)
  3011  (FMULS (FNEGS x) y) -> (FNMULS x y)
  3012  (FMULD (FNEGD x) y) -> (FNMULD x y)
  // The same folding applied to an already negated multiply cancels the
  // negation and yields a plain multiply.
  3013  (FNEGS (FNMULS x y)) -> (FMULS x y)
  3014  (FNEGD (FNMULD x y)) -> (FMULD x y)
  3015  (FNMULS (FNEGS x) y) -> (FMULS x y)
  3016  (FNMULD (FNEGD x) y) -> (FMULD x y)
  // Fuse a multiply feeding an add or subtract into one multiply-accumulate
  // op. Operand order in the result is (addend, multiplicand, multiplier):
  //   a + x*y  -> FMADD  a x y
  //   a - x*y  -> FMSUB  a x y
  //   x*y - a  -> FNMSUB a x y
  3017  (FADDS a (FMULS x y)) -> (FMADDS a x y)
  3018  (FADDD a (FMULD x y)) -> (FMADDD a x y)
  3019  (FSUBS a (FMULS x y)) -> (FMSUBS a x y)
  3020  (FSUBD a (FMULD x y)) -> (FMSUBD a x y)
  3021  (FSUBS (FMULS x y) a) -> (FNMSUBS a x y)
  3022  (FSUBD (FMULD x y) a) -> (FNMSUBD a x y)
  // The same fusion when the product is a negated multiply (FNMUL):
  //   a + FNMUL(x,y)  -> FMSUB  a x y
  //   a - FNMUL(x,y)  -> FMADD  a x y
  //   FNMUL(x,y) - a  -> FNMADD a x y
  3023  (FADDS a (FNMULS x y)) -> (FMSUBS a x y)
  3024  (FADDD a (FNMULD x y)) -> (FMSUBD a x y)
  3025  (FSUBS a (FNMULS x y)) -> (FMADDS a x y)
  3026  (FSUBD a (FNMULD x y)) -> (FMADDD a x y)
  3027  (FSUBS (FNMULS x y) a) -> (FNMADDS a x y)
  3028  (FSUBD (FNMULD x y) a) -> (FNMADDD a x y)
  3029  
  // Fold loads from read-only symbols into constants. When the address is
  // offset off from symbol sym relative to SB and symIsRO(sym) holds, the
  // load is replaced by a MOVDconst holding the value returned by
  // read8/read16/read32/read64. The multi-byte readers are passed
  // config.BigEndian; the memory argument is ignored (_).
  3030  (MOVBUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVDconst [int64(read8(sym, off))])
  3031  (MOVHUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVDconst [int64(read16(sym, off, config.BigEndian))])
  3032  (MOVWUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVDconst [int64(read32(sym, off, config.BigEndian))])
  3033  (MOVDload  [off] {sym} (SB) _) && symIsRO(sym) -> (MOVDconst [int64(read64(sym, off, config.BigEndian))])